Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorskyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230>2010-04-21 15:53:28 +0400
committerskyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230>2010-04-21 15:53:28 +0400
commit05a3e1f0c759eae80e50f8cfc66d4eae4e266e7a (patch)
treea945834c1aca6802312ea7793c368c52ebd4ed05
parent640e892c02c102ff47210698bc577b386bd7cc8d (diff)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/DPR_MOSES@3156 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r--moses/.cdtbuild63
-rw-r--r--moses/.cdtproject14
-rw-r--r--moses/.cproject143
-rw-r--r--moses/.project82
-rw-r--r--moses/.settings/org.eclipse.cdt.core.prefs3
-rw-r--r--moses/.settings/org.eclipse.cdt.managedbuilder.core.prefs21
-rw-r--r--moses/moses.vcproj1026
-rw-r--r--moses/moses.xcodeproj/project.pbxproj913
-rw-r--r--moses/src/BitmapContainer.cpp499
-rw-r--r--moses/src/BitmapContainer.h249
-rw-r--r--moses/src/ConfusionNet.cpp245
-rw-r--r--moses/src/ConfusionNet.h63
-rw-r--r--moses/src/DPR_reordering.cpp577
-rw-r--r--moses/src/DPR_reordering.h130
-rw-r--r--moses/src/DecodeGraph.cpp36
-rw-r--r--moses/src/DecodeGraph.h68
-rw-r--r--moses/src/DecodeStep.cpp66
-rw-r--r--moses/src/DecodeStep.h113
-rw-r--r--moses/src/DecodeStepGeneration.cpp176
-rw-r--r--moses/src/DecodeStepGeneration.h60
-rw-r--r--moses/src/DecodeStepTranslation.cpp136
-rw-r--r--moses/src/DecodeStepTranslation.h67
-rw-r--r--moses/src/Dictionary.cpp38
-rw-r--r--moses/src/Dictionary.h68
-rw-r--r--moses/src/DummyScoreProducers.cpp154
-rw-r--r--moses/src/DummyScoreProducers.h70
-rw-r--r--moses/src/DynSAInclude/fdstream.h147
-rw-r--r--moses/src/DynSAInclude/file.cpp160
-rw-r--r--moses/src/DynSAInclude/file.h61
-rw-r--r--moses/src/DynSAInclude/types.h32
-rw-r--r--moses/src/DynSAInclude/utils.h81
-rw-r--r--moses/src/DynSAInclude/vocab.cpp93
-rw-r--r--moses/src/DynSAInclude/vocab.h64
-rw-r--r--moses/src/DynSuffixArray.cpp237
-rw-r--r--moses/src/DynSuffixArray.h50
-rw-r--r--moses/src/FFState.cpp8
-rw-r--r--moses/src/FFState.h13
-rw-r--r--moses/src/Factor.cpp53
-rw-r--r--moses/src/Factor.h147
-rw-r--r--moses/src/FactorCollection.cpp117
-rw-r--r--moses/src/FactorCollection.h91
-rw-r--r--moses/src/FactorTypeSet.cpp59
-rw-r--r--moses/src/FactorTypeSet.h53
-rw-r--r--moses/src/FeatureFunction.cpp22
-rw-r--r--moses/src/FeatureFunction.h64
-rw-r--r--moses/src/File.cpp4
-rw-r--r--moses/src/File.h122
-rw-r--r--moses/src/FilePtr.h55
-rw-r--r--moses/src/FloydWarshall.cpp34
-rw-r--r--moses/src/FloydWarshall.h12
-rw-r--r--moses/src/GenerationDictionary.cpp164
-rw-r--r--moses/src/GenerationDictionary.h96
-rw-r--r--moses/src/GlobalLexicalModel.cpp185
-rw-r--r--moses/src/GlobalLexicalModel.h76
-rw-r--r--moses/src/Hypothesis.cpp512
-rw-r--r--moses/src/Hypothesis.h322
-rw-r--r--moses/src/HypothesisStack.cpp31
-rw-r--r--moses/src/HypothesisStack.h48
-rw-r--r--moses/src/HypothesisStackCubePruning.cpp315
-rw-r--r--moses/src/HypothesisStackCubePruning.h154
-rw-r--r--moses/src/HypothesisStackNormal.cpp303
-rw-r--r--moses/src/HypothesisStackNormal.h137
-rw-r--r--moses/src/InputFileStream.cpp62
-rw-r--r--moses/src/InputFileStream.h48
-rw-r--r--moses/src/InputType.cpp59
-rw-r--r--moses/src/InputType.h132
-rw-r--r--moses/src/LMList.cpp54
-rw-r--r--moses/src/LMList.h23
-rw-r--r--moses/src/LVoc.cpp7
-rw-r--r--moses/src/LVoc.h68
-rw-r--r--moses/src/LanguageModel.cpp191
-rw-r--r--moses/src/LanguageModel.h146
-rw-r--r--moses/src/LanguageModelFactory.cpp151
-rw-r--r--moses/src/LanguageModelFactory.h34
-rw-r--r--moses/src/LanguageModelIRST.cpp236
-rw-r--r--moses/src/LanguageModelIRST.h88
-rw-r--r--moses/src/LanguageModelInternal.cpp272
-rw-r--r--moses/src/LanguageModelInternal.h41
-rw-r--r--moses/src/LanguageModelJoint.cpp22
-rw-r--r--moses/src/LanguageModelJoint.h133
-rw-r--r--moses/src/LanguageModelMultiFactor.cpp56
-rw-r--r--moses/src/LanguageModelMultiFactor.h60
-rw-r--r--moses/src/LanguageModelRandLM.cpp114
-rw-r--r--moses/src/LanguageModelRandLM.h67
-rw-r--r--moses/src/LanguageModelRemote.cpp139
-rw-r--r--moses/src/LanguageModelRemote.h43
-rw-r--r--moses/src/LanguageModelSRI.cpp174
-rw-r--r--moses/src/LanguageModelSRI.h65
-rw-r--r--moses/src/LanguageModelSingleFactor.cpp60
-rw-r--r--moses/src/LanguageModelSingleFactor.h87
-rw-r--r--moses/src/LanguageModelSkip.cpp22
-rw-r--r--moses/src/LanguageModelSkip.h129
-rw-r--r--moses/src/LexicalReordering.cpp269
-rw-r--r--moses/src/LexicalReordering.h159
-rw-r--r--moses/src/LexicalReorderingTable.cpp686
-rw-r--r--moses/src/LexicalReorderingTable.h158
-rw-r--r--moses/src/Makefile.am232
-rw-r--r--moses/src/Manager.cpp859
-rw-r--r--moses/src/Manager.h141
-rw-r--r--moses/src/NGramCollection.cpp67
-rw-r--r--moses/src/NGramCollection.h57
-rw-r--r--moses/src/NGramNode.cpp26
-rw-r--r--moses/src/NGramNode.h79
-rw-r--r--moses/src/ObjectPool.h127
-rw-r--r--moses/src/PCNTools.cpp138
-rw-r--r--moses/src/PCNTools.h46
-rw-r--r--moses/src/PDTAimp.h546
-rw-r--r--moses/src/Parameter.cpp592
-rw-r--r--moses/src/Parameter.h81
-rw-r--r--moses/src/PartialTranslOptColl.cpp104
-rw-r--r--moses/src/PartialTranslOptColl.h85
-rw-r--r--moses/src/Phrase.cpp387
-rw-r--r--moses/src/Phrase.h178
-rw-r--r--moses/src/PhraseDictionary.cpp145
-rw-r--r--moses/src/PhraseDictionary.h133
-rw-r--r--moses/src/PhraseDictionaryDynSuffixArray.cpp494
-rw-r--r--moses/src/PhraseDictionaryDynSuffixArray.h125
-rw-r--r--moses/src/PhraseDictionaryMemory.cpp222
-rw-r--r--moses/src/PhraseDictionaryMemory.h72
-rw-r--r--moses/src/PhraseDictionaryNode.cpp94
-rw-r--r--moses/src/PhraseDictionaryNode.h85
-rw-r--r--moses/src/PhraseDictionaryTree.cpp715
-rw-r--r--moses/src/PhraseDictionaryTree.h134
-rw-r--r--moses/src/PhraseDictionaryTreeAdaptor.cpp126
-rw-r--r--moses/src/PhraseDictionaryTreeAdaptor.h77
-rw-r--r--moses/src/PrefixTree.h277
-rw-r--r--moses/src/PrefixTreeMap.cpp219
-rw-r--r--moses/src/PrefixTreeMap.h137
-rw-r--r--moses/src/ReorderingConstraint.cpp249
-rw-r--r--moses/src/ReorderingConstraint.h97
-rw-r--r--moses/src/ScoreComponentCollection.cpp15
-rw-r--r--moses/src/ScoreComponentCollection.h207
-rw-r--r--moses/src/ScoreIndexManager.cpp148
-rw-r--r--moses/src/ScoreIndexManager.h70
-rw-r--r--moses/src/ScoreProducer.cpp21
-rw-r--r--moses/src/ScoreProducer.h64
-rw-r--r--moses/src/Search.cpp32
-rw-r--r--moses/src/Search.h40
-rw-r--r--moses/src/SearchCubePruning.cpp347
-rw-r--r--moses/src/SearchCubePruning.h47
-rw-r--r--moses/src/SearchNormal.cpp388
-rw-r--r--moses/src/SearchNormal.h49
-rw-r--r--moses/src/Sentence.cpp200
-rw-r--r--moses/src/Sentence.h104
-rw-r--r--moses/src/SentenceStats.cpp51
-rw-r--r--moses/src/SentenceStats.h172
-rw-r--r--moses/src/SquareMatrix.cpp128
-rw-r--r--moses/src/SquareMatrix.h88
-rw-r--r--moses/src/StaticData.cpp1138
-rw-r--r--moses/src/StaticData.h535
-rw-r--r--moses/src/TargetPhrase.cpp224
-rw-r--r--moses/src/TargetPhrase.h161
-rw-r--r--moses/src/TargetPhraseCollection.cpp49
-rw-r--r--moses/src/TargetPhraseCollection.h76
-rw-r--r--moses/src/Timer.cpp113
-rw-r--r--moses/src/Timer.h40
-rw-r--r--moses/src/TranslationOption.cpp175
-rw-r--r--moses/src/TranslationOption.h190
-rw-r--r--moses/src/TranslationOptionCollection.cpp655
-rw-r--r--moses/src/TranslationOptionCollection.h154
-rw-r--r--moses/src/TranslationOptionCollectionConfusionNet.cpp38
-rw-r--r--moses/src/TranslationOptionCollectionConfusionNet.h21
-rw-r--r--moses/src/TranslationOptionCollectionText.cpp77
-rw-r--r--moses/src/TranslationOptionCollectionText.h48
-rw-r--r--moses/src/TranslationOptionList.cpp26
-rw-r--r--moses/src/TranslationOptionList.h56
-rw-r--r--moses/src/TrellisPath.cpp231
-rw-r--r--moses/src/TrellisPath.h116
-rw-r--r--moses/src/TrellisPathCollection.cpp34
-rw-r--r--moses/src/TrellisPathCollection.h96
-rw-r--r--moses/src/TrellisPathList.h76
-rw-r--r--moses/src/TypeDef.h194
-rw-r--r--moses/src/UniqueObject.h55
-rw-r--r--moses/src/UserMessage.cpp65
-rw-r--r--moses/src/UserMessage.h55
-rw-r--r--moses/src/Util.cpp184
-rw-r--r--moses/src/Util.h300
-rw-r--r--moses/src/Word.cpp109
-rw-r--r--moses/src/Word.h123
-rw-r--r--moses/src/WordLattice.cpp169
-rw-r--r--moses/src/WordLattice.h36
-rw-r--r--moses/src/WordsBitmap.cpp64
-rw-r--r--moses/src/WordsBitmap.h254
-rw-r--r--moses/src/WordsRange.cpp34
-rw-r--r--moses/src/WordsRange.h95
-rw-r--r--moses/src/XmlOption.cpp406
-rw-r--r--moses/src/XmlOption.h32
-rw-r--r--moses/src/gzfilebuf.h81
-rw-r--r--moses/src/hash.cpp60
-rw-r--r--moses/src/hash.h8
-rw-r--r--moses/src/hypergraph.proto30
-rw-r--r--moses/src/rule.proto10
192 files changed, 29267 insertions, 0 deletions
diff --git a/moses/.cdtbuild b/moses/.cdtbuild
new file mode 100644
index 000000000..62ea215c4
--- /dev/null
+++ b/moses/.cdtbuild
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?fileVersion 3.1.0?>
+
+<ManagedProjectBuildInfo>
+<project id="moses.cdt.managedbuild.target.gnu.lib.1654550987" name="Static Library (Gnu)" projectType="cdt.managedbuild.target.gnu.lib">
+<configuration artifactExtension="a" artifactName="moses" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.2082569407" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
+<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.2031285777" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1319234555" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1022475428" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug">
+<option id="gnu.cpp.compiler.option.preprocessor.def.2071633498" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+<listOptionValue builtIn="false" value="LM_SRI"/>
+<listOptionValue builtIn="false" value="LM_IRST"/>
+<listOptionValue builtIn="false" value="LM_INTERNAL"/>
+<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+<listOptionValue builtIn="false" value="_FILE_OFFSET_BITS=64"/>
+<listOptionValue builtIn="false" value="_LARGE_FILES"/>
+</option>
+<option id="gnu.cpp.compiler.option.include.paths.560695438" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../irstlm/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/misc/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/dstruct/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/include&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/lm/src&quot;"/>
+</option>
+<option id="gnu.cpp.compiler.option.debugging.gprof.1598624147" superClass="gnu.cpp.compiler.option.debugging.gprof" value="false" valueType="boolean"/>
+<option id="gnu.cpp.compiler.option.warnings.allwarn.1855070516" superClass="gnu.cpp.compiler.option.warnings.allwarn" value="false" valueType="boolean"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.187676627" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.1660142337" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug"/>
+<macros/>
+</toolChain>
+</configuration>
+<configuration artifactExtension="a" artifactName="moses" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.875756117" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
+<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.1956672127" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.1930771681" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.703142952" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release">
+<option id="gnu.cpp.compiler.option.include.paths.28728819" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../irstlm/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/misc/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/dstruct/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/include&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/lm/src&quot;"/>
+</option>
+<option id="gnu.cpp.compiler.option.preprocessor.def.444585015" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+<listOptionValue builtIn="false" value="LM_SRI"/>
+<listOptionValue builtIn="false" value="LM_IRST"/>
+<listOptionValue builtIn="false" value="LM_INTERNAL"/>
+<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+<listOptionValue builtIn="false" value="_FILE_OFFSET_BITS=64"/>
+<listOptionValue builtIn="false" value="_LARGE_FILES"/>
+</option>
+<option id="gnu.cpp.compiler.option.debugging.gprof.1449614968" superClass="gnu.cpp.compiler.option.debugging.gprof" value="false" valueType="boolean"/>
+<option id="gnu.cpp.compiler.option.warnings.allwarn.1037990342" superClass="gnu.cpp.compiler.option.warnings.allwarn" value="true" valueType="boolean"/>
+<option id="gnu.cpp.compiler.option.warnings.pedantic.280967211" superClass="gnu.cpp.compiler.option.warnings.pedantic" value="false" valueType="boolean"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.1731276863" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/>
+<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.693340097" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release"/>
+<macros/>
+</toolChain>
+</configuration>
+<macros/>
+</project>
+</ManagedProjectBuildInfo>
diff --git a/moses/.cdtproject b/moses/.cdtproject
new file mode 100644
index 000000000..420497012
--- /dev/null
+++ b/moses/.cdtproject
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?eclipse-cdt version="2.0"?>
+
+<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake">
+<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
+<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+<extension id="org.eclipse.cdt.core.nullindexer" point="org.eclipse.cdt.core.CIndexer"/>
+<data>
+<item id="org.eclipse.cdt.core.pathentry">
+<pathentry kind="out" path=""/>
+<pathentry kind="con" path="org.eclipse.cdt.managedbuilder.MANAGED_CONTAINER"/>
+</item>
+</data>
+</cdtproject>
diff --git a/moses/.cproject b/moses/.cproject
new file mode 100644
index 000000000..2ca509b9d
--- /dev/null
+++ b/moses/.cproject
@@ -0,0 +1,143 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject>
+<storageModule moduleId="org.eclipse.cdt.core.settings">
+<cconfiguration id="cdt.managedbuild.config.gnu.lib.debug.2082569407">
+<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.lib.debug.2082569407" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+<externalSettings>
+<externalSetting>
+<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
+<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Debug"/>
+</externalSetting>
+</externalSettings>
+<extensions>
+<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+</extensions>
+</storageModule>
+<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+<configuration artifactExtension="a" artifactName="moses" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.2082569407" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
+<folderInfo id="cdt.managedbuild.config.gnu.lib.debug.2082569407.280491634" name="/" resourcePath="">
+<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.2031285777" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
+<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.debug.157058940" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.debug"/>
+<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.lib.debug.188556568" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.lib.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1319234555" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug">
+<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.lib.debug.option.optimization.level.1576819994" name="Optimization Level" superClass="gnu.c.compiler.lib.debug.option.optimization.level" valueType="enumerated"/>
+<option id="gnu.c.compiler.lib.debug.option.debugging.level.532786472" name="Debug Level" superClass="gnu.c.compiler.lib.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1713267187" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1022475428" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug">
+<option id="gnu.cpp.compiler.option.preprocessor.def.2071633498" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+<listOptionValue builtIn="false" value="LM_SRI"/>
+<listOptionValue builtIn="false" value="LM_RAND"/>
+<listOptionValue builtIn="false" value="LM_IRST"/>
+<listOptionValue builtIn="false" value="LM_INTERNAL"/>
+<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+<listOptionValue builtIn="false" value="_FILE_OFFSET_BITS=64"/>
+<listOptionValue builtIn="false" value="_LARGE_FILES"/>
+</option>
+<option id="gnu.cpp.compiler.option.include.paths.560695438" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../irstlm/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../randlm/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/misc/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/dstruct/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/include&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/lm/src&quot;"/>
+</option>
+<option id="gnu.cpp.compiler.option.debugging.gprof.1598624147" name="Generate gprof information (-pg)" superClass="gnu.cpp.compiler.option.debugging.gprof" value="false" valueType="boolean"/>
+<option id="gnu.cpp.compiler.option.warnings.allwarn.1855070516" name="All warnings (-Wall)" superClass="gnu.cpp.compiler.option.warnings.allwarn" value="false" valueType="boolean"/>
+<option id="gnu.cpp.compiler.lib.debug.option.optimization.level.1543054613" name="Optimization Level" superClass="gnu.cpp.compiler.lib.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.lib.debug.option.debugging.level.1229946089" name="Debug Level" superClass="gnu.cpp.compiler.lib.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1215038936" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.187676627" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.1660142337" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug">
+<inputType id="cdt.managedbuild.tool.gnu.assembler.input.142461973" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.linker.base.546140613" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.base"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.linker.base.149153117" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.base"/>
+</toolChain>
+</folderInfo>
+</configuration>
+</storageModule>
+<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/>
+<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+<storageModule moduleId="scannerConfiguration"/>
+</cconfiguration>
+<cconfiguration id="cdt.managedbuild.config.gnu.lib.release.875756117">
+<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.lib.release.875756117" moduleId="org.eclipse.cdt.core.settings" name="Release">
+<externalSettings>
+<externalSetting>
+<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
+<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Release"/>
+</externalSetting>
+</externalSettings>
+<extensions>
+<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+</extensions>
+</storageModule>
+<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+<configuration artifactExtension="a" artifactName="moses" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.875756117" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
+<folderInfo id="cdt.managedbuild.config.gnu.lib.release.875756117.719008845" name="/" resourcePath="">
+<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.1956672127" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
+<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.release.1093853083" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.release"/>
+<builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.lib.release.484433680" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.lib.release"/>
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.1930771681" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release">
+<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.lib.release.option.optimization.level.1063318824" name="Optimization Level" superClass="gnu.c.compiler.lib.release.option.optimization.level" valueType="enumerated"/>
+<option id="gnu.c.compiler.lib.release.option.debugging.level.672202361" name="Debug Level" superClass="gnu.c.compiler.lib.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.70231818" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.703142952" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release">
+<option id="gnu.cpp.compiler.option.include.paths.28728819" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../irstlm/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../randlm/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/misc/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/dstruct/src&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/include&quot;"/>
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../srilm/lm/src&quot;"/>
+</option>
+<option id="gnu.cpp.compiler.option.preprocessor.def.444585015" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+<listOptionValue builtIn="false" value="LM_SRI"/>
+<listOptionValue builtIn="false" value="LM_RAND"/>
+<listOptionValue builtIn="false" value="LM_IRST"/>
+<listOptionValue builtIn="false" value="LM_INTERNAL"/>
+<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+<listOptionValue builtIn="false" value="_FILE_OFFSET_BITS=64"/>
+<listOptionValue builtIn="false" value="_LARGE_FILES"/>
+</option>
+<option id="gnu.cpp.compiler.option.debugging.gprof.1449614968" name="Generate gprof information (-pg)" superClass="gnu.cpp.compiler.option.debugging.gprof" value="false" valueType="boolean"/>
+<option id="gnu.cpp.compiler.option.warnings.allwarn.1037990342" name="All warnings (-Wall)" superClass="gnu.cpp.compiler.option.warnings.allwarn" value="true" valueType="boolean"/>
+<option id="gnu.cpp.compiler.option.warnings.pedantic.280967211" name="Pedantic (-pedantic)" superClass="gnu.cpp.compiler.option.warnings.pedantic" value="false" valueType="boolean"/>
+<option id="gnu.cpp.compiler.lib.release.option.optimization.level.481705900" name="Optimization Level" superClass="gnu.cpp.compiler.lib.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.lib.release.option.debugging.level.1978909896" name="Debug Level" superClass="gnu.cpp.compiler.lib.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1037157329" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.1731276863" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/>
+<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.693340097" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release">
+<inputType id="cdt.managedbuild.tool.gnu.assembler.input.384778575" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.linker.base.461032414" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.base"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.linker.base.1057349768" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.base"/>
+</toolChain>
+</folderInfo>
+</configuration>
+</storageModule>
+<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/>
+<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+<storageModule moduleId="scannerConfiguration"/>
+</cconfiguration>
+</storageModule>
+<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+<project id="moses.cdt.managedbuild.target.gnu.lib.1654550987" name="Static Library (Gnu)" projectType="cdt.managedbuild.target.gnu.lib"/>
+</storageModule>
+</cproject>
diff --git a/moses/.project b/moses/.project
new file mode 100644
index 000000000..23b858747
--- /dev/null
+++ b/moses/.project
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>moses</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+ <triggers>clean,full,incremental,</triggers>
+ <arguments>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+ <value>clean</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>?name?</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.append_environment</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.stopOnError</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildCommand</key>
+ <value>make</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.contents</key>
+ <value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildLocation</key>
+ <value>${workspace_loc:/moses/Debug}</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+ <value>false</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableFullBuild</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildArguments</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+ <value>all</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+ <value>all</value>
+ </dictionary>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.cdt.core.ccnature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+ <nature>org.eclipse.cdt.core.cnature</nature>
+ </natures>
+</projectDescription>
diff --git a/moses/.settings/org.eclipse.cdt.core.prefs b/moses/.settings/org.eclipse.cdt.core.prefs
new file mode 100644
index 000000000..f472b6eca
--- /dev/null
+++ b/moses/.settings/org.eclipse.cdt.core.prefs
@@ -0,0 +1,3 @@
+#Thu Oct 30 19:29:53 GMT 2008
+eclipse.preferences.version=1
+indexerId=org.eclipse.cdt.core.nullindexer
diff --git a/moses/.settings/org.eclipse.cdt.managedbuilder.core.prefs b/moses/.settings/org.eclipse.cdt.managedbuilder.core.prefs
new file mode 100644
index 000000000..e86307e17
--- /dev/null
+++ b/moses/.settings/org.eclipse.cdt.managedbuilder.core.prefs
@@ -0,0 +1,21 @@
+#Tue Nov 14 22:56:27 GMT 2006
+cdt.managedbuild.config.gnu.lib.debug.2082569407/internalBuilder/enabled=false
+cdt.managedbuild.config.gnu.lib.debug.2082569407/internalBuilder/ignoreErr=true
+cdt.managedbuild.config.gnu.lib.release.875756117/internalBuilder/enabled=false
+cdt.managedbuild.config.gnu.lib.release.875756117/internalBuilder/ignoreErr=true
+eclipse.preferences.version=1
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.debug.182985892=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.debug.454331295=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.release.1765997310=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.release.659132175=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.2082569407=<?xml version\="1.0" encoding\="UTF-8"?>\r\n<environment>\r\n<variable name\="CPATH" operation\="remove" value\=""/>\r\n<variable name\="C_INCLUDE_PATH" operation\="remove"/>\r\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove" value\=""/>\r\n</environment>\r\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.837708408=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.1054653020=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.875756117=<?xml version\="1.0" encoding\="UTF-8"?>\r\n<environment>\r\n<variable name\="CPATH" operation\="remove" value\=""/>\r\n<variable name\="C_INCLUDE_PATH" operation\="remove"/>\r\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove" value\=""/>\r\n</environment>\r\n
+environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.debug.182985892=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.debug.454331295=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.release.1765997310=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
+environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.release.659132175=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
+environment/project=<?xml version\="1.0" encoding\="UTF-8"?>\r\n<environment/>\r\n
+environment/project/cdt.managedbuild.config.gnu.lib.debug.2082569407=<?xml version\="1.0" encoding\="UTF-8"?>\r\n<environment/>\r\n
+environment/project/cdt.managedbuild.config.gnu.lib.release.875756117=<?xml version\="1.0" encoding\="UTF-8"?>\r\n<environment/>\r\n
diff --git a/moses/moses.vcproj b/moses/moses.vcproj
new file mode 100644
index 000000000..e259f9e4b
--- /dev/null
+++ b/moses/moses.vcproj
@@ -0,0 +1,1026 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9.00"
+ Name="moses"
+ ProjectGUID="{8122157A-0DE5-44FF-8E5B-024ED6ACE7AF}"
+ RootNamespace="moses"
+ Keyword="Win32Proj"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)..\mysqlpp\lib&quot;;&quot;$(SolutionDir)..\irstlm\src&quot;;&quot;$(SolutionDir)..\srilm\src&quot;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="$(OutDir)\$(ProjectName).lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ InlineFunctionExpansion="2"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)..\mysqlpp\lib&quot;;&quot;$(SolutionDir)..\irstlm\src&quot;;&quot;$(SolutionDir)..\srilm\src&quot;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
+ RuntimeLibrary="2"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="$(OutDir)\$(ProjectName).lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release-withSRILM|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ InlineFunctionExpansion="2"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)..\mysqlpp\lib&quot;;&quot;$(SolutionDir)..\irstlm\src&quot;;&quot;$(SolutionDir)..\srilm\src&quot;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;LM_SRI;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
+ RuntimeLibrary="0"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="$(OutDir)\$(ProjectName).lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug-withSRILM|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)..\mysqlpp\lib&quot;;&quot;$(SolutionDir)..\irstlm\src&quot;;&quot;$(SolutionDir)..\srilm\src&quot;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;LM_INTERNAL;LM_SRI;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="$(OutDir)\$(ProjectName).lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+ >
+ <File
+ RelativePath=".\src\BitmapContainer.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ConfusionNet.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeGraph.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeStep.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeStepGeneration.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeStepTranslation.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Dictionary.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DummyScoreProducers.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Factor.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FactorCollection.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FactorTypeSet.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FeatureFunction.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FFState.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FloydWarshall.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\GenerationDictionary.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\GlobalLexicalModel.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\hash.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Hypothesis.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\HypothesisStack.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\HypothesisStackCubePruning.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\HypothesisStackNormal.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\InputFileStream.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\InputType.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModel.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelFactory.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelInternal.cpp"
+ >
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release-withSRILM|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug-withSRILM|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelJoint.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelMultiFactor.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelSingleFactor.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelSkip.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelSRI.cpp"
+ >
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\src\LexicalReordering.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LexicalReorderingTable.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LMList.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LVoc.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Manager.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\NGramCollection.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\NGramNode.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Parameter.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PartialTranslOptColl.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PCNTools.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Phrase.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionary.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryMemory.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryNode.cpp"
+ >
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release-withSRILM|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug-withSRILM|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ ObjectFile="$(IntDir)\$(InputName)1.obj"
+ XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryTree.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryTreeAdaptor.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PrefixTreeMap.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ReorderingConstraint.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ScoreComponentCollection.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ScoreIndexManager.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ScoreProducer.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Search.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SearchCubePruning.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SearchNormal.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Sentence.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SentenceStats.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SquareMatrix.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\StaticData.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TargetPhrase.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TargetPhraseCollection.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Timer.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOption.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionCollection.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionCollectionConfusionNet.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionCollectionText.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionList.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TrellisPath.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TrellisPathCollection.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\UserMessage.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Util.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Word.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\WordLattice.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\WordsBitmap.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\WordsRange.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\src\XmlOption.cpp"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl;inc;xsd"
+ UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+ >
+ <File
+ RelativePath=".\src\BitmapContainer.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ConfusionNet.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeGraph.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeStep.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeStepGeneration.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DecodeStepTranslation.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Dictionary.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\DummyScoreProducers.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Factor.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FactorCollection.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FactorTypeSet.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FeatureFunction.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FFState.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\File.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\FilePtr.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\GenerationDictionary.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\GlobalLexicalModel.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\gzfilebuf.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\hash.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Hypothesis.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\HypothesisStack.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\HypothesisStackCubePruning.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\HypothesisStackNormal.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\InputFileStream.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\InputType.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModel.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelFactory.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelInternal.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelJoint.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelMultiFactor.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelSingleFactor.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelSkip.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LanguageModelSRI.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LexicalReordering.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LexicalReorderingTable.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LMList.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\LVoc.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Manager.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\NGramCollection.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\NGramNode.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ObjectPool.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Parameter.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PartialTranslOptColl.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PCNTools.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PDTAimp.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Phrase.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionary.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryMemory.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryNode.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryTree.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PhraseDictionaryTreeAdaptor.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PrefixTree.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\PrefixTreeMap.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ReorderingConstraint.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ScoreComponentCollection.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ScoreIndexManager.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\ScoreProducer.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Search.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SearchCubePruning.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SearchNormal.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Sentence.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SentenceStats.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\SquareMatrix.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\StaticData.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TargetPhrase.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TargetPhraseCollection.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Timer.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOption.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionCollection.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionCollectionConfusionNet.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionCollectionText.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TranslationOptionList.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TrellisPath.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TrellisPathCollection.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TrellisPathList.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\TypeDef.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\UserMessage.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Util.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\Word.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\WordLattice.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\WordsBitmap.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\WordsRange.h"
+ >
+ </File>
+ <File
+ RelativePath=".\src\XmlOption.h"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Resource Files"
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+ UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+ >
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/moses/moses.xcodeproj/project.pbxproj b/moses/moses.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..b2cf38dfd
--- /dev/null
+++ b/moses/moses.xcodeproj/project.pbxproj
@@ -0,0 +1,913 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 42;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 037C639A0C8EBFB400584F2E /* DecodeGraph.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 037C63980C8EBFB400584F2E /* DecodeGraph.cpp */; };
+ 037C639B0C8EBFB400584F2E /* DecodeGraph.h in Headers */ = {isa = PBXBuildFile; fileRef = 037C63990C8EBFB400584F2E /* DecodeGraph.h */; };
+ 0396E1A70C0B189200D95CFF /* HypothesisStack.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E1960C0B189200D95CFF /* HypothesisStack.cpp */; };
+ 0396E1A80C0B189200D95CFF /* HypothesisStack.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E1970C0B189200D95CFF /* HypothesisStack.h */; };
+ 0396E1A90C0B189200D95CFF /* LexicalReorderingTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E1980C0B189200D95CFF /* LexicalReorderingTable.cpp */; };
+ 0396E1AA0C0B189200D95CFF /* LexicalReorderingTable.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E1990C0B189200D95CFF /* LexicalReorderingTable.h */; };
+ 0396E1AB0C0B189200D95CFF /* LVoc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E19A0C0B189200D95CFF /* LVoc.cpp */; };
+ 0396E1AC0C0B189200D95CFF /* LVoc.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E19B0C0B189200D95CFF /* LVoc.h */; };
+ 0396E1AD0C0B189200D95CFF /* PCNTools.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E19C0C0B189200D95CFF /* PCNTools.cpp */; };
+ 0396E1AE0C0B189200D95CFF /* PCNTools.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E19D0C0B189200D95CFF /* PCNTools.h */; };
+ 0396E1AF0C0B189200D95CFF /* PrefixTreeMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E19E0C0B189200D95CFF /* PrefixTreeMap.cpp */; };
+ 0396E1B00C0B189200D95CFF /* PrefixTreeMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E19F0C0B189200D95CFF /* PrefixTreeMap.h */; };
+ 0396E1B10C0B189200D95CFF /* TrellisPath.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E1A00C0B189200D95CFF /* TrellisPath.cpp */; };
+ 0396E1B20C0B189200D95CFF /* TrellisPath.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E1A10C0B189200D95CFF /* TrellisPath.h */; };
+ 0396E1B30C0B189200D95CFF /* TrellisPathCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E1A20C0B189200D95CFF /* TrellisPathCollection.cpp */; };
+ 0396E1B40C0B189200D95CFF /* TrellisPathCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E1A30C0B189200D95CFF /* TrellisPathCollection.h */; };
+ 0396E1B50C0B189200D95CFF /* TrellisPathList.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E1A40C0B189200D95CFF /* TrellisPathList.h */; };
+ 0396E1B60C0B189200D95CFF /* WordLattice.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0396E1A50C0B189200D95CFF /* WordLattice.cpp */; };
+ 0396E1B70C0B189200D95CFF /* WordLattice.h in Headers */ = {isa = PBXBuildFile; fileRef = 0396E1A60C0B189200D95CFF /* WordLattice.h */; };
+ 1C5009BF0FB9E09700DFD24F /* FeatureFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C5009BB0FB9E09700DFD24F /* FeatureFunction.cpp */; };
+ 1C5009C00FB9E09700DFD24F /* FeatureFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C5009BC0FB9E09700DFD24F /* FeatureFunction.h */; };
+ 1C5009C10FB9E09700DFD24F /* FFState.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C5009BD0FB9E09700DFD24F /* FFState.cpp */; };
+ 1C5009C20FB9E09700DFD24F /* FFState.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C5009BE0FB9E09700DFD24F /* FFState.h */; };
+ 1C8CFE8F0AD67A9700FA22E2 /* ConfusionNet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE120AD67A9600FA22E2 /* ConfusionNet.cpp */; };
+ 1C8CFE900AD67A9700FA22E2 /* ConfusionNet.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE130AD67A9600FA22E2 /* ConfusionNet.h */; };
+ 1C8CFE910AD67A9700FA22E2 /* DecodeStep.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE140AD67A9600FA22E2 /* DecodeStep.cpp */; };
+ 1C8CFE920AD67A9700FA22E2 /* DecodeStep.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE150AD67A9600FA22E2 /* DecodeStep.h */; };
+ 1C8CFE930AD67A9700FA22E2 /* DecodeStepGeneration.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE160AD67A9600FA22E2 /* DecodeStepGeneration.cpp */; };
+ 1C8CFE940AD67A9700FA22E2 /* DecodeStepGeneration.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE170AD67A9600FA22E2 /* DecodeStepGeneration.h */; };
+ 1C8CFE950AD67A9700FA22E2 /* DecodeStepTranslation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE180AD67A9600FA22E2 /* DecodeStepTranslation.cpp */; };
+ 1C8CFE960AD67A9700FA22E2 /* DecodeStepTranslation.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE190AD67A9700FA22E2 /* DecodeStepTranslation.h */; };
+ 1C8CFE970AD67A9700FA22E2 /* Dictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE1A0AD67A9700FA22E2 /* Dictionary.cpp */; };
+ 1C8CFE980AD67A9700FA22E2 /* Dictionary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE1B0AD67A9700FA22E2 /* Dictionary.h */; };
+ 1C8CFE990AD67A9700FA22E2 /* DummyScoreProducers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE1C0AD67A9700FA22E2 /* DummyScoreProducers.cpp */; };
+ 1C8CFE9A0AD67A9700FA22E2 /* DummyScoreProducers.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE1D0AD67A9700FA22E2 /* DummyScoreProducers.h */; };
+ 1C8CFE9B0AD67A9700FA22E2 /* Factor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE1E0AD67A9700FA22E2 /* Factor.cpp */; };
+ 1C8CFE9C0AD67A9700FA22E2 /* Factor.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE1F0AD67A9700FA22E2 /* Factor.h */; };
+ 1C8CFE9D0AD67A9700FA22E2 /* FactorCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE200AD67A9700FA22E2 /* FactorCollection.cpp */; };
+ 1C8CFE9E0AD67A9700FA22E2 /* FactorCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE210AD67A9700FA22E2 /* FactorCollection.h */; };
+ 1C8CFE9F0AD67A9700FA22E2 /* FactorTypeSet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE220AD67A9700FA22E2 /* FactorTypeSet.cpp */; };
+ 1C8CFEA00AD67A9700FA22E2 /* FactorTypeSet.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE230AD67A9700FA22E2 /* FactorTypeSet.h */; };
+ 1C8CFEA10AD67A9700FA22E2 /* File.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE240AD67A9700FA22E2 /* File.h */; };
+ 1C8CFEA20AD67A9700FA22E2 /* FilePtr.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE250AD67A9700FA22E2 /* FilePtr.h */; };
+ 1C8CFEA30AD67A9700FA22E2 /* GenerationDictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE260AD67A9700FA22E2 /* GenerationDictionary.cpp */; };
+ 1C8CFEA40AD67A9700FA22E2 /* GenerationDictionary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE270AD67A9700FA22E2 /* GenerationDictionary.h */; };
+ 1C8CFEA50AD67A9700FA22E2 /* gzfilebuf.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE280AD67A9700FA22E2 /* gzfilebuf.h */; };
+ 1C8CFEA60AD67A9700FA22E2 /* hash.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE290AD67A9700FA22E2 /* hash.cpp */; };
+ 1C8CFEA70AD67A9700FA22E2 /* hash.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE2A0AD67A9700FA22E2 /* hash.h */; };
+ 1C8CFEA80AD67A9700FA22E2 /* Hypothesis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE2B0AD67A9700FA22E2 /* Hypothesis.cpp */; };
+ 1C8CFEA90AD67A9700FA22E2 /* Hypothesis.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE2C0AD67A9700FA22E2 /* Hypothesis.h */; };
+ 1C8CFEAC0AD67A9700FA22E2 /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE2F0AD67A9700FA22E2 /* InputFileStream.cpp */; };
+ 1C8CFEAD0AD67A9700FA22E2 /* InputFileStream.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE300AD67A9700FA22E2 /* InputFileStream.h */; };
+ 1C8CFEB00AD67A9700FA22E2 /* InputType.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE330AD67A9700FA22E2 /* InputType.cpp */; };
+ 1C8CFEB10AD67A9700FA22E2 /* InputType.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE340AD67A9700FA22E2 /* InputType.h */; };
+ 1C8CFEB20AD67A9700FA22E2 /* LanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE350AD67A9700FA22E2 /* LanguageModel.cpp */; };
+ 1C8CFEB30AD67A9700FA22E2 /* LanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE360AD67A9700FA22E2 /* LanguageModel.h */; };
+ 1C8CFEB60AD67A9700FA22E2 /* LanguageModelFactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE390AD67A9700FA22E2 /* LanguageModelFactory.cpp */; };
+ 1C8CFEB70AD67A9700FA22E2 /* LanguageModelFactory.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE3A0AD67A9700FA22E2 /* LanguageModelFactory.h */; };
+ 1C8CFEBA0AD67A9700FA22E2 /* LanguageModelJoint.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE3D0AD67A9700FA22E2 /* LanguageModelJoint.cpp */; };
+ 1C8CFEBB0AD67A9700FA22E2 /* LanguageModelJoint.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE3E0AD67A9700FA22E2 /* LanguageModelJoint.h */; };
+ 1C8CFEBC0AD67A9700FA22E2 /* LanguageModelMultiFactor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE3F0AD67A9700FA22E2 /* LanguageModelMultiFactor.cpp */; };
+ 1C8CFEBD0AD67A9700FA22E2 /* LanguageModelMultiFactor.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE400AD67A9700FA22E2 /* LanguageModelMultiFactor.h */; };
+ 1C8CFEBE0AD67A9700FA22E2 /* LanguageModelSingleFactor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE410AD67A9700FA22E2 /* LanguageModelSingleFactor.cpp */; };
+ 1C8CFEBF0AD67A9700FA22E2 /* LanguageModelSingleFactor.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE420AD67A9700FA22E2 /* LanguageModelSingleFactor.h */; };
+ 1C8CFEC00AD67A9700FA22E2 /* LanguageModelSRI.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE430AD67A9700FA22E2 /* LanguageModelSRI.cpp */; };
+ 1C8CFEC10AD67A9700FA22E2 /* LanguageModelSRI.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE440AD67A9700FA22E2 /* LanguageModelSRI.h */; };
+ 1C8CFEC60AD67A9700FA22E2 /* LexicalReordering.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE490AD67A9700FA22E2 /* LexicalReordering.cpp */; };
+ 1C8CFEC70AD67A9700FA22E2 /* LexicalReordering.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE4A0AD67A9700FA22E2 /* LexicalReordering.h */; };
+ 1C8CFEC80AD67A9700FA22E2 /* LMList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE4B0AD67A9700FA22E2 /* LMList.cpp */; };
+ 1C8CFEC90AD67A9700FA22E2 /* LMList.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE4C0AD67A9700FA22E2 /* LMList.h */; };
+ 1C8CFECA0AD67A9700FA22E2 /* Manager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE4D0AD67A9700FA22E2 /* Manager.cpp */; };
+ 1C8CFECB0AD67A9700FA22E2 /* Manager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE4E0AD67A9700FA22E2 /* Manager.h */; };
+ 1C8CFED00AD67A9700FA22E2 /* ObjectPool.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE530AD67A9700FA22E2 /* ObjectPool.h */; };
+ 1C8CFED10AD67A9700FA22E2 /* Parameter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE540AD67A9700FA22E2 /* Parameter.cpp */; };
+ 1C8CFED20AD67A9700FA22E2 /* Parameter.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE550AD67A9700FA22E2 /* Parameter.h */; };
+ 1C8CFED30AD67A9700FA22E2 /* PartialTranslOptColl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE560AD67A9700FA22E2 /* PartialTranslOptColl.cpp */; };
+ 1C8CFED40AD67A9700FA22E2 /* PartialTranslOptColl.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE570AD67A9700FA22E2 /* PartialTranslOptColl.h */; };
+ 1C8CFED50AD67A9700FA22E2 /* PDTAimp.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE580AD67A9700FA22E2 /* PDTAimp.h */; };
+ 1C8CFED60AD67A9700FA22E2 /* Phrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE590AD67A9700FA22E2 /* Phrase.cpp */; };
+ 1C8CFED70AD67A9700FA22E2 /* Phrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE5A0AD67A9700FA22E2 /* Phrase.h */; };
+ 1C8CFED80AD67A9700FA22E2 /* PhraseDictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE5B0AD67A9700FA22E2 /* PhraseDictionary.cpp */; };
+ 1C8CFED90AD67A9700FA22E2 /* PhraseDictionary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE5C0AD67A9700FA22E2 /* PhraseDictionary.h */; };
+ 1C8CFEDC0AD67A9700FA22E2 /* PhraseDictionaryNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE5F0AD67A9700FA22E2 /* PhraseDictionaryNode.cpp */; };
+ 1C8CFEDD0AD67A9700FA22E2 /* PhraseDictionaryNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE600AD67A9700FA22E2 /* PhraseDictionaryNode.h */; };
+ 1C8CFEDE0AD67A9700FA22E2 /* PhraseDictionaryTree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE610AD67A9700FA22E2 /* PhraseDictionaryTree.cpp */; };
+ 1C8CFEDF0AD67A9700FA22E2 /* PhraseDictionaryTree.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE620AD67A9700FA22E2 /* PhraseDictionaryTree.h */; };
+ 1C8CFEE00AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE630AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.cpp */; };
+ 1C8CFEE10AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE640AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.h */; };
+ 1C8CFEE40AD67A9700FA22E2 /* PrefixTree.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE670AD67A9700FA22E2 /* PrefixTree.h */; };
+ 1C8CFEE50AD67A9700FA22E2 /* ScoreComponentCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE680AD67A9700FA22E2 /* ScoreComponentCollection.cpp */; };
+ 1C8CFEE60AD67A9700FA22E2 /* ScoreComponentCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE690AD67A9700FA22E2 /* ScoreComponentCollection.h */; };
+ 1C8CFEE70AD67A9700FA22E2 /* ScoreIndexManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE6A0AD67A9700FA22E2 /* ScoreIndexManager.cpp */; };
+ 1C8CFEE80AD67A9700FA22E2 /* ScoreIndexManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE6B0AD67A9700FA22E2 /* ScoreIndexManager.h */; };
+ 1C8CFEE90AD67A9700FA22E2 /* ScoreProducer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE6C0AD67A9700FA22E2 /* ScoreProducer.cpp */; };
+ 1C8CFEEA0AD67A9700FA22E2 /* ScoreProducer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE6D0AD67A9700FA22E2 /* ScoreProducer.h */; };
+ 1C8CFEEB0AD67A9700FA22E2 /* Sentence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE6E0AD67A9700FA22E2 /* Sentence.cpp */; };
+ 1C8CFEEC0AD67A9700FA22E2 /* Sentence.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE6F0AD67A9700FA22E2 /* Sentence.h */; };
+ 1C8CFEED0AD67A9700FA22E2 /* SentenceStats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE700AD67A9700FA22E2 /* SentenceStats.cpp */; };
+ 1C8CFEEE0AD67A9700FA22E2 /* SentenceStats.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE710AD67A9700FA22E2 /* SentenceStats.h */; };
+ 1C8CFEEF0AD67A9700FA22E2 /* SquareMatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE720AD67A9700FA22E2 /* SquareMatrix.cpp */; };
+ 1C8CFEF00AD67A9700FA22E2 /* SquareMatrix.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE730AD67A9700FA22E2 /* SquareMatrix.h */; };
+ 1C8CFEF10AD67A9700FA22E2 /* StaticData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE740AD67A9700FA22E2 /* StaticData.cpp */; };
+ 1C8CFEF20AD67A9700FA22E2 /* StaticData.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE750AD67A9700FA22E2 /* StaticData.h */; };
+ 1C8CFEF30AD67A9700FA22E2 /* TargetPhrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE760AD67A9700FA22E2 /* TargetPhrase.cpp */; };
+ 1C8CFEF40AD67A9700FA22E2 /* TargetPhrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE770AD67A9700FA22E2 /* TargetPhrase.h */; };
+ 1C8CFEF50AD67A9700FA22E2 /* TargetPhraseCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE780AD67A9700FA22E2 /* TargetPhraseCollection.cpp */; };
+ 1C8CFEF60AD67A9700FA22E2 /* TargetPhraseCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE790AD67A9700FA22E2 /* TargetPhraseCollection.h */; };
+ 1C8CFEF70AD67A9700FA22E2 /* Timer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE7A0AD67A9700FA22E2 /* Timer.h */; };
+ 1C8CFEF80AD67A9700FA22E2 /* TranslationOption.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE7B0AD67A9700FA22E2 /* TranslationOption.cpp */; };
+ 1C8CFEF90AD67A9700FA22E2 /* TranslationOption.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE7C0AD67A9700FA22E2 /* TranslationOption.h */; };
+ 1C8CFEFA0AD67A9700FA22E2 /* TranslationOptionCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE7D0AD67A9700FA22E2 /* TranslationOptionCollection.cpp */; };
+ 1C8CFEFB0AD67A9700FA22E2 /* TranslationOptionCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE7E0AD67A9700FA22E2 /* TranslationOptionCollection.h */; };
+ 1C8CFEFC0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE7F0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.cpp */; };
+ 1C8CFEFD0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE800AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.h */; };
+ 1C8CFEFE0AD67A9700FA22E2 /* TranslationOptionCollectionText.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE810AD67A9700FA22E2 /* TranslationOptionCollectionText.cpp */; };
+ 1C8CFEFF0AD67A9700FA22E2 /* TranslationOptionCollectionText.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE820AD67A9700FA22E2 /* TranslationOptionCollectionText.h */; };
+ 1C8CFF000AD67A9700FA22E2 /* TypeDef.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE830AD67A9700FA22E2 /* TypeDef.h */; };
+ 1C8CFF010AD67A9700FA22E2 /* UniqueObject.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE840AD67A9700FA22E2 /* UniqueObject.h */; };
+ 1C8CFF020AD67A9700FA22E2 /* UserMessage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE850AD67A9700FA22E2 /* UserMessage.cpp */; };
+ 1C8CFF030AD67A9700FA22E2 /* UserMessage.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE860AD67A9700FA22E2 /* UserMessage.h */; };
+ 1C8CFF040AD67A9700FA22E2 /* Util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE870AD67A9700FA22E2 /* Util.cpp */; };
+ 1C8CFF050AD67A9700FA22E2 /* Util.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE880AD67A9700FA22E2 /* Util.h */; };
+ 1C8CFF060AD67A9700FA22E2 /* Word.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE890AD67A9700FA22E2 /* Word.cpp */; };
+ 1C8CFF070AD67A9700FA22E2 /* Word.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8A0AD67A9700FA22E2 /* Word.h */; };
+ 1C8CFF080AD67A9700FA22E2 /* WordsBitmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE8B0AD67A9700FA22E2 /* WordsBitmap.cpp */; };
+ 1C8CFF090AD67A9700FA22E2 /* WordsBitmap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */; };
+ 1C8CFF0A0AD67A9700FA22E2 /* WordsRange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */; };
+ 1C8CFF0B0AD67A9700FA22E2 /* WordsRange.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */; };
+ 1CB459EA0FD2DFEC000030BE /* GlobalLexicalModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1CB459E80FD2DFEC000030BE /* GlobalLexicalModel.cpp */; };
+ 1CB459EB0FD2DFEC000030BE /* GlobalLexicalModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CB459E90FD2DFEC000030BE /* GlobalLexicalModel.h */; };
+ 1CCE5B10114E60A500F79AD5 /* DPR_reordering.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1CCE5B0E114E60A500F79AD5 /* DPR_reordering.cpp */; };
+ 1CCE5B11114E60A500F79AD5 /* DPR_reordering.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CCE5B0F114E60A500F79AD5 /* DPR_reordering.h */; };
+ B219B8690E93836100EAB407 /* Timer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B219B8680E93836100EAB407 /* Timer.cpp */; };
+ B23821380EB73DCB007303C3 /* LanguageModelIRST.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B23821360EB73DCB007303C3 /* LanguageModelIRST.cpp */; };
+ B23821390EB73DCB007303C3 /* LanguageModelIRST.h in Headers */ = {isa = PBXBuildFile; fileRef = B23821370EB73DCB007303C3 /* LanguageModelIRST.h */; };
+ B2639DEA0EF199D400A67519 /* ReorderingConstraint.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B2639DE60EF199D400A67519 /* ReorderingConstraint.cpp */; };
+ B2639DEB0EF199D400A67519 /* ReorderingConstraint.h in Headers */ = {isa = PBXBuildFile; fileRef = B2639DE70EF199D400A67519 /* ReorderingConstraint.h */; };
+ B2639DEC0EF199D400A67519 /* TranslationOptionList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B2639DE80EF199D400A67519 /* TranslationOptionList.cpp */; };
+ B2639DED0EF199D400A67519 /* TranslationOptionList.h in Headers */ = {isa = PBXBuildFile; fileRef = B2639DE90EF199D400A67519 /* TranslationOptionList.h */; };
+ D39BA82D0AFBB7090089AE6A /* LanguageModelSkip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D39BA8270AFBB7090089AE6A /* LanguageModelSkip.cpp */; };
+ D39BA82E0AFBB7090089AE6A /* LanguageModelSkip.h in Headers */ = {isa = PBXBuildFile; fileRef = D39BA8280AFBB7090089AE6A /* LanguageModelSkip.h */; };
+ D39BA82F0AFBB7090089AE6A /* PhraseDictionaryMemory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D39BA8290AFBB7090089AE6A /* PhraseDictionaryMemory.cpp */; };
+ D39BA8300AFBB7090089AE6A /* PhraseDictionaryMemory.h in Headers */ = {isa = PBXBuildFile; fileRef = D39BA82A0AFBB7090089AE6A /* PhraseDictionaryMemory.h */; };
+ D39BA8480B11FF0C0089AE6A /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D39BA8400B11FF0C0089AE6A /* File.cpp */; };
+ D39BA8490B11FF0C0089AE6A /* LanguageModelInternal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D39BA8410B11FF0C0089AE6A /* LanguageModelInternal.cpp */; };
+ D39BA84A0B11FF0C0089AE6A /* LanguageModelInternal.h in Headers */ = {isa = PBXBuildFile; fileRef = D39BA8420B11FF0C0089AE6A /* LanguageModelInternal.h */; };
+ D39BA84B0B11FF0C0089AE6A /* NGramCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D39BA8440B11FF0C0089AE6A /* NGramCollection.cpp */; };
+ D39BA84C0B11FF0C0089AE6A /* NGramCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = D39BA8450B11FF0C0089AE6A /* NGramCollection.h */; };
+ D39BA84D0B11FF0C0089AE6A /* NGramNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D39BA8460B11FF0C0089AE6A /* NGramNode.cpp */; };
+ D39BA84E0B11FF0C0089AE6A /* NGramNode.h in Headers */ = {isa = PBXBuildFile; fileRef = D39BA8470B11FF0C0089AE6A /* NGramNode.h */; };
+ E21C110E0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E21C110A0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp */; };
+ E21C110F0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h in Headers */ = {isa = PBXBuildFile; fileRef = E21C110B0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h */; };
+ E21C11100DFEE86B00ADAED0 /* HypothesisStackNormal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E21C110C0DFEE86B00ADAED0 /* HypothesisStackNormal.cpp */; };
+ E21C11110DFEE86B00ADAED0 /* HypothesisStackNormal.h in Headers */ = {isa = PBXBuildFile; fileRef = E21C110D0DFEE86B00ADAED0 /* HypothesisStackNormal.h */; };
+ E21C11180DFEE88800ADAED0 /* Search.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E21C11120DFEE88800ADAED0 /* Search.cpp */; };
+ E21C11190DFEE88800ADAED0 /* Search.h in Headers */ = {isa = PBXBuildFile; fileRef = E21C11130DFEE88800ADAED0 /* Search.h */; };
+ E21C111A0DFEE88800ADAED0 /* SearchCubePruning.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E21C11140DFEE88800ADAED0 /* SearchCubePruning.cpp */; };
+ E21C111B0DFEE88800ADAED0 /* SearchCubePruning.h in Headers */ = {isa = PBXBuildFile; fileRef = E21C11150DFEE88800ADAED0 /* SearchCubePruning.h */; };
+ E21C111C0DFEE88800ADAED0 /* SearchNormal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E21C11160DFEE88800ADAED0 /* SearchNormal.cpp */; };
+ E21C111D0DFEE88800ADAED0 /* SearchNormal.h in Headers */ = {isa = PBXBuildFile; fileRef = E21C11170DFEE88800ADAED0 /* SearchNormal.h */; };
+ E2B7C8F20DDA19190089EFE0 /* BitmapContainer.h in Headers */ = {isa = PBXBuildFile; fileRef = E2B7C8F00DDA19190089EFE0 /* BitmapContainer.h */; };
+ E2B7C9590DDB1AEF0089EFE0 /* BitmapContainer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E2B7C9580DDB1AEF0089EFE0 /* BitmapContainer.cpp */; };
+ E2B7CA720DDB3B5C0089EFE0 /* FloydWarshall.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E2B7CA700DDB3B5C0089EFE0 /* FloydWarshall.cpp */; };
+ E2B7CA730DDB3B5C0089EFE0 /* FloydWarshall.h in Headers */ = {isa = PBXBuildFile; fileRef = E2B7CA710DDB3B5C0089EFE0 /* FloydWarshall.h */; };
+ E2B7CA760DDB3B700089EFE0 /* XmlOption.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E2B7CA740DDB3B700089EFE0 /* XmlOption.cpp */; };
+ E2B7CA770DDB3B700089EFE0 /* XmlOption.h in Headers */ = {isa = PBXBuildFile; fileRef = E2B7CA750DDB3B700089EFE0 /* XmlOption.h */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 037C63980C8EBFB400584F2E /* DecodeGraph.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DecodeGraph.cpp; path = src/DecodeGraph.cpp; sourceTree = "<group>"; };
+ 037C63990C8EBFB400584F2E /* DecodeGraph.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = DecodeGraph.h; path = src/DecodeGraph.h; sourceTree = "<group>"; };
+ 0396E1960C0B189200D95CFF /* HypothesisStack.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = HypothesisStack.cpp; path = src/HypothesisStack.cpp; sourceTree = "<group>"; };
+ 0396E1970C0B189200D95CFF /* HypothesisStack.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = HypothesisStack.h; path = src/HypothesisStack.h; sourceTree = "<group>"; };
+ 0396E1980C0B189200D95CFF /* LexicalReorderingTable.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTable.cpp; path = src/LexicalReorderingTable.cpp; sourceTree = "<group>"; };
+ 0396E1990C0B189200D95CFF /* LexicalReorderingTable.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTable.h; path = src/LexicalReorderingTable.h; sourceTree = "<group>"; };
+ 0396E19A0C0B189200D95CFF /* LVoc.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LVoc.cpp; path = src/LVoc.cpp; sourceTree = "<group>"; };
+ 0396E19B0C0B189200D95CFF /* LVoc.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LVoc.h; path = src/LVoc.h; sourceTree = "<group>"; };
+ 0396E19C0C0B189200D95CFF /* PCNTools.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PCNTools.cpp; path = src/PCNTools.cpp; sourceTree = "<group>"; };
+ 0396E19D0C0B189200D95CFF /* PCNTools.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PCNTools.h; path = src/PCNTools.h; sourceTree = "<group>"; };
+ 0396E19E0C0B189200D95CFF /* PrefixTreeMap.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PrefixTreeMap.cpp; path = src/PrefixTreeMap.cpp; sourceTree = "<group>"; };
+ 0396E19F0C0B189200D95CFF /* PrefixTreeMap.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PrefixTreeMap.h; path = src/PrefixTreeMap.h; sourceTree = "<group>"; };
+ 0396E1A00C0B189200D95CFF /* TrellisPath.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TrellisPath.cpp; path = src/TrellisPath.cpp; sourceTree = "<group>"; };
+ 0396E1A10C0B189200D95CFF /* TrellisPath.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TrellisPath.h; path = src/TrellisPath.h; sourceTree = "<group>"; };
+ 0396E1A20C0B189200D95CFF /* TrellisPathCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TrellisPathCollection.cpp; path = src/TrellisPathCollection.cpp; sourceTree = "<group>"; };
+ 0396E1A30C0B189200D95CFF /* TrellisPathCollection.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TrellisPathCollection.h; path = src/TrellisPathCollection.h; sourceTree = "<group>"; };
+ 0396E1A40C0B189200D95CFF /* TrellisPathList.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TrellisPathList.h; path = src/TrellisPathList.h; sourceTree = "<group>"; };
+ 0396E1A50C0B189200D95CFF /* WordLattice.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = WordLattice.cpp; path = src/WordLattice.cpp; sourceTree = "<group>"; };
+ 0396E1A60C0B189200D95CFF /* WordLattice.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordLattice.h; path = src/WordLattice.h; sourceTree = "<group>"; };
+ 1C5009BB0FB9E09700DFD24F /* FeatureFunction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FeatureFunction.cpp; path = src/FeatureFunction.cpp; sourceTree = "<group>"; };
+ 1C5009BC0FB9E09700DFD24F /* FeatureFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FeatureFunction.h; path = src/FeatureFunction.h; sourceTree = "<group>"; };
+ 1C5009BD0FB9E09700DFD24F /* FFState.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FFState.cpp; path = src/FFState.cpp; sourceTree = "<group>"; };
+ 1C5009BE0FB9E09700DFD24F /* FFState.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FFState.h; path = src/FFState.h; sourceTree = "<group>"; };
+ 1C8CFE120AD67A9600FA22E2 /* ConfusionNet.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ConfusionNet.cpp; path = src/ConfusionNet.cpp; sourceTree = "<group>"; };
+ 1C8CFE130AD67A9600FA22E2 /* ConfusionNet.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ConfusionNet.h; path = src/ConfusionNet.h; sourceTree = "<group>"; };
+ 1C8CFE140AD67A9600FA22E2 /* DecodeStep.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DecodeStep.cpp; path = src/DecodeStep.cpp; sourceTree = "<group>"; };
+ 1C8CFE150AD67A9600FA22E2 /* DecodeStep.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = DecodeStep.h; path = src/DecodeStep.h; sourceTree = "<group>"; };
+ 1C8CFE160AD67A9600FA22E2 /* DecodeStepGeneration.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DecodeStepGeneration.cpp; path = src/DecodeStepGeneration.cpp; sourceTree = "<group>"; };
+ 1C8CFE170AD67A9600FA22E2 /* DecodeStepGeneration.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = DecodeStepGeneration.h; path = src/DecodeStepGeneration.h; sourceTree = "<group>"; };
+ 1C8CFE180AD67A9600FA22E2 /* DecodeStepTranslation.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DecodeStepTranslation.cpp; path = src/DecodeStepTranslation.cpp; sourceTree = "<group>"; };
+ 1C8CFE190AD67A9700FA22E2 /* DecodeStepTranslation.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = DecodeStepTranslation.h; path = src/DecodeStepTranslation.h; sourceTree = "<group>"; };
+ 1C8CFE1A0AD67A9700FA22E2 /* Dictionary.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Dictionary.cpp; path = src/Dictionary.cpp; sourceTree = "<group>"; };
+ 1C8CFE1B0AD67A9700FA22E2 /* Dictionary.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Dictionary.h; path = src/Dictionary.h; sourceTree = "<group>"; };
+ 1C8CFE1C0AD67A9700FA22E2 /* DummyScoreProducers.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DummyScoreProducers.cpp; path = src/DummyScoreProducers.cpp; sourceTree = "<group>"; };
+ 1C8CFE1D0AD67A9700FA22E2 /* DummyScoreProducers.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = DummyScoreProducers.h; path = src/DummyScoreProducers.h; sourceTree = "<group>"; };
+ 1C8CFE1E0AD67A9700FA22E2 /* Factor.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Factor.cpp; path = src/Factor.cpp; sourceTree = "<group>"; };
+ 1C8CFE1F0AD67A9700FA22E2 /* Factor.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Factor.h; path = src/Factor.h; sourceTree = "<group>"; };
+ 1C8CFE200AD67A9700FA22E2 /* FactorCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = FactorCollection.cpp; path = src/FactorCollection.cpp; sourceTree = "<group>"; };
+ 1C8CFE210AD67A9700FA22E2 /* FactorCollection.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = FactorCollection.h; path = src/FactorCollection.h; sourceTree = "<group>"; };
+ 1C8CFE220AD67A9700FA22E2 /* FactorTypeSet.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = FactorTypeSet.cpp; path = src/FactorTypeSet.cpp; sourceTree = "<group>"; };
+ 1C8CFE230AD67A9700FA22E2 /* FactorTypeSet.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = FactorTypeSet.h; path = src/FactorTypeSet.h; sourceTree = "<group>"; };
+ 1C8CFE240AD67A9700FA22E2 /* File.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = File.h; path = src/File.h; sourceTree = "<group>"; };
+ 1C8CFE250AD67A9700FA22E2 /* FilePtr.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = FilePtr.h; path = src/FilePtr.h; sourceTree = "<group>"; };
+ 1C8CFE260AD67A9700FA22E2 /* GenerationDictionary.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = GenerationDictionary.cpp; path = src/GenerationDictionary.cpp; sourceTree = "<group>"; };
+ 1C8CFE270AD67A9700FA22E2 /* GenerationDictionary.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = GenerationDictionary.h; path = src/GenerationDictionary.h; sourceTree = "<group>"; };
+ 1C8CFE280AD67A9700FA22E2 /* gzfilebuf.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = gzfilebuf.h; path = src/gzfilebuf.h; sourceTree = "<group>"; };
+ 1C8CFE290AD67A9700FA22E2 /* hash.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = hash.cpp; path = src/hash.cpp; sourceTree = "<group>"; };
+ 1C8CFE2A0AD67A9700FA22E2 /* hash.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = hash.h; path = src/hash.h; sourceTree = "<group>"; };
+ 1C8CFE2B0AD67A9700FA22E2 /* Hypothesis.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Hypothesis.cpp; path = src/Hypothesis.cpp; sourceTree = "<group>"; };
+ 1C8CFE2C0AD67A9700FA22E2 /* Hypothesis.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Hypothesis.h; path = src/Hypothesis.h; sourceTree = "<group>"; };
+ 1C8CFE2F0AD67A9700FA22E2 /* InputFileStream.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = InputFileStream.cpp; path = src/InputFileStream.cpp; sourceTree = "<group>"; };
+ 1C8CFE300AD67A9700FA22E2 /* InputFileStream.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = InputFileStream.h; path = src/InputFileStream.h; sourceTree = "<group>"; };
+ 1C8CFE330AD67A9700FA22E2 /* InputType.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = InputType.cpp; path = src/InputType.cpp; sourceTree = "<group>"; };
+ 1C8CFE340AD67A9700FA22E2 /* InputType.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = InputType.h; path = src/InputType.h; sourceTree = "<group>"; };
+ 1C8CFE350AD67A9700FA22E2 /* LanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModel.cpp; path = src/LanguageModel.cpp; sourceTree = "<group>"; };
+ 1C8CFE360AD67A9700FA22E2 /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModel.h; path = src/LanguageModel.h; sourceTree = "<group>"; };
+ 1C8CFE390AD67A9700FA22E2 /* LanguageModelFactory.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelFactory.cpp; path = src/LanguageModelFactory.cpp; sourceTree = "<group>"; };
+ 1C8CFE3A0AD67A9700FA22E2 /* LanguageModelFactory.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelFactory.h; path = src/LanguageModelFactory.h; sourceTree = "<group>"; };
+ 1C8CFE3D0AD67A9700FA22E2 /* LanguageModelJoint.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelJoint.cpp; path = src/LanguageModelJoint.cpp; sourceTree = "<group>"; };
+ 1C8CFE3E0AD67A9700FA22E2 /* LanguageModelJoint.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelJoint.h; path = src/LanguageModelJoint.h; sourceTree = "<group>"; };
+ 1C8CFE3F0AD67A9700FA22E2 /* LanguageModelMultiFactor.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelMultiFactor.cpp; path = src/LanguageModelMultiFactor.cpp; sourceTree = "<group>"; };
+ 1C8CFE400AD67A9700FA22E2 /* LanguageModelMultiFactor.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelMultiFactor.h; path = src/LanguageModelMultiFactor.h; sourceTree = "<group>"; };
+ 1C8CFE410AD67A9700FA22E2 /* LanguageModelSingleFactor.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelSingleFactor.cpp; path = src/LanguageModelSingleFactor.cpp; sourceTree = "<group>"; };
+ 1C8CFE420AD67A9700FA22E2 /* LanguageModelSingleFactor.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelSingleFactor.h; path = src/LanguageModelSingleFactor.h; sourceTree = "<group>"; };
+ 1C8CFE430AD67A9700FA22E2 /* LanguageModelSRI.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelSRI.cpp; path = src/LanguageModelSRI.cpp; sourceTree = "<group>"; };
+ 1C8CFE440AD67A9700FA22E2 /* LanguageModelSRI.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelSRI.h; path = src/LanguageModelSRI.h; sourceTree = "<group>"; };
+ 1C8CFE490AD67A9700FA22E2 /* LexicalReordering.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReordering.cpp; path = src/LexicalReordering.cpp; sourceTree = "<group>"; };
+ 1C8CFE4A0AD67A9700FA22E2 /* LexicalReordering.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LexicalReordering.h; path = src/LexicalReordering.h; sourceTree = "<group>"; };
+ 1C8CFE4B0AD67A9700FA22E2 /* LMList.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LMList.cpp; path = src/LMList.cpp; sourceTree = "<group>"; };
+ 1C8CFE4C0AD67A9700FA22E2 /* LMList.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LMList.h; path = src/LMList.h; sourceTree = "<group>"; };
+ 1C8CFE4D0AD67A9700FA22E2 /* Manager.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Manager.cpp; path = src/Manager.cpp; sourceTree = "<group>"; };
+ 1C8CFE4E0AD67A9700FA22E2 /* Manager.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Manager.h; path = src/Manager.h; sourceTree = "<group>"; };
+ 1C8CFE530AD67A9700FA22E2 /* ObjectPool.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ObjectPool.h; path = src/ObjectPool.h; sourceTree = "<group>"; };
+ 1C8CFE540AD67A9700FA22E2 /* Parameter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Parameter.cpp; path = src/Parameter.cpp; sourceTree = "<group>"; };
+ 1C8CFE550AD67A9700FA22E2 /* Parameter.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Parameter.h; path = src/Parameter.h; sourceTree = "<group>"; };
+ 1C8CFE560AD67A9700FA22E2 /* PartialTranslOptColl.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PartialTranslOptColl.cpp; path = src/PartialTranslOptColl.cpp; sourceTree = "<group>"; };
+ 1C8CFE570AD67A9700FA22E2 /* PartialTranslOptColl.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PartialTranslOptColl.h; path = src/PartialTranslOptColl.h; sourceTree = "<group>"; };
+ 1C8CFE580AD67A9700FA22E2 /* PDTAimp.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PDTAimp.h; path = src/PDTAimp.h; sourceTree = "<group>"; };
+ 1C8CFE590AD67A9700FA22E2 /* Phrase.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Phrase.cpp; path = src/Phrase.cpp; sourceTree = "<group>"; };
+ 1C8CFE5A0AD67A9700FA22E2 /* Phrase.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Phrase.h; path = src/Phrase.h; sourceTree = "<group>"; };
+ 1C8CFE5B0AD67A9700FA22E2 /* PhraseDictionary.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionary.cpp; path = src/PhraseDictionary.cpp; sourceTree = "<group>"; };
+ 1C8CFE5C0AD67A9700FA22E2 /* PhraseDictionary.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PhraseDictionary.h; path = src/PhraseDictionary.h; sourceTree = "<group>"; };
+ 1C8CFE5F0AD67A9700FA22E2 /* PhraseDictionaryNode.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryNode.cpp; path = src/PhraseDictionaryNode.cpp; sourceTree = "<group>"; };
+ 1C8CFE600AD67A9700FA22E2 /* PhraseDictionaryNode.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryNode.h; path = src/PhraseDictionaryNode.h; sourceTree = "<group>"; };
+ 1C8CFE610AD67A9700FA22E2 /* PhraseDictionaryTree.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryTree.cpp; path = src/PhraseDictionaryTree.cpp; sourceTree = "<group>"; };
+ 1C8CFE620AD67A9700FA22E2 /* PhraseDictionaryTree.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryTree.h; path = src/PhraseDictionaryTree.h; sourceTree = "<group>"; };
+ 1C8CFE630AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryTreeAdaptor.cpp; path = src/PhraseDictionaryTreeAdaptor.cpp; sourceTree = "<group>"; };
+ 1C8CFE640AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryTreeAdaptor.h; path = src/PhraseDictionaryTreeAdaptor.h; sourceTree = "<group>"; };
+ 1C8CFE670AD67A9700FA22E2 /* PrefixTree.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PrefixTree.h; path = src/PrefixTree.h; sourceTree = "<group>"; };
+ 1C8CFE680AD67A9700FA22E2 /* ScoreComponentCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreComponentCollection.cpp; path = src/ScoreComponentCollection.cpp; sourceTree = "<group>"; };
+ 1C8CFE690AD67A9700FA22E2 /* ScoreComponentCollection.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ScoreComponentCollection.h; path = src/ScoreComponentCollection.h; sourceTree = "<group>"; };
+ 1C8CFE6A0AD67A9700FA22E2 /* ScoreIndexManager.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreIndexManager.cpp; path = src/ScoreIndexManager.cpp; sourceTree = "<group>"; };
+ 1C8CFE6B0AD67A9700FA22E2 /* ScoreIndexManager.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ScoreIndexManager.h; path = src/ScoreIndexManager.h; sourceTree = "<group>"; };
+ 1C8CFE6C0AD67A9700FA22E2 /* ScoreProducer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreProducer.cpp; path = src/ScoreProducer.cpp; sourceTree = "<group>"; };
+ 1C8CFE6D0AD67A9700FA22E2 /* ScoreProducer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ScoreProducer.h; path = src/ScoreProducer.h; sourceTree = "<group>"; };
+ 1C8CFE6E0AD67A9700FA22E2 /* Sentence.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Sentence.cpp; path = src/Sentence.cpp; sourceTree = "<group>"; };
+ 1C8CFE6F0AD67A9700FA22E2 /* Sentence.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Sentence.h; path = src/Sentence.h; sourceTree = "<group>"; };
+ 1C8CFE700AD67A9700FA22E2 /* SentenceStats.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SentenceStats.cpp; path = src/SentenceStats.cpp; sourceTree = "<group>"; };
+ 1C8CFE710AD67A9700FA22E2 /* SentenceStats.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = SentenceStats.h; path = src/SentenceStats.h; sourceTree = "<group>"; };
+ 1C8CFE720AD67A9700FA22E2 /* SquareMatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SquareMatrix.cpp; path = src/SquareMatrix.cpp; sourceTree = "<group>"; };
+ 1C8CFE730AD67A9700FA22E2 /* SquareMatrix.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = SquareMatrix.h; path = src/SquareMatrix.h; sourceTree = "<group>"; };
+ 1C8CFE740AD67A9700FA22E2 /* StaticData.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StaticData.cpp; path = src/StaticData.cpp; sourceTree = "<group>"; };
+ 1C8CFE750AD67A9700FA22E2 /* StaticData.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = StaticData.h; path = src/StaticData.h; sourceTree = "<group>"; };
+ 1C8CFE760AD67A9700FA22E2 /* TargetPhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TargetPhrase.cpp; path = src/TargetPhrase.cpp; sourceTree = "<group>"; };
+ 1C8CFE770AD67A9700FA22E2 /* TargetPhrase.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TargetPhrase.h; path = src/TargetPhrase.h; sourceTree = "<group>"; };
+ 1C8CFE780AD67A9700FA22E2 /* TargetPhraseCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TargetPhraseCollection.cpp; path = src/TargetPhraseCollection.cpp; sourceTree = "<group>"; };
+ 1C8CFE790AD67A9700FA22E2 /* TargetPhraseCollection.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TargetPhraseCollection.h; path = src/TargetPhraseCollection.h; sourceTree = "<group>"; };
+ 1C8CFE7A0AD67A9700FA22E2 /* Timer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Timer.h; path = src/Timer.h; sourceTree = "<group>"; };
+ 1C8CFE7B0AD67A9700FA22E2 /* TranslationOption.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationOption.cpp; path = src/TranslationOption.cpp; sourceTree = "<group>"; };
+ 1C8CFE7C0AD67A9700FA22E2 /* TranslationOption.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TranslationOption.h; path = src/TranslationOption.h; sourceTree = "<group>"; };
+ 1C8CFE7D0AD67A9700FA22E2 /* TranslationOptionCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationOptionCollection.cpp; path = src/TranslationOptionCollection.cpp; sourceTree = "<group>"; };
+ 1C8CFE7E0AD67A9700FA22E2 /* TranslationOptionCollection.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TranslationOptionCollection.h; path = src/TranslationOptionCollection.h; sourceTree = "<group>"; };
+ 1C8CFE7F0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationOptionCollectionConfusionNet.cpp; path = src/TranslationOptionCollectionConfusionNet.cpp; sourceTree = "<group>"; };
+ 1C8CFE800AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TranslationOptionCollectionConfusionNet.h; path = src/TranslationOptionCollectionConfusionNet.h; sourceTree = "<group>"; };
+ 1C8CFE810AD67A9700FA22E2 /* TranslationOptionCollectionText.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationOptionCollectionText.cpp; path = src/TranslationOptionCollectionText.cpp; sourceTree = "<group>"; };
+ 1C8CFE820AD67A9700FA22E2 /* TranslationOptionCollectionText.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TranslationOptionCollectionText.h; path = src/TranslationOptionCollectionText.h; sourceTree = "<group>"; };
+ 1C8CFE830AD67A9700FA22E2 /* TypeDef.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TypeDef.h; path = src/TypeDef.h; sourceTree = "<group>"; };
+ 1C8CFE840AD67A9700FA22E2 /* UniqueObject.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = UniqueObject.h; path = src/UniqueObject.h; sourceTree = "<group>"; };
+ 1C8CFE850AD67A9700FA22E2 /* UserMessage.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = UserMessage.cpp; path = src/UserMessage.cpp; sourceTree = "<group>"; };
+ 1C8CFE860AD67A9700FA22E2 /* UserMessage.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = UserMessage.h; path = src/UserMessage.h; sourceTree = "<group>"; };
+ 1C8CFE870AD67A9700FA22E2 /* Util.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Util.cpp; path = src/Util.cpp; sourceTree = "<group>"; };
+ 1C8CFE880AD67A9700FA22E2 /* Util.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Util.h; path = src/Util.h; sourceTree = "<group>"; };
+ 1C8CFE890AD67A9700FA22E2 /* Word.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Word.cpp; path = src/Word.cpp; sourceTree = "<group>"; };
+ 1C8CFE8A0AD67A9700FA22E2 /* Word.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Word.h; path = src/Word.h; sourceTree = "<group>"; };
+ 1C8CFE8B0AD67A9700FA22E2 /* WordsBitmap.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = WordsBitmap.cpp; path = src/WordsBitmap.cpp; sourceTree = "<group>"; };
+ 1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordsBitmap.h; path = src/WordsBitmap.h; sourceTree = "<group>"; };
+ 1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = WordsRange.cpp; path = src/WordsRange.cpp; sourceTree = "<group>"; };
+ 1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordsRange.h; path = src/WordsRange.h; sourceTree = "<group>"; };
+ 1CB459E80FD2DFEC000030BE /* GlobalLexicalModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = GlobalLexicalModel.cpp; path = src/GlobalLexicalModel.cpp; sourceTree = "<group>"; };
+ 1CB459E90FD2DFEC000030BE /* GlobalLexicalModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = GlobalLexicalModel.h; path = src/GlobalLexicalModel.h; sourceTree = "<group>"; };
+ 1CCE5B0E114E60A500F79AD5 /* DPR_reordering.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DPR_reordering.cpp; path = src/DPR_reordering.cpp; sourceTree = "<group>"; };
+ 1CCE5B0F114E60A500F79AD5 /* DPR_reordering.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DPR_reordering.h; path = src/DPR_reordering.h; sourceTree = "<group>"; };
+ B219B8680E93836100EAB407 /* Timer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Timer.cpp; path = src/Timer.cpp; sourceTree = "<group>"; };
+ B23821360EB73DCB007303C3 /* LanguageModelIRST.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelIRST.cpp; path = src/LanguageModelIRST.cpp; sourceTree = "<group>"; };
+ B23821370EB73DCB007303C3 /* LanguageModelIRST.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LanguageModelIRST.h; path = src/LanguageModelIRST.h; sourceTree = "<group>"; };
+ B2639DE60EF199D400A67519 /* ReorderingConstraint.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ReorderingConstraint.cpp; path = src/ReorderingConstraint.cpp; sourceTree = "<group>"; };
+ B2639DE70EF199D400A67519 /* ReorderingConstraint.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ReorderingConstraint.h; path = src/ReorderingConstraint.h; sourceTree = "<group>"; };
+ B2639DE80EF199D400A67519 /* TranslationOptionList.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationOptionList.cpp; path = src/TranslationOptionList.cpp; sourceTree = "<group>"; };
+ B2639DE90EF199D400A67519 /* TranslationOptionList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TranslationOptionList.h; path = src/TranslationOptionList.h; sourceTree = "<group>"; };
+ D2AAC046055464E500DB518D /* libmoses.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libmoses.a; sourceTree = BUILT_PRODUCTS_DIR; };
+ D39BA8270AFBB7090089AE6A /* LanguageModelSkip.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelSkip.cpp; path = src/LanguageModelSkip.cpp; sourceTree = "<group>"; };
+ D39BA8280AFBB7090089AE6A /* LanguageModelSkip.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelSkip.h; path = src/LanguageModelSkip.h; sourceTree = "<group>"; };
+ D39BA8290AFBB7090089AE6A /* PhraseDictionaryMemory.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryMemory.cpp; path = src/PhraseDictionaryMemory.cpp; sourceTree = "<group>"; };
+ D39BA82A0AFBB7090089AE6A /* PhraseDictionaryMemory.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryMemory.h; path = src/PhraseDictionaryMemory.h; sourceTree = "<group>"; };
+ D39BA8400B11FF0C0089AE6A /* File.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = File.cpp; path = src/File.cpp; sourceTree = "<group>"; };
+ D39BA8410B11FF0C0089AE6A /* LanguageModelInternal.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelInternal.cpp; path = src/LanguageModelInternal.cpp; sourceTree = "<group>"; };
+ D39BA8420B11FF0C0089AE6A /* LanguageModelInternal.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = LanguageModelInternal.h; path = src/LanguageModelInternal.h; sourceTree = "<group>"; };
+ D39BA8430B11FF0C0089AE6A /* Makefile.am */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; name = Makefile.am; path = src/Makefile.am; sourceTree = "<group>"; };
+ D39BA8440B11FF0C0089AE6A /* NGramCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = NGramCollection.cpp; path = src/NGramCollection.cpp; sourceTree = "<group>"; };
+ D39BA8450B11FF0C0089AE6A /* NGramCollection.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = NGramCollection.h; path = src/NGramCollection.h; sourceTree = "<group>"; };
+ D39BA8460B11FF0C0089AE6A /* NGramNode.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = NGramNode.cpp; path = src/NGramNode.cpp; sourceTree = "<group>"; };
+ D39BA8470B11FF0C0089AE6A /* NGramNode.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = NGramNode.h; path = src/NGramNode.h; sourceTree = "<group>"; };
+ E21C110A0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = HypothesisStackCubePruning.cpp; path = src/HypothesisStackCubePruning.cpp; sourceTree = "<group>"; };
+ E21C110B0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = HypothesisStackCubePruning.h; path = src/HypothesisStackCubePruning.h; sourceTree = "<group>"; };
+ E21C110C0DFEE86B00ADAED0 /* HypothesisStackNormal.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = HypothesisStackNormal.cpp; path = src/HypothesisStackNormal.cpp; sourceTree = "<group>"; };
+ E21C110D0DFEE86B00ADAED0 /* HypothesisStackNormal.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = HypothesisStackNormal.h; path = src/HypothesisStackNormal.h; sourceTree = "<group>"; };
+ E21C11120DFEE88800ADAED0 /* Search.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Search.cpp; path = src/Search.cpp; sourceTree = "<group>"; };
+ E21C11130DFEE88800ADAED0 /* Search.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Search.h; path = src/Search.h; sourceTree = "<group>"; };
+ E21C11140DFEE88800ADAED0 /* SearchCubePruning.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SearchCubePruning.cpp; path = src/SearchCubePruning.cpp; sourceTree = "<group>"; };
+ E21C11150DFEE88800ADAED0 /* SearchCubePruning.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = SearchCubePruning.h; path = src/SearchCubePruning.h; sourceTree = "<group>"; };
+ E21C11160DFEE88800ADAED0 /* SearchNormal.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SearchNormal.cpp; path = src/SearchNormal.cpp; sourceTree = "<group>"; };
+ E21C11170DFEE88800ADAED0 /* SearchNormal.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = SearchNormal.h; path = src/SearchNormal.h; sourceTree = "<group>"; };
+ E2B7C8F00DDA19190089EFE0 /* BitmapContainer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = BitmapContainer.h; path = src/BitmapContainer.h; sourceTree = "<group>"; };
+ E2B7C9580DDB1AEF0089EFE0 /* BitmapContainer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = BitmapContainer.cpp; path = src/BitmapContainer.cpp; sourceTree = "<group>"; };
+ E2B7CA700DDB3B5C0089EFE0 /* FloydWarshall.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = FloydWarshall.cpp; path = src/FloydWarshall.cpp; sourceTree = "<group>"; };
+ E2B7CA710DDB3B5C0089EFE0 /* FloydWarshall.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = FloydWarshall.h; path = src/FloydWarshall.h; sourceTree = "<group>"; };
+ E2B7CA740DDB3B700089EFE0 /* XmlOption.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = XmlOption.cpp; path = src/XmlOption.cpp; sourceTree = "<group>"; };
+ E2B7CA750DDB3B700089EFE0 /* XmlOption.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = XmlOption.h; path = src/XmlOption.h; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ D289987405E68DCB004EDB86 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 08FB7794FE84155DC02AAC07 /* moses */ = {
+ isa = PBXGroup;
+ children = (
+ 08FB7795FE84155DC02AAC07 /* Source */,
+ C6A0FF2B0290797F04C91782 /* Documentation */,
+ 1AB674ADFE9D54B511CA2CBB /* Products */,
+ );
+ name = moses;
+ sourceTree = "<group>";
+ };
+ 08FB7795FE84155DC02AAC07 /* Source */ = {
+ isa = PBXGroup;
+ children = (
+ 1CCE5B0E114E60A500F79AD5 /* DPR_reordering.cpp */,
+ 1CCE5B0F114E60A500F79AD5 /* DPR_reordering.h */,
+ E2B7C9580DDB1AEF0089EFE0 /* BitmapContainer.cpp */,
+ E2B7C8F00DDA19190089EFE0 /* BitmapContainer.h */,
+ 1C8CFE120AD67A9600FA22E2 /* ConfusionNet.cpp */,
+ 1C8CFE130AD67A9600FA22E2 /* ConfusionNet.h */,
+ 037C63980C8EBFB400584F2E /* DecodeGraph.cpp */,
+ 037C63990C8EBFB400584F2E /* DecodeGraph.h */,
+ 1C8CFE140AD67A9600FA22E2 /* DecodeStep.cpp */,
+ 1C8CFE150AD67A9600FA22E2 /* DecodeStep.h */,
+ 1C8CFE160AD67A9600FA22E2 /* DecodeStepGeneration.cpp */,
+ 1C8CFE170AD67A9600FA22E2 /* DecodeStepGeneration.h */,
+ 1C8CFE180AD67A9600FA22E2 /* DecodeStepTranslation.cpp */,
+ 1C8CFE190AD67A9700FA22E2 /* DecodeStepTranslation.h */,
+ 1C8CFE1A0AD67A9700FA22E2 /* Dictionary.cpp */,
+ 1C8CFE1B0AD67A9700FA22E2 /* Dictionary.h */,
+ 1C8CFE1C0AD67A9700FA22E2 /* DummyScoreProducers.cpp */,
+ 1C8CFE1D0AD67A9700FA22E2 /* DummyScoreProducers.h */,
+ 1C8CFE1E0AD67A9700FA22E2 /* Factor.cpp */,
+ 1C8CFE1F0AD67A9700FA22E2 /* Factor.h */,
+ 1C8CFE200AD67A9700FA22E2 /* FactorCollection.cpp */,
+ 1C8CFE210AD67A9700FA22E2 /* FactorCollection.h */,
+ 1C8CFE220AD67A9700FA22E2 /* FactorTypeSet.cpp */,
+ 1C8CFE230AD67A9700FA22E2 /* FactorTypeSet.h */,
+ 1C5009BB0FB9E09700DFD24F /* FeatureFunction.cpp */,
+ 1C5009BC0FB9E09700DFD24F /* FeatureFunction.h */,
+ 1C5009BD0FB9E09700DFD24F /* FFState.cpp */,
+ 1C5009BE0FB9E09700DFD24F /* FFState.h */,
+ D39BA8400B11FF0C0089AE6A /* File.cpp */,
+ 1C8CFE240AD67A9700FA22E2 /* File.h */,
+ 1C8CFE250AD67A9700FA22E2 /* FilePtr.h */,
+ E2B7CA700DDB3B5C0089EFE0 /* FloydWarshall.cpp */,
+ E2B7CA710DDB3B5C0089EFE0 /* FloydWarshall.h */,
+ 1C8CFE260AD67A9700FA22E2 /* GenerationDictionary.cpp */,
+ 1C8CFE270AD67A9700FA22E2 /* GenerationDictionary.h */,
+ 1CB459E80FD2DFEC000030BE /* GlobalLexicalModel.cpp */,
+ 1CB459E90FD2DFEC000030BE /* GlobalLexicalModel.h */,
+ 1C8CFE280AD67A9700FA22E2 /* gzfilebuf.h */,
+ 1C8CFE290AD67A9700FA22E2 /* hash.cpp */,
+ 1C8CFE2A0AD67A9700FA22E2 /* hash.h */,
+ 1C8CFE2B0AD67A9700FA22E2 /* Hypothesis.cpp */,
+ 1C8CFE2C0AD67A9700FA22E2 /* Hypothesis.h */,
+ 0396E1960C0B189200D95CFF /* HypothesisStack.cpp */,
+ 0396E1970C0B189200D95CFF /* HypothesisStack.h */,
+ E21C110A0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp */,
+ E21C110B0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h */,
+ E21C110C0DFEE86B00ADAED0 /* HypothesisStackNormal.cpp */,
+ E21C110D0DFEE86B00ADAED0 /* HypothesisStackNormal.h */,
+ 1C8CFE2F0AD67A9700FA22E2 /* InputFileStream.cpp */,
+ 1C8CFE300AD67A9700FA22E2 /* InputFileStream.h */,
+ 1C8CFE330AD67A9700FA22E2 /* InputType.cpp */,
+ 1C8CFE340AD67A9700FA22E2 /* InputType.h */,
+ 1C8CFE350AD67A9700FA22E2 /* LanguageModel.cpp */,
+ 1C8CFE360AD67A9700FA22E2 /* LanguageModel.h */,
+ 1C8CFE390AD67A9700FA22E2 /* LanguageModelFactory.cpp */,
+ 1C8CFE3A0AD67A9700FA22E2 /* LanguageModelFactory.h */,
+ D39BA8410B11FF0C0089AE6A /* LanguageModelInternal.cpp */,
+ D39BA8420B11FF0C0089AE6A /* LanguageModelInternal.h */,
+ B23821360EB73DCB007303C3 /* LanguageModelIRST.cpp */,
+ B23821370EB73DCB007303C3 /* LanguageModelIRST.h */,
+ 1C8CFE3D0AD67A9700FA22E2 /* LanguageModelJoint.cpp */,
+ 1C8CFE3E0AD67A9700FA22E2 /* LanguageModelJoint.h */,
+ 1C8CFE3F0AD67A9700FA22E2 /* LanguageModelMultiFactor.cpp */,
+ 1C8CFE400AD67A9700FA22E2 /* LanguageModelMultiFactor.h */,
+ 1C8CFE410AD67A9700FA22E2 /* LanguageModelSingleFactor.cpp */,
+ 1C8CFE420AD67A9700FA22E2 /* LanguageModelSingleFactor.h */,
+ D39BA8270AFBB7090089AE6A /* LanguageModelSkip.cpp */,
+ D39BA8280AFBB7090089AE6A /* LanguageModelSkip.h */,
+ 1C8CFE430AD67A9700FA22E2 /* LanguageModelSRI.cpp */,
+ 1C8CFE440AD67A9700FA22E2 /* LanguageModelSRI.h */,
+ 1C8CFE490AD67A9700FA22E2 /* LexicalReordering.cpp */,
+ 1C8CFE4A0AD67A9700FA22E2 /* LexicalReordering.h */,
+ 0396E1980C0B189200D95CFF /* LexicalReorderingTable.cpp */,
+ 0396E1990C0B189200D95CFF /* LexicalReorderingTable.h */,
+ 1C8CFE4B0AD67A9700FA22E2 /* LMList.cpp */,
+ 1C8CFE4C0AD67A9700FA22E2 /* LMList.h */,
+ 0396E19A0C0B189200D95CFF /* LVoc.cpp */,
+ 0396E19B0C0B189200D95CFF /* LVoc.h */,
+ D39BA8430B11FF0C0089AE6A /* Makefile.am */,
+ 1C8CFE4D0AD67A9700FA22E2 /* Manager.cpp */,
+ 1C8CFE4E0AD67A9700FA22E2 /* Manager.h */,
+ D39BA8440B11FF0C0089AE6A /* NGramCollection.cpp */,
+ D39BA8450B11FF0C0089AE6A /* NGramCollection.h */,
+ D39BA8460B11FF0C0089AE6A /* NGramNode.cpp */,
+ D39BA8470B11FF0C0089AE6A /* NGramNode.h */,
+ 1C8CFE530AD67A9700FA22E2 /* ObjectPool.h */,
+ 1C8CFE540AD67A9700FA22E2 /* Parameter.cpp */,
+ 1C8CFE550AD67A9700FA22E2 /* Parameter.h */,
+ 1C8CFE560AD67A9700FA22E2 /* PartialTranslOptColl.cpp */,
+ 1C8CFE570AD67A9700FA22E2 /* PartialTranslOptColl.h */,
+ 0396E19C0C0B189200D95CFF /* PCNTools.cpp */,
+ 0396E19D0C0B189200D95CFF /* PCNTools.h */,
+ 1C8CFE580AD67A9700FA22E2 /* PDTAimp.h */,
+ 1C8CFE590AD67A9700FA22E2 /* Phrase.cpp */,
+ 1C8CFE5A0AD67A9700FA22E2 /* Phrase.h */,
+ 1C8CFE5B0AD67A9700FA22E2 /* PhraseDictionary.cpp */,
+ 1C8CFE5C0AD67A9700FA22E2 /* PhraseDictionary.h */,
+ D39BA8290AFBB7090089AE6A /* PhraseDictionaryMemory.cpp */,
+ D39BA82A0AFBB7090089AE6A /* PhraseDictionaryMemory.h */,
+ 1C8CFE5F0AD67A9700FA22E2 /* PhraseDictionaryNode.cpp */,
+ 1C8CFE600AD67A9700FA22E2 /* PhraseDictionaryNode.h */,
+ 1C8CFE610AD67A9700FA22E2 /* PhraseDictionaryTree.cpp */,
+ 1C8CFE620AD67A9700FA22E2 /* PhraseDictionaryTree.h */,
+ 1C8CFE630AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.cpp */,
+ 1C8CFE640AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.h */,
+ 1C8CFE670AD67A9700FA22E2 /* PrefixTree.h */,
+ 0396E19E0C0B189200D95CFF /* PrefixTreeMap.cpp */,
+ 0396E19F0C0B189200D95CFF /* PrefixTreeMap.h */,
+ B2639DE60EF199D400A67519 /* ReorderingConstraint.cpp */,
+ B2639DE70EF199D400A67519 /* ReorderingConstraint.h */,
+ 1C8CFE680AD67A9700FA22E2 /* ScoreComponentCollection.cpp */,
+ 1C8CFE690AD67A9700FA22E2 /* ScoreComponentCollection.h */,
+ 1C8CFE6A0AD67A9700FA22E2 /* ScoreIndexManager.cpp */,
+ 1C8CFE6B0AD67A9700FA22E2 /* ScoreIndexManager.h */,
+ 1C8CFE6C0AD67A9700FA22E2 /* ScoreProducer.cpp */,
+ 1C8CFE6D0AD67A9700FA22E2 /* ScoreProducer.h */,
+ E21C11120DFEE88800ADAED0 /* Search.cpp */,
+ E21C11130DFEE88800ADAED0 /* Search.h */,
+ E21C11140DFEE88800ADAED0 /* SearchCubePruning.cpp */,
+ E21C11150DFEE88800ADAED0 /* SearchCubePruning.h */,
+ E21C11160DFEE88800ADAED0 /* SearchNormal.cpp */,
+ E21C11170DFEE88800ADAED0 /* SearchNormal.h */,
+ 1C8CFE6E0AD67A9700FA22E2 /* Sentence.cpp */,
+ 1C8CFE6F0AD67A9700FA22E2 /* Sentence.h */,
+ 1C8CFE700AD67A9700FA22E2 /* SentenceStats.cpp */,
+ 1C8CFE710AD67A9700FA22E2 /* SentenceStats.h */,
+ 1C8CFE720AD67A9700FA22E2 /* SquareMatrix.cpp */,
+ 1C8CFE730AD67A9700FA22E2 /* SquareMatrix.h */,
+ 1C8CFE740AD67A9700FA22E2 /* StaticData.cpp */,
+ 1C8CFE750AD67A9700FA22E2 /* StaticData.h */,
+ 1C8CFE760AD67A9700FA22E2 /* TargetPhrase.cpp */,
+ 1C8CFE770AD67A9700FA22E2 /* TargetPhrase.h */,
+ 1C8CFE780AD67A9700FA22E2 /* TargetPhraseCollection.cpp */,
+ 1C8CFE790AD67A9700FA22E2 /* TargetPhraseCollection.h */,
+ B219B8680E93836100EAB407 /* Timer.cpp */,
+ 1C8CFE7A0AD67A9700FA22E2 /* Timer.h */,
+ 1C8CFE7B0AD67A9700FA22E2 /* TranslationOption.cpp */,
+ 1C8CFE7C0AD67A9700FA22E2 /* TranslationOption.h */,
+ 1C8CFE7D0AD67A9700FA22E2 /* TranslationOptionCollection.cpp */,
+ 1C8CFE7E0AD67A9700FA22E2 /* TranslationOptionCollection.h */,
+ 1C8CFE7F0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.cpp */,
+ 1C8CFE800AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.h */,
+ 1C8CFE810AD67A9700FA22E2 /* TranslationOptionCollectionText.cpp */,
+ 1C8CFE820AD67A9700FA22E2 /* TranslationOptionCollectionText.h */,
+ B2639DE80EF199D400A67519 /* TranslationOptionList.cpp */,
+ B2639DE90EF199D400A67519 /* TranslationOptionList.h */,
+ 0396E1A00C0B189200D95CFF /* TrellisPath.cpp */,
+ 0396E1A10C0B189200D95CFF /* TrellisPath.h */,
+ 0396E1A20C0B189200D95CFF /* TrellisPathCollection.cpp */,
+ 0396E1A30C0B189200D95CFF /* TrellisPathCollection.h */,
+ 0396E1A40C0B189200D95CFF /* TrellisPathList.h */,
+ 1C8CFE830AD67A9700FA22E2 /* TypeDef.h */,
+ 1C8CFE840AD67A9700FA22E2 /* UniqueObject.h */,
+ 1C8CFE850AD67A9700FA22E2 /* UserMessage.cpp */,
+ 1C8CFE860AD67A9700FA22E2 /* UserMessage.h */,
+ 1C8CFE870AD67A9700FA22E2 /* Util.cpp */,
+ 1C8CFE880AD67A9700FA22E2 /* Util.h */,
+ 1C8CFE890AD67A9700FA22E2 /* Word.cpp */,
+ 1C8CFE8A0AD67A9700FA22E2 /* Word.h */,
+ 0396E1A50C0B189200D95CFF /* WordLattice.cpp */,
+ 0396E1A60C0B189200D95CFF /* WordLattice.h */,
+ 1C8CFE8B0AD67A9700FA22E2 /* WordsBitmap.cpp */,
+ 1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */,
+ 1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */,
+ 1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */,
+ E2B7CA740DDB3B700089EFE0 /* XmlOption.cpp */,
+ E2B7CA750DDB3B700089EFE0 /* XmlOption.h */,
+ );
+ name = Source;
+ sourceTree = "<group>";
+ };
+ 1AB674ADFE9D54B511CA2CBB /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ D2AAC046055464E500DB518D /* libmoses.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ C6A0FF2B0290797F04C91782 /* Documentation */ = {
+ isa = PBXGroup;
+ children = (
+ );
+ name = Documentation;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ D2AAC043055464E500DB518D /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1C8CFE900AD67A9700FA22E2 /* ConfusionNet.h in Headers */,
+ 1C8CFE920AD67A9700FA22E2 /* DecodeStep.h in Headers */,
+ 1C8CFE940AD67A9700FA22E2 /* DecodeStepGeneration.h in Headers */,
+ 1C8CFE960AD67A9700FA22E2 /* DecodeStepTranslation.h in Headers */,
+ 1C8CFE980AD67A9700FA22E2 /* Dictionary.h in Headers */,
+ 1C8CFE9A0AD67A9700FA22E2 /* DummyScoreProducers.h in Headers */,
+ 1C8CFE9C0AD67A9700FA22E2 /* Factor.h in Headers */,
+ 1C8CFE9E0AD67A9700FA22E2 /* FactorCollection.h in Headers */,
+ 1C8CFEA00AD67A9700FA22E2 /* FactorTypeSet.h in Headers */,
+ 1C8CFEA10AD67A9700FA22E2 /* File.h in Headers */,
+ 1C8CFEA20AD67A9700FA22E2 /* FilePtr.h in Headers */,
+ 1C8CFEA40AD67A9700FA22E2 /* GenerationDictionary.h in Headers */,
+ 1C8CFEA50AD67A9700FA22E2 /* gzfilebuf.h in Headers */,
+ 1C8CFEA70AD67A9700FA22E2 /* hash.h in Headers */,
+ 1C8CFEA90AD67A9700FA22E2 /* Hypothesis.h in Headers */,
+ 1C8CFEAD0AD67A9700FA22E2 /* InputFileStream.h in Headers */,
+ 1C8CFEB10AD67A9700FA22E2 /* InputType.h in Headers */,
+ 1C8CFEB30AD67A9700FA22E2 /* LanguageModel.h in Headers */,
+ 1C8CFEB70AD67A9700FA22E2 /* LanguageModelFactory.h in Headers */,
+ 1C8CFEBB0AD67A9700FA22E2 /* LanguageModelJoint.h in Headers */,
+ 1C8CFEBD0AD67A9700FA22E2 /* LanguageModelMultiFactor.h in Headers */,
+ 1C8CFEBF0AD67A9700FA22E2 /* LanguageModelSingleFactor.h in Headers */,
+ 1C8CFEC10AD67A9700FA22E2 /* LanguageModelSRI.h in Headers */,
+ 1C8CFEC70AD67A9700FA22E2 /* LexicalReordering.h in Headers */,
+ 1C8CFEC90AD67A9700FA22E2 /* LMList.h in Headers */,
+ 1C8CFECB0AD67A9700FA22E2 /* Manager.h in Headers */,
+ 1C8CFED00AD67A9700FA22E2 /* ObjectPool.h in Headers */,
+ 1C8CFED20AD67A9700FA22E2 /* Parameter.h in Headers */,
+ 1C8CFED40AD67A9700FA22E2 /* PartialTranslOptColl.h in Headers */,
+ 1C8CFED50AD67A9700FA22E2 /* PDTAimp.h in Headers */,
+ 1C8CFED70AD67A9700FA22E2 /* Phrase.h in Headers */,
+ 1C8CFED90AD67A9700FA22E2 /* PhraseDictionary.h in Headers */,
+ 1C8CFEDD0AD67A9700FA22E2 /* PhraseDictionaryNode.h in Headers */,
+ 1C8CFEDF0AD67A9700FA22E2 /* PhraseDictionaryTree.h in Headers */,
+ 1C8CFEE10AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.h in Headers */,
+ 1C8CFEE40AD67A9700FA22E2 /* PrefixTree.h in Headers */,
+ 1C8CFEE60AD67A9700FA22E2 /* ScoreComponentCollection.h in Headers */,
+ 1C8CFEE80AD67A9700FA22E2 /* ScoreIndexManager.h in Headers */,
+ 1C8CFEEA0AD67A9700FA22E2 /* ScoreProducer.h in Headers */,
+ 1C8CFEEC0AD67A9700FA22E2 /* Sentence.h in Headers */,
+ 1C8CFEEE0AD67A9700FA22E2 /* SentenceStats.h in Headers */,
+ 1C8CFEF00AD67A9700FA22E2 /* SquareMatrix.h in Headers */,
+ 1C8CFEF20AD67A9700FA22E2 /* StaticData.h in Headers */,
+ 1C8CFEF40AD67A9700FA22E2 /* TargetPhrase.h in Headers */,
+ 1C8CFEF60AD67A9700FA22E2 /* TargetPhraseCollection.h in Headers */,
+ 1C8CFEF70AD67A9700FA22E2 /* Timer.h in Headers */,
+ 1C8CFEF90AD67A9700FA22E2 /* TranslationOption.h in Headers */,
+ 1C8CFEFB0AD67A9700FA22E2 /* TranslationOptionCollection.h in Headers */,
+ 1C8CFEFD0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.h in Headers */,
+ 1C8CFEFF0AD67A9700FA22E2 /* TranslationOptionCollectionText.h in Headers */,
+ 1C8CFF000AD67A9700FA22E2 /* TypeDef.h in Headers */,
+ 1C8CFF010AD67A9700FA22E2 /* UniqueObject.h in Headers */,
+ 1C8CFF030AD67A9700FA22E2 /* UserMessage.h in Headers */,
+ 1C8CFF050AD67A9700FA22E2 /* Util.h in Headers */,
+ 1C8CFF070AD67A9700FA22E2 /* Word.h in Headers */,
+ 1C8CFF090AD67A9700FA22E2 /* WordsBitmap.h in Headers */,
+ 1C8CFF0B0AD67A9700FA22E2 /* WordsRange.h in Headers */,
+ D39BA82E0AFBB7090089AE6A /* LanguageModelSkip.h in Headers */,
+ D39BA8300AFBB7090089AE6A /* PhraseDictionaryMemory.h in Headers */,
+ D39BA84A0B11FF0C0089AE6A /* LanguageModelInternal.h in Headers */,
+ D39BA84C0B11FF0C0089AE6A /* NGramCollection.h in Headers */,
+ D39BA84E0B11FF0C0089AE6A /* NGramNode.h in Headers */,
+ 0396E1A80C0B189200D95CFF /* HypothesisStack.h in Headers */,
+ 0396E1AA0C0B189200D95CFF /* LexicalReorderingTable.h in Headers */,
+ 0396E1AC0C0B189200D95CFF /* LVoc.h in Headers */,
+ 0396E1AE0C0B189200D95CFF /* PCNTools.h in Headers */,
+ 0396E1B00C0B189200D95CFF /* PrefixTreeMap.h in Headers */,
+ 0396E1B20C0B189200D95CFF /* TrellisPath.h in Headers */,
+ 0396E1B40C0B189200D95CFF /* TrellisPathCollection.h in Headers */,
+ 0396E1B50C0B189200D95CFF /* TrellisPathList.h in Headers */,
+ 0396E1B70C0B189200D95CFF /* WordLattice.h in Headers */,
+ 037C639B0C8EBFB400584F2E /* DecodeGraph.h in Headers */,
+ E2B7C8F20DDA19190089EFE0 /* BitmapContainer.h in Headers */,
+ E2B7CA730DDB3B5C0089EFE0 /* FloydWarshall.h in Headers */,
+ E2B7CA770DDB3B700089EFE0 /* XmlOption.h in Headers */,
+ E21C110F0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h in Headers */,
+ E21C11110DFEE86B00ADAED0 /* HypothesisStackNormal.h in Headers */,
+ E21C11190DFEE88800ADAED0 /* Search.h in Headers */,
+ E21C111B0DFEE88800ADAED0 /* SearchCubePruning.h in Headers */,
+ E21C111D0DFEE88800ADAED0 /* SearchNormal.h in Headers */,
+ B23821390EB73DCB007303C3 /* LanguageModelIRST.h in Headers */,
+ B2639DEB0EF199D400A67519 /* ReorderingConstraint.h in Headers */,
+ B2639DED0EF199D400A67519 /* TranslationOptionList.h in Headers */,
+ 1C5009C00FB9E09700DFD24F /* FeatureFunction.h in Headers */,
+ 1C5009C20FB9E09700DFD24F /* FFState.h in Headers */,
+ 1CB459EB0FD2DFEC000030BE /* GlobalLexicalModel.h in Headers */,
+ 1CCE5B11114E60A500F79AD5 /* DPR_reordering.h in Headers */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ D2AAC045055464E500DB518D /* moses */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 0396E18C0C0B186F00D95CFF /* Build configuration list for PBXNativeTarget "moses" */;
+ buildPhases = (
+ D2AAC043055464E500DB518D /* Headers */,
+ D2AAC044055464E500DB518D /* Sources */,
+ D289987405E68DCB004EDB86 /* Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = moses;
+ productName = moses;
+ productReference = D2AAC046055464E500DB518D /* libmoses.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 08FB7793FE84155DC02AAC07 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 0396E1900C0B186F00D95CFF /* Build configuration list for PBXProject "moses" */;
+ compatibilityVersion = "Xcode 2.4";
+ hasScannedForEncodings = 1;
+ mainGroup = 08FB7794FE84155DC02AAC07 /* moses */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ D2AAC045055464E500DB518D /* moses */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ D2AAC044055464E500DB518D /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1C8CFE8F0AD67A9700FA22E2 /* ConfusionNet.cpp in Sources */,
+ 1C8CFE910AD67A9700FA22E2 /* DecodeStep.cpp in Sources */,
+ 1C8CFE930AD67A9700FA22E2 /* DecodeStepGeneration.cpp in Sources */,
+ 1C8CFE950AD67A9700FA22E2 /* DecodeStepTranslation.cpp in Sources */,
+ 1C8CFE970AD67A9700FA22E2 /* Dictionary.cpp in Sources */,
+ 1C8CFE990AD67A9700FA22E2 /* DummyScoreProducers.cpp in Sources */,
+ 1C8CFE9B0AD67A9700FA22E2 /* Factor.cpp in Sources */,
+ 1C8CFE9D0AD67A9700FA22E2 /* FactorCollection.cpp in Sources */,
+ 1C8CFE9F0AD67A9700FA22E2 /* FactorTypeSet.cpp in Sources */,
+ 1C8CFEA30AD67A9700FA22E2 /* GenerationDictionary.cpp in Sources */,
+ 1C8CFEA60AD67A9700FA22E2 /* hash.cpp in Sources */,
+ 1C8CFEA80AD67A9700FA22E2 /* Hypothesis.cpp in Sources */,
+ 1C8CFEAC0AD67A9700FA22E2 /* InputFileStream.cpp in Sources */,
+ 1C8CFEB00AD67A9700FA22E2 /* InputType.cpp in Sources */,
+ 1C8CFEB20AD67A9700FA22E2 /* LanguageModel.cpp in Sources */,
+ 1C8CFEB60AD67A9700FA22E2 /* LanguageModelFactory.cpp in Sources */,
+ 1C8CFEBA0AD67A9700FA22E2 /* LanguageModelJoint.cpp in Sources */,
+ 1C8CFEBC0AD67A9700FA22E2 /* LanguageModelMultiFactor.cpp in Sources */,
+ 1C8CFEBE0AD67A9700FA22E2 /* LanguageModelSingleFactor.cpp in Sources */,
+ 1C8CFEC00AD67A9700FA22E2 /* LanguageModelSRI.cpp in Sources */,
+ 1C8CFEC60AD67A9700FA22E2 /* LexicalReordering.cpp in Sources */,
+ 1C8CFEC80AD67A9700FA22E2 /* LMList.cpp in Sources */,
+ 1C8CFECA0AD67A9700FA22E2 /* Manager.cpp in Sources */,
+ 1C8CFED10AD67A9700FA22E2 /* Parameter.cpp in Sources */,
+ 1C8CFED30AD67A9700FA22E2 /* PartialTranslOptColl.cpp in Sources */,
+ 1C8CFED60AD67A9700FA22E2 /* Phrase.cpp in Sources */,
+ 1C8CFED80AD67A9700FA22E2 /* PhraseDictionary.cpp in Sources */,
+ 1C8CFEDC0AD67A9700FA22E2 /* PhraseDictionaryNode.cpp in Sources */,
+ 1C8CFEDE0AD67A9700FA22E2 /* PhraseDictionaryTree.cpp in Sources */,
+ 1C8CFEE00AD67A9700FA22E2 /* PhraseDictionaryTreeAdaptor.cpp in Sources */,
+ 1C8CFEE50AD67A9700FA22E2 /* ScoreComponentCollection.cpp in Sources */,
+ 1C8CFEE70AD67A9700FA22E2 /* ScoreIndexManager.cpp in Sources */,
+ 1C8CFEE90AD67A9700FA22E2 /* ScoreProducer.cpp in Sources */,
+ 1C8CFEEB0AD67A9700FA22E2 /* Sentence.cpp in Sources */,
+ 1C8CFEED0AD67A9700FA22E2 /* SentenceStats.cpp in Sources */,
+ 1C8CFEEF0AD67A9700FA22E2 /* SquareMatrix.cpp in Sources */,
+ 1C8CFEF10AD67A9700FA22E2 /* StaticData.cpp in Sources */,
+ 1C8CFEF30AD67A9700FA22E2 /* TargetPhrase.cpp in Sources */,
+ 1C8CFEF50AD67A9700FA22E2 /* TargetPhraseCollection.cpp in Sources */,
+ 1C8CFEF80AD67A9700FA22E2 /* TranslationOption.cpp in Sources */,
+ 1C8CFEFA0AD67A9700FA22E2 /* TranslationOptionCollection.cpp in Sources */,
+ 1C8CFEFC0AD67A9700FA22E2 /* TranslationOptionCollectionConfusionNet.cpp in Sources */,
+ 1C8CFEFE0AD67A9700FA22E2 /* TranslationOptionCollectionText.cpp in Sources */,
+ 1C8CFF020AD67A9700FA22E2 /* UserMessage.cpp in Sources */,
+ 1C8CFF040AD67A9700FA22E2 /* Util.cpp in Sources */,
+ 1C8CFF060AD67A9700FA22E2 /* Word.cpp in Sources */,
+ 1C8CFF080AD67A9700FA22E2 /* WordsBitmap.cpp in Sources */,
+ 1C8CFF0A0AD67A9700FA22E2 /* WordsRange.cpp in Sources */,
+ D39BA82D0AFBB7090089AE6A /* LanguageModelSkip.cpp in Sources */,
+ D39BA82F0AFBB7090089AE6A /* PhraseDictionaryMemory.cpp in Sources */,
+ D39BA8480B11FF0C0089AE6A /* File.cpp in Sources */,
+ D39BA8490B11FF0C0089AE6A /* LanguageModelInternal.cpp in Sources */,
+ D39BA84B0B11FF0C0089AE6A /* NGramCollection.cpp in Sources */,
+ D39BA84D0B11FF0C0089AE6A /* NGramNode.cpp in Sources */,
+ 0396E1A70C0B189200D95CFF /* HypothesisStack.cpp in Sources */,
+ 0396E1A90C0B189200D95CFF /* LexicalReorderingTable.cpp in Sources */,
+ 0396E1AB0C0B189200D95CFF /* LVoc.cpp in Sources */,
+ 0396E1AD0C0B189200D95CFF /* PCNTools.cpp in Sources */,
+ 0396E1AF0C0B189200D95CFF /* PrefixTreeMap.cpp in Sources */,
+ 0396E1B10C0B189200D95CFF /* TrellisPath.cpp in Sources */,
+ 0396E1B30C0B189200D95CFF /* TrellisPathCollection.cpp in Sources */,
+ 0396E1B60C0B189200D95CFF /* WordLattice.cpp in Sources */,
+ 037C639A0C8EBFB400584F2E /* DecodeGraph.cpp in Sources */,
+ E2B7C9590DDB1AEF0089EFE0 /* BitmapContainer.cpp in Sources */,
+ E2B7CA720DDB3B5C0089EFE0 /* FloydWarshall.cpp in Sources */,
+ E2B7CA760DDB3B700089EFE0 /* XmlOption.cpp in Sources */,
+ E21C110E0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp in Sources */,
+ E21C11100DFEE86B00ADAED0 /* HypothesisStackNormal.cpp in Sources */,
+ E21C11180DFEE88800ADAED0 /* Search.cpp in Sources */,
+ E21C111A0DFEE88800ADAED0 /* SearchCubePruning.cpp in Sources */,
+ E21C111C0DFEE88800ADAED0 /* SearchNormal.cpp in Sources */,
+ B219B8690E93836100EAB407 /* Timer.cpp in Sources */,
+ B23821380EB73DCB007303C3 /* LanguageModelIRST.cpp in Sources */,
+ B2639DEA0EF199D400A67519 /* ReorderingConstraint.cpp in Sources */,
+ B2639DEC0EF199D400A67519 /* TranslationOptionList.cpp in Sources */,
+ 1C5009BF0FB9E09700DFD24F /* FeatureFunction.cpp in Sources */,
+ 1C5009C10FB9E09700DFD24F /* FFState.cpp in Sources */,
+ 1CB459EA0FD2DFEC000030BE /* GlobalLexicalModel.cpp in Sources */,
+ 1CCE5B10114E60A500F79AD5 /* DPR_reordering.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 0396E18D0C0B186F00D95CFF /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ LM_IRST,
+ LM_SRI,
+ TRACE_ENABLE,
+ "_FILE_OFFSET_BITS=64",
+ _LARGE_FILES,
+ );
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = (
+ ../irstlm/include,
+ ../srilm/misc/src,
+ ../srilm/dstruct/src,
+ ../srilm/include,
+ ../srilm/lm/src,
+ );
+ INSTALL_PATH = /usr/local/lib;
+ LIBRARY_STYLE = STATIC;
+ PREBINDING = NO;
+ PRODUCT_NAME = moses;
+ REZ_PREPROCESSOR_DEFINITIONS = "";
+ ZERO_LINK = NO;
+ };
+ name = Debug;
+ };
+ 0396E18E0C0B186F00D95CFF /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = YES;
+ GCC_ENABLE_FIX_AND_CONTINUE = NO;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ LM_IRST,
+ LM_SRI,
+ TRACE_ENABLE,
+ "_FILE_OFFSET_BITS=64",
+ _LARGE_FILES,
+ );
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = (
+ ../irstlm/include,
+ ../srilm/misc/src,
+ ../srilm/dstruct/src,
+ ../srilm/include,
+ ../srilm/lm/src,
+ );
+ INSTALL_PATH = /usr/local/lib;
+ LIBRARY_STYLE = STATIC;
+ PREBINDING = NO;
+ PRODUCT_NAME = moses;
+ ZERO_LINK = NO;
+ };
+ name = Release;
+ };
+ 0396E18F0C0B186F00D95CFF /* Default */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ GCC_PREPROCESSOR_DEFINITIONS = "";
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = "";
+ INSTALL_PATH = /usr/local/lib;
+ LIBRARY_STYLE = STATIC;
+ PREBINDING = NO;
+ PRODUCT_NAME = moses;
+ };
+ name = Default;
+ };
+ 0396E1910C0B186F00D95CFF /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_OPTIMIZATION_LEVEL = 0;
+ VALID_ARCHS = i386;
+ };
+ name = Debug;
+ };
+ 0396E1920C0B186F00D95CFF /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_OPTIMIZATION_LEVEL = 0;
+ };
+ name = Release;
+ };
+ 0396E1930C0B186F00D95CFF /* Default */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_OPTIMIZATION_LEVEL = 0;
+ };
+ name = Default;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 0396E18C0C0B186F00D95CFF /* Build configuration list for PBXNativeTarget "moses" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 0396E18D0C0B186F00D95CFF /* Debug */,
+ 0396E18E0C0B186F00D95CFF /* Release */,
+ 0396E18F0C0B186F00D95CFF /* Default */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Default;
+ };
+ 0396E1900C0B186F00D95CFF /* Build configuration list for PBXProject "moses" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 0396E1910C0B186F00D95CFF /* Debug */,
+ 0396E1920C0B186F00D95CFF /* Release */,
+ 0396E1930C0B186F00D95CFF /* Default */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Default;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/moses/src/BitmapContainer.cpp b/moses/src/BitmapContainer.cpp
new file mode 100644
index 000000000..2b905fff9
--- /dev/null
+++ b/moses/src/BitmapContainer.cpp
@@ -0,0 +1,499 @@
+// $Id: BitmapContainer.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <algorithm>
+#include <limits>
+#include <utility>
+
+#include "BitmapContainer.h"
+#include "HypothesisStackCubePruning.h"
+#include "DummyScoreProducers.h"
+#include "TranslationOptionList.h"
+
+namespace Moses
+{
+
+class HypothesisScoreOrdererNoDistortion
+{
+ public:
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
+ {
+ const float scoreA = hypoA->GetScore();
+ const float scoreB = hypoB->GetScore();
+
+ if (scoreA > scoreB)
+ {
+ return true;
+ }
+ else if (scoreA < scoreB)
+ {
+ return false;
+ }
+ else
+ {
+ return hypoA < hypoB;
+ }
+ }
+};
+
+class HypothesisScoreOrdererWithDistortion
+{
+ public:
+ HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange) :
+ m_transOptRange(transOptRange) {}
+
+ const WordsRange* m_transOptRange;
+
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
+ {
+ assert (m_transOptRange != NULL);
+
+ const float weightDistortion = StaticData::Instance().GetWeightDistortion();
+ const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
+ const float distortionScoreA = dsp->CalculateDistortionScore(
+ *hypoA,
+ hypoA->GetCurrSourceWordsRange(),
+ *m_transOptRange,
+ hypoA->GetWordsBitmap().GetFirstGapPos()
+ );
+ const float distortionScoreB = dsp->CalculateDistortionScore(
+ *hypoB,
+ hypoB->GetCurrSourceWordsRange(),
+ *m_transOptRange,
+ hypoB->GetWordsBitmap().GetFirstGapPos()
+ );
+
+ const float scoreA = hypoA->GetScore() + distortionScoreA * weightDistortion;
+ const float scoreB = hypoB->GetScore() + distortionScoreB * weightDistortion;
+
+ if (scoreA > scoreB)
+ {
+ return true;
+ }
+ else if (scoreA < scoreB)
+ {
+ return false;
+ }
+ else
+ {
+ return hypoA < hypoB;
+ }
+ }
+
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// BackwardsEdge Code
+////////////////////////////////////////////////////////////////////////////////
+
+BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
+ , BitmapContainer &parent
+ , const TranslationOptionList &translations
+ , const SquareMatrix &futureScore,
+ const InputType& itype)
+ : m_initialized(false)
+ , m_prevBitmapContainer(prevBitmapContainer)
+ , m_parent(parent)
+ , m_translations(translations)
+ , m_futurescore(futureScore)
+ , m_seenPosition()
+{
+
+ // If either dimension is empty, we haven't got anything to do.
+ if(m_prevBitmapContainer.GetHypotheses().size() == 0 || m_translations.size() == 0) {
+ VERBOSE(3, "Empty cube on BackwardsEdge" << std::endl);
+ return;
+ }
+
+ // Fetch the things we need for distortion cost computation.
+ int maxDistortion = StaticData::Instance().GetMaxDistortion();
+
+ if (maxDistortion == -1) {
+ for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter)
+ {
+ m_hypotheses.push_back(*iter);
+ }
+ return;
+ }
+
+ const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange();
+
+ HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
+ HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
+
+ while (iterHypo != iterEnd)
+ {
+ const Hypothesis &hypo = **iterHypo;
+ // Special case: If this is the first hypothesis used to seed the search,
+ // it doesn't have a valid range, and we create the hypothesis, if the
+ // initial position is not further into the sentence than the distortion limit.
+ if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0)
+ {
+ if (transOptRange.GetStartPos() <= maxDistortion)
+ m_hypotheses.push_back(&hypo);
+ }
+ else
+ {
+ int distortionDistance = itype.ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
+ , transOptRange);
+
+ if (distortionDistance <= maxDistortion)
+ m_hypotheses.push_back(&hypo);
+ }
+
+ ++iterHypo;
+ }
+
+ if (m_translations.size() > 1)
+ {
+ assert(m_translations.Get(0)->GetFutureScore() >= m_translations.Get(1)->GetFutureScore());
+ }
+
+ if (m_hypotheses.size() > 1)
+ {
+ assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
+ }
+
+ HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
+ std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
+
+ // std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
+}
+
+BackwardsEdge::~BackwardsEdge()
+{
+ m_seenPosition.clear();
+ m_hypotheses.clear();
+}
+
+
+void
+BackwardsEdge::Initialize()
+{
+ if(m_hypotheses.size() == 0 || m_translations.size() == 0)
+ {
+ m_initialized = true;
+ return;
+ }
+
+ Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0));
+ m_parent.Enqueue(0, 0, expanded, this);
+ SetSeenPosition(0, 0);
+ m_initialized = true;
+}
+
+Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt)
+{
+ // create hypothesis and calculate all its scores
+ Hypothesis *newHypo = hypothesis.CreateNext(transOpt, NULL); // TODO FIXME This is absolutely broken - don't pass null here
+
+ // expand hypothesis further if transOpt was linked
+ std::vector<TranslationOption*>::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin();
+ std::vector<TranslationOption*>::const_iterator iterEnd = transOpt.GetLinkedTransOpts().end();
+
+ while (iterLinked != iterEnd)
+ {
+ const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap();
+ if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) {
+ // don't want to add a hypothesis that has some but not all of a linked TO set, so return
+ delete newHypo;
+ return NULL;
+ }
+ else
+ {
+ newHypo->CalcScore(m_futurescore);
+ newHypo = newHypo->CreateNext(**iterLinked, NULL); // TODO FIXME This is absolutely broken - don't pass null here
+ }
+
+ ++iterLinked;
+ }
+
+ newHypo->CalcScore(m_futurescore);
+
+ return newHypo;
+}
+
+bool
+BackwardsEdge::SeenPosition(const size_t x, const size_t y)
+{
+ std::set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
+ return (iter != m_seenPosition.end());
+}
+
+void
+BackwardsEdge::SetSeenPosition(const size_t x, const size_t y)
+{
+ assert(x < (1<<17));
+ assert(y < (1<<17));
+
+ m_seenPosition.insert((x<<16) + y);
+}
+
+
+bool
+BackwardsEdge::GetInitialized()
+{
+ return m_initialized;
+}
+
+const BitmapContainer&
+BackwardsEdge::GetBitmapContainer() const
+{
+ return m_prevBitmapContainer;
+}
+
+void
+BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
+{
+ Hypothesis *newHypo;
+
+ if(y + 1 < m_translations.size() && !SeenPosition(x, y + 1)) {
+ SetSeenPosition(x, y + 1);
+ newHypo = CreateHypothesis(*m_hypotheses[x], *m_translations.Get(y + 1));
+ if(newHypo != NULL)
+ {
+ m_parent.Enqueue(x, y + 1, newHypo, (BackwardsEdge*)this);
+ }
+ }
+
+ if(x + 1 < m_hypotheses.size() && !SeenPosition(x + 1, y)) {
+ SetSeenPosition(x + 1, y);
+ newHypo = CreateHypothesis(*m_hypotheses[x + 1], *m_translations.Get(y));
+ if(newHypo != NULL)
+ {
+ m_parent.Enqueue(x + 1, y, newHypo, (BackwardsEdge*)this);
+ }
+ }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// BitmapContainer Code
+////////////////////////////////////////////////////////////////////////////////
+
+BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
+ , HypothesisStackCubePruning &stack)
+ : m_bitmap(bitmap)
+ , m_stack(stack)
+ , m_numStackInsertions(0)
+{
+ m_hypotheses = HypothesisSet();
+ m_edges = BackwardsEdgeSet();
+ m_queue = HypothesisQueue();
+}
+
+BitmapContainer::~BitmapContainer()
+{
+ // As we have created the square position objects we clean up now.
+ HypothesisQueueItem *item = NULL;
+
+ while (!m_queue.empty())
+ {
+ item = m_queue.top();
+ FREEHYPO(item->GetHypothesis());
+ delete item;
+ m_queue.pop();
+ }
+
+ // Delete all edges.
+ RemoveAllInColl(m_edges);
+
+ m_hypotheses.clear();
+ m_edges.clear();
+}
+
+
+void
+BitmapContainer::Enqueue(int hypothesis_pos
+ , int translation_pos
+ , Hypothesis *hypothesis
+ , BackwardsEdge *edge)
+{
+ HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
+ , translation_pos
+ , hypothesis
+ , edge);
+ m_queue.push(item);
+}
+
+HypothesisQueueItem*
+BitmapContainer::Dequeue(bool keepValue)
+{
+ if (!m_queue.empty())
+ {
+ HypothesisQueueItem *item = m_queue.top();
+
+ if (!keepValue)
+ {
+ m_queue.pop();
+ }
+
+ return item;
+ }
+
+ return NULL;
+}
+
+HypothesisQueueItem*
+BitmapContainer::Top() const
+{
+ return m_queue.top();
+}
+
+size_t
+BitmapContainer::Size()
+{
+ return m_queue.size();
+}
+
+bool
+BitmapContainer::Empty() const
+{
+ return m_queue.empty();
+}
+
+
+const WordsBitmap&
+BitmapContainer::GetWordsBitmap()
+{
+ return m_bitmap;
+}
+
+const HypothesisSet&
+BitmapContainer::GetHypotheses() const
+{
+ return m_hypotheses;
+}
+
+size_t
+BitmapContainer::GetHypothesesSize() const
+{
+ return m_hypotheses.size();
+}
+
+const BackwardsEdgeSet&
+BitmapContainer::GetBackwardsEdges()
+{
+ return m_edges;
+}
+
+void
+BitmapContainer::AddHypothesis(Hypothesis *hypothesis)
+{
+ bool itemExists = false;
+ HypothesisSet::const_iterator iter = m_hypotheses.begin();
+ HypothesisSet::const_iterator iterEnd = m_hypotheses.end();
+
+ // cfedermann: do we actually need this check?
+ while (iter != iterEnd)
+ {
+ if (*iter == hypothesis) {
+ itemExists = true;
+ break;
+ }
+
+ ++iter;
+ }
+ assert(itemExists == false);
+ m_hypotheses.push_back(hypothesis);
+}
+
+void
+BitmapContainer::AddBackwardsEdge(BackwardsEdge *edge)
+{
+ m_edges.insert(edge);
+}
+
+void
+BitmapContainer::InitializeEdges()
+{
+ BackwardsEdgeSet::iterator iter = m_edges.begin();
+ BackwardsEdgeSet::iterator iterEnd = m_edges.end();
+
+ while (iter != iterEnd)
+ {
+ BackwardsEdge *edge = *iter;
+ edge->Initialize();
+
+ ++iter;
+ }
+}
+
+void
+BitmapContainer::EnsureMinStackHyps(const size_t minNumHyps)
+{
+ while ((!Empty()) && m_numStackInsertions < minNumHyps)
+ {
+ ProcessBestHypothesis();
+ }
+}
+
+void
+BitmapContainer::ProcessBestHypothesis()
+{
+ if (m_queue.empty())
+ {
+ return;
+ }
+
+ // Get the currently best hypothesis from the queue.
+ HypothesisQueueItem *item = Dequeue();
+
+ // If the priority queue is exhausted, we are done and should have exited
+ assert(item != NULL);
+
+ // check we are pulling things off of priority queue in right order
+ if (!Empty())
+ {
+ HypothesisQueueItem *check = Dequeue(true);
+ assert(item->GetHypothesis()->GetTotalScore() >= check->GetHypothesis()->GetTotalScore());
+ }
+
+ // Logging for the criminally insane
+ IFVERBOSE(3) {
+ // const StaticData &staticData = StaticData::Instance();
+ item->GetHypothesis()->PrintHypothesis();
+ }
+
+ // Add best hypothesis to hypothesis stack.
+ const bool newstackentry = m_stack.AddPrune(item->GetHypothesis());
+ if (newstackentry)
+ m_numStackInsertions++;
+
+ IFVERBOSE(3) {
+ TRACE_ERR("new stack entry flag is " << newstackentry << std::endl);
+ }
+
+ // Create new hypotheses for the two successors of the hypothesis just added.
+ item->GetBackwardsEdge()->PushSuccessors(item->GetHypothesisPos(), item->GetTranslationPos());
+
+ // We are done with the queue item, we delete it.
+ delete item;
+}
+
+void
+BitmapContainer::SortHypotheses()
+{
+ std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer());
+}
+
+}
+
diff --git a/moses/src/BitmapContainer.h b/moses/src/BitmapContainer.h
new file mode 100644
index 000000000..04ddc403a
--- /dev/null
+++ b/moses/src/BitmapContainer.h
@@ -0,0 +1,249 @@
+// $Id: BitmapContainer.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_BitmapContainer_h
+#define moses_BitmapContainer_h
+
+#include <set>
+#include <vector>
+
+#include "Hypothesis.h"
+#include "HypothesisStackCubePruning.h"
+#include "SquareMatrix.h"
+#include "TranslationOption.h"
+#include "TypeDef.h"
+#include "WordsBitmap.h"
+
+namespace Moses
+{
+
+class BitmapContainer;
+class BackwardsEdge;
+class Hypothesis;
+class HypothesisStackCubePruning;
+class HypothesisQueueItem;
+class QueueItemOrderer;
+
+typedef std::vector< Hypothesis* > HypothesisSet;
+typedef std::set< BackwardsEdge* > BackwardsEdgeSet;
+typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueItem* >, QueueItemOrderer> HypothesisQueue;
+
+////////////////////////////////////////////////////////////////////////////////
+// Hypothesis Priority Queue Code
+////////////////////////////////////////////////////////////////////////////////
+
+class HypothesisQueueItem
+{
+ private:
+ size_t m_hypothesis_pos, m_translation_pos;
+ Hypothesis *m_hypothesis;
+ BackwardsEdge *m_edge;
+
+ HypothesisQueueItem();
+
+ public:
+ HypothesisQueueItem(const size_t hypothesis_pos
+ , const size_t translation_pos
+ , Hypothesis *hypothesis
+ , BackwardsEdge *edge)
+ : m_hypothesis_pos(hypothesis_pos)
+ , m_translation_pos(translation_pos)
+ , m_hypothesis(hypothesis)
+ , m_edge(edge)
+ {
+ }
+
+ ~HypothesisQueueItem()
+ {
+ }
+
+ int GetHypothesisPos()
+ {
+ return m_hypothesis_pos;
+ }
+
+ int GetTranslationPos()
+ {
+ return m_translation_pos;
+ }
+
+ Hypothesis *GetHypothesis()
+ {
+ return m_hypothesis;
+ }
+
+ BackwardsEdge *GetBackwardsEdge()
+ {
+ return m_edge;
+ }
+};
+
+// Allows to compare two HypothesisQueueItem objects by the corresponding scores.
+class QueueItemOrderer
+{
+ public:
+ bool operator()(HypothesisQueueItem* itemA, HypothesisQueueItem* itemB) const
+ {
+ float scoreA = itemA->GetHypothesis()->GetTotalScore();
+ float scoreB = itemB->GetHypothesis()->GetTotalScore();
+
+ return (scoreA < scoreB);
+
+ /*
+ {
+ return true;
+ }
+ else if (scoreA < scoreB)
+ {
+ return false;
+ }
+ else
+ {
+ return itemA < itemB;
+ }*/
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Hypothesis Orderer Code
+////////////////////////////////////////////////////////////////////////////////
+// Allows to compare two Hypothesis objects by the corresponding scores.
+////////////////////////////////////////////////////////////////////////////////
+
+class HypothesisScoreOrderer
+{
+ public:
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
+ {
+ float scoreA = hypoA->GetTotalScore();
+ float scoreB = hypoB->GetTotalScore();
+
+ return (scoreA > scoreB);
+ /*
+ {
+ return true;
+ }
+ else if (scoreA < scoreB)
+ {
+ return false;
+ }
+ else
+ {
+ return hypoA < hypoB;
+ }*/
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Backwards Edge Code
+////////////////////////////////////////////////////////////////////////////////
+// Encodes an edge pointing to a BitmapContainer.
+////////////////////////////////////////////////////////////////////////////////
+
+class BackwardsEdge
+{
+ private:
+ friend class BitmapContainer;
+ bool m_initialized;
+
+ const BitmapContainer &m_prevBitmapContainer;
+ BitmapContainer &m_parent;
+ const TranslationOptionList &m_translations;
+ const SquareMatrix &m_futurescore;
+
+ std::vector< const Hypothesis* > m_hypotheses;
+ std::set< int > m_seenPosition;
+
+ // We don't want to instantiate "empty" objects.
+ BackwardsEdge();
+
+ Hypothesis *CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt);
+ bool SeenPosition(const size_t x, const size_t y);
+ void SetSeenPosition(const size_t x, const size_t y);
+
+ protected:
+ void Initialize();
+
+ public:
+ BackwardsEdge(const BitmapContainer &prevBitmapContainer
+ , BitmapContainer &parent
+ , const TranslationOptionList &translations
+ , const SquareMatrix &futureScore,
+ const InputType& source);
+ ~BackwardsEdge();
+
+ bool GetInitialized();
+ const BitmapContainer &GetBitmapContainer() const;
+ int GetDistortionPenalty();
+ void PushSuccessors(const size_t x, const size_t y);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Bitmap Container Code
+////////////////////////////////////////////////////////////////////////////////
+// A BitmapContainer encodes an ordered set of hypotheses and a set of edges
+// pointing to the "generating" BitmapContainers. It also stores a priority
+// queue that contains expanded hypotheses from the connected edges.
+////////////////////////////////////////////////////////////////////////////////
+
+class BitmapContainer
+{
+ private:
+ WordsBitmap m_bitmap;
+ HypothesisStackCubePruning &m_stack;
+ HypothesisSet m_hypotheses;
+ BackwardsEdgeSet m_edges;
+ HypothesisQueue m_queue;
+ size_t m_numStackInsertions;
+
+ // We always require a corresponding bitmap to be supplied.
+ BitmapContainer();
+ BitmapContainer(const BitmapContainer &);
+ public:
+ BitmapContainer(const WordsBitmap &bitmap
+ , HypothesisStackCubePruning &stack);
+
+ // The destructor will also delete all the edges that are
+ // connected to this BitmapContainer.
+ ~BitmapContainer();
+
+ void Enqueue(int hypothesis_pos, int translation_pos, Hypothesis *hypothesis, BackwardsEdge *edge);
+ HypothesisQueueItem *Dequeue(bool keepValue=false);
+ HypothesisQueueItem *Top() const;
+ size_t Size();
+ bool Empty() const;
+
+ const WordsBitmap &GetWordsBitmap();
+ const HypothesisSet &GetHypotheses() const;
+ size_t GetHypothesesSize() const;
+ const BackwardsEdgeSet &GetBackwardsEdges();
+
+ void InitializeEdges();
+ void ProcessBestHypothesis();
+ void EnsureMinStackHyps(const size_t minNumHyps);
+ void AddHypothesis(Hypothesis *hypothesis);
+ void AddBackwardsEdge(BackwardsEdge *edge);
+ void SortHypotheses();
+};
+
+}
+
+#endif
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp
new file mode 100644
index 000000000..85398cd17
--- /dev/null
+++ b/moses/src/ConfusionNet.cpp
@@ -0,0 +1,245 @@
+// $Id: ConfusionNet.cpp 2935 2010-02-24 10:30:24Z jfouet $
+
+#include "ConfusionNet.h"
+#include <sstream>
+
+#include "FactorCollection.h"
+#include "Util.h"
+#include "PhraseDictionaryTreeAdaptor.h"
+#include "TranslationOptionCollectionConfusionNet.h"
+#include "StaticData.h"
+#include "Sentence.h"
+#include "UserMessage.h"
+
+namespace Moses
+{
+struct CNStats {
+ size_t created,destr,read,colls,words;
+
+ CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
+ ~CNStats() {print(std::cerr);}
+
+ void createOne() {++created;}
+ void destroyOne() {++destr;}
+
+ void collect(const ConfusionNet& cn)
+ {
+ ++read;
+ colls+=cn.GetSize();
+ for(size_t i=0;i<cn.GetSize();++i)
+ words+=cn[i].size();
+ }
+ void print(std::ostream& out) const
+ {
+ if(created>0)
+ {
+ out<<"confusion net statistics:\n"
+ " created:\t"<<created<<"\n"
+ " destroyed:\t"<<destr<<"\n"
+ " succ. read:\t"<<read<<"\n"
+ " columns:\t"<<colls<<"\n"
+ " words:\t"<<words<<"\n"
+ " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
+ " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
+ "\n\n";
+ }
+ }
+
+};
+
+CNStats stats;
+
+size_t ConfusionNet::GetColumnIncrement(size_t i, size_t j) const
+{
+ (void) i;
+ (void) j;
+ return 1;
+}
+
+ConfusionNet::ConfusionNet()
+ : InputType()
+{
+ stats.createOne();
+}
+ConfusionNet::~ConfusionNet() {stats.destroyOne();}
+
+ConfusionNet::ConfusionNet(Sentence const& s)
+{
+ data.resize(s.GetSize());
+ for(size_t i=0;i<s.GetSize();++i)
+ data[i].push_back(std::make_pair(s.GetWord(i),0.0));
+}
+
+bool ConfusionNet::ReadF(std::istream& in,
+ const std::vector<FactorType>& factorOrder,
+ int format)
+{
+ VERBOSE(1, "read confusion net with format "<<format<<"\n");
+ switch(format)
+ {
+ case 0: return ReadFormat0(in,factorOrder);
+ case 1: return ReadFormat1(in,factorOrder);
+ default:
+ stringstream strme;
+ strme << "ERROR: unknown format '"<<format
+ <<"' in ConfusionNet::Read";
+ UserMessage::Add(strme.str());
+ }
+ return false;
+}
+
+int ConfusionNet::Read(std::istream& in,
+ const std::vector<FactorType>& factorOrder)
+{
+ int rv=ReadF(in,factorOrder,0);
+ if(rv) stats.collect(*this);
+ return rv;
+}
+
+
+void ConfusionNet::String2Word(const std::string& s,Word& w,
+ const std::vector<FactorType>& factorOrder)
+{
+ std::vector<std::string> factorStrVector = Tokenize(s, "|");
+ for(size_t i=0;i<factorOrder.size();++i)
+ w.SetFactor(factorOrder[i],
+ FactorCollection::Instance().AddFactor(Input,factorOrder[i],
+ factorStrVector[i]));
+}
+
+bool ConfusionNet::ReadFormat0(std::istream& in,
+ const std::vector<FactorType>& factorOrder)
+{
+ Clear();
+ std::string line;
+ size_t numLinkParams = StaticData::Instance().GetNumLinkParams();
+ size_t numLinkWeights = StaticData::Instance().GetNumInputScores();
+ bool addRealWordCount = ((numLinkParams + 1) == numLinkWeights);
+
+ while(getline(in,line)) {
+ std::istringstream is(line);
+ std::string word;
+
+ Column col;
+ while(is>>word) {
+ Word w;
+ String2Word(word,w,factorOrder);
+ std::vector<float> probs(numLinkWeights,0.0);
+ for(size_t i=0;i<numLinkParams;i++) {
+ double prob;
+ if (!(is>>prob)) {
+ TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
+ return false;
+ }
+ if(prob<0.0)
+ {
+ VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
+ prob=0.0;
+ }
+ else if (prob>1.0)
+ {
+ VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
+ prob=1.0;
+ }
+ probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
+
+ }
+ //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
+ if (addRealWordCount && word!=EPSILON && word!="")
+ probs[numLinkParams] = -1.0;
+ col.push_back(std::make_pair(w,probs));
+ }
+ if(col.size()) {
+ data.push_back(col);
+ ShrinkToFit(data.back());
+ }
+ else break;
+ }
+ return !data.empty();
+}
+bool ConfusionNet::ReadFormat1(std::istream& in,
+ const std::vector<FactorType>& factorOrder)
+{
+ Clear();
+ std::string line;
+ if(!getline(in,line)) return 0;
+ size_t s;
+ if(getline(in,line)) s=atoi(line.c_str()); else return 0;
+ data.resize(s);
+ for(size_t i=0;i<data.size();++i) {
+ if(!getline(in,line)) return 0;
+ std::istringstream is(line);
+ if(!(is>>s)) return 0;
+ std::string word;double prob;
+ data[i].resize(s);
+ for(size_t j=0;j<s;++j)
+ if(is>>word>>prob) {
+ //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
+ data[i][j].second = std::vector<float> (1);
+ data[i][j].second.push_back((float) log(prob));
+ if(data[i][j].second[0]<0) {
+ VERBOSE(1, "WARN: neg costs: "<<data[i][j].second[0]<<" -> set to 0\n");
+ data[i][j].second[0]=0.0;}
+ String2Word(word,data[i][j].first,factorOrder);
+ } else return 0;
+ }
+ return !data.empty();
+}
+
+void ConfusionNet::Print(std::ostream& out) const {
+ out<<"conf net: "<<data.size()<<"\n";
+ for(size_t i=0;i<data.size();++i) {
+ out<<i<<" -- ";
+ for(size_t j=0;j<data[i].size();++j) {
+ out<<"("<<data[i][j].first.ToString()<<", ";
+ for(std::vector<float>::const_iterator scoreIterator = data[i][j].second.begin();scoreIterator<data[i][j].second.end();scoreIterator++) {
+ out<<", "<<*scoreIterator;
+ }
+ out<<") ";
+ }
+ out<<"\n";
+ }
+ out<<"\n\n";
+}
+
+#ifdef _WIN32
+#pragma warning(disable:4716)
+#endif
+Phrase ConfusionNet::GetSubString(const WordsRange&) const {
+ TRACE_ERR("ERROR: call to ConfusionNet::GetSubString\n");
+ abort();
+ //return Phrase(Input);
+}
+
+std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint) const{ //not well defined yet
+ TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
+ return "";
+}
+#ifdef _WIN32
+#pragma warning(disable:4716)
+#endif
+const Word& ConfusionNet::GetWord(size_t) const {
+ TRACE_ERR("ERROR: call to ConfusionNet::GetFactorArray\n");
+ abort();
+}
+#ifdef _WIN32
+#pragma warning(default:4716)
+#endif
+std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
+{
+ cn.Print(out);return out;
+}
+
+TranslationOptionCollection*
+ConfusionNet::CreateTranslationOptionCollection() const
+{
+ size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
+ float translationOptionThreshold = StaticData::Instance().GetTranslationOptionThreshold();
+ TranslationOptionCollection *rv= new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
+ assert(rv);
+ return rv;
+}
+
+}
+
+
diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h
new file mode 100644
index 000000000..3f7d55c85
--- /dev/null
+++ b/moses/src/ConfusionNet.h
@@ -0,0 +1,63 @@
+// $Id: ConfusionNet.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_ConfusionNet_h
+#define moses_ConfusionNet_h
+
+#include <vector>
+#include <iostream>
+#include "Word.h"
+#include "InputType.h"
+
+namespace Moses
+{
+
+class FactorCollection;
+class TranslationOptionCollection;
+class Sentence;
+
+class ConfusionNet : public InputType {
+ public:
+ typedef std::vector<std::pair<Word,std::vector<float> > > Column;
+
+ protected:
+ std::vector<Column> data;
+
+ bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
+ bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
+ void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
+
+ public:
+ ConfusionNet();
+ virtual ~ConfusionNet();
+
+ ConfusionNet(Sentence const& s);
+
+ InputTypeEnum GetType() const
+ { return ConfusionNetworkInput;}
+
+ const Column& GetColumn(size_t i) const {assert(i<data.size());return data[i];}
+ const Column& operator[](size_t i) const {return GetColumn(i);}
+ virtual size_t GetColumnIncrement(size_t i, size_t j) const; //! returns 1 for CNs
+
+ bool Empty() const {return data.empty();}
+ size_t GetSize() const {return data.size();}
+ void Clear() {data.clear();}
+
+ bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
+ virtual void Print(std::ostream&) const;
+
+ int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+
+ Phrase GetSubString(const WordsRange&) const; //TODO not defined
+ std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
+ const Word& GetWord(size_t pos) const;
+
+ TranslationOptionCollection* CreateTranslationOptionCollection() const;
+};
+
+std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn);
+
+
+}
+
+#endif
diff --git a/moses/src/DPR_reordering.cpp b/moses/src/DPR_reordering.cpp
new file mode 100644
index 000000000..2b80ae623
--- /dev/null
+++ b/moses/src/DPR_reordering.cpp
@@ -0,0 +1,577 @@
+/*
+**Remark: the sentences should not exceed 1000 words!!!!!!!!!!!**
+**********************************************************
+Cpp file ---------- DPR_reordering.cpp
+The reordering feature function for MOSES
+based on the DPR model proposed in (Ni et al., 2009)
+
+Components:
+ vector<unsigned long long> m_dprOptionStartPOS --- store the start pos for each sentence option
+ ifstream sentenceOptionFile --- the ifstream file of the sentenceOption
+ mutable long int sentenceID --- store the ID of current sentence needed translation
+ mutable mapPhraseOption sentencePhraseOption --- store the phrase option for each sentence
+ int classSetup --- store the number of orientations
+ float unDetectProb --- the const reordering prob if the phrase pair is not in sentence option
+ vector<float> WDR_cost --- the word distance reordering cost
+Functions:
+0. Constructor: DPR_reordering(ScoreIndexManager &scoreIndexManager, const std::string &filePath, const std::vector<float>& weights)
+
+1. interface functions:
+ GetNumScoreComponents() --- return the number of scores the component used (usually 1)
+ GetScoreProducerDescription() --- return the name of the reordering model
+ GetScoreProducerWeightShortName() --- return the short name of the weight for the score
+2. Score producers:
+ Evaluate() --- to evaluate the reordering scores and add the score to the score component collection
+ EmptyHypothesisState() --- create an empty hypothesis
+
+3. Other functions:
+ constructSentencePhraseOption() --- Construct sentencePhraseOption using sentenceID
+ clearSentencePhraseOption() --- clear the sentence phrase options
+ generateReorderingProb(...) --- generate the reordering probability
+ createOrientationClass(int dist) --- create the orientation class
+**********************************************************
+*/
+
+#include "DPR_reordering.h"
+
+
+namespace Moses
+{
+
+/*
+1. constructor
+*/
+DPR_reordering::DPR_reordering(ScoreIndexManager &scoreIndexManager, const string filePath, const string classString, const vector<float>& weights)
+{
+ //1. Add the function in the scoreIndexManager
+ scoreIndexManager.AddScoreProducer(this);
+ //2. Set the weight for this score producer
+ const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
+
+ //3. Get the class setup
+ istringstream tempClassSetup(classString);
+ tempClassSetup>>classSetup;
+ if (classSetup==3)
+ {
+ for (int k=0; k<25; k++)
+ WDR_cost.push_back(log10(exp(-(float) k)));
+ unDetectProb = 0.3333;
+ }
+ else if (classSetup==5)
+ unDetectProb = log10(0.2);
+ else
+ cerr<<"Error in DPR_reordering: Currently there is no class setup: "<<classSetup<<" in our model.\n";
+
+ //4. get the start position of the sentence options
+ string fileStartPos = filePath+".startPosition"; //path of the sentence start position file
+ ifstream sentencePOS((char*) fileStartPos.c_str(),ios::binary);
+
+ if (!sentencePOS.is_open())
+ cerr<<"Error in DPR_reordering.cpp: can not open the sentence options start position file!\n";
+
+ string eachLine;
+ while (getline(sentencePOS,eachLine,'\n'))
+ {
+ istringstream tempString(eachLine);
+ unsigned long long tempValue;
+ tempString>>tempValue;
+ m_dprOptionStartPOS.push_back(tempValue); //Get the start position of each sentence option DB
+ }
+
+ //5. Read the first sentence option
+ sentenceID=0;
+ sentenceOptionFile.open((char*) filePath.c_str(),ios::binary);
+
+ if (!sentenceOptionFile.is_open())
+ cerr<<"Error in DPR_reordering.cpp: can not open the sentence options file!\n";
+ else
+ constructSentencePhraseOption(); //construct the first sentencePhraseOption
+
+ sentencePOS.close();
+}
+
+/*
+2. interface functions
+*/
+
+//return the number of score components
+size_t DPR_reordering::GetNumScoreComponents() const
+{
+ return 1;
+}
+
+//return the description of this feature function
+string DPR_reordering::GetScoreProducerDescription() const
+{
+ return "Distance_phrase_reordering_probabilities_produders";
+ }
+
+//return the weight short name
+string DPR_reordering::GetScoreProducerWeightShortName() const
+{
+ return "wDPR";
+ }
+
+/*
+3. the score producers
+*/
+const FFState* DPR_reordering::EmptyHypothesisState() const
+{
+ //Do nothing
+ return NULL;
+}
+
+
+//given the hypothesis (and previous hypothesis) computed and add the reordering score
+FFState* DPR_reordering::Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const
+{
+ //cerr << cur_hypo.GetInput();
+ //cerr << cur_hypo.GetInput().GetTranslationId();
+
+ //1. Check the sentence phrase option (check the ID starts from 0 or 1?)
+ long int currentSentenceID = cur_hypo.GetInput().GetTranslationId();
+ if (sentenceID!=currentSentenceID)
+ {
+ sentenceID=currentSentenceID;
+ clearSentencePhraseOption(); //clear all components in the sentencePhraseOption
+ constructSentencePhraseOption(); //construct the first sentencePhraseOption
+ }
+
+ //2. get the information current phrase: left_boundary, right_boundary, target translation
+ // prev phrase: right_boundary
+ size_t prev_right_boundary;
+ size_t curr_left_boundary;
+ size_t curr_right_boundary;
+ const Hypothesis* prevHypothesis = cur_hypo.GetPrevHypo();
+ //check if there is a previous hypo
+ if (prevHypothesis->GetId()==0)
+ prev_right_boundary=-1;
+ else
+ prev_right_boundary=prevHypothesis->GetCurrSourceWordsRange().GetEndPos();
+
+ const WordsRange currWordsRange = cur_hypo.GetCurrSourceWordsRange();
+ curr_left_boundary = currWordsRange.GetStartPos();
+ curr_right_boundary = currWordsRange.GetEndPos();
+ string targetTranslation = dynamic_cast<const Phrase&>(cur_hypo.GetCurrTargetPhrase()).ToString();
+ targetTranslation.erase(targetTranslation.end()-1); //remove the last space in the target phrase
+
+ //3. Get the reordering probability
+ float reorderingProb = generateReorderingProb(curr_left_boundary, curr_right_boundary, prev_right_boundary, targetTranslation);
+ //float reorderingProb = generateReorderingProb(curr_left_boundary, curr_right_boundary, prev_right_boundary, targetTranslation, 1);
+
+
+ //simple, update the score -1.0
+ accumulator->PlusEquals(this,reorderingProb);
+ return NULL;
+}
+
+
+/*
+4. Other functions
+*/
+
+/*
+4.1 Clear the content in sentencePhraseOption
+*/
+void DPR_reordering::clearSentencePhraseOption() const
+{
+ for (mapPhraseOption::iterator iterator = sentencePhraseOption.begin(); iterator!= sentencePhraseOption.end(); iterator++)
+ {
+ iterator->second.clear(); //clear each map in mapTargetProbOption
+ }
+ sentencePhraseOption.clear(); //clear the components in sentencePhraseOption
+ }
+
+
+/*
+4.1* Clear the content in sentencePhraseOption (overload)
+*/
+/*void DPR_reordering::clearSentencePhraseOption() const
+{
+ for (mapPhraseOptionLeft::iterator iterator_l = sentencePhraseOption.begin(); iterator_l!= sentencePhraseOption.end(); iterator_l++)
+ {
+ for (mapPhraseOptionRight::iterator iterator_r = iterator_l->second.begin(); iterator_r!= iterator_l->second.end(); iterator_r++)
+ {
+ iterator_r->second.clear(); //clear each map in mapTargetProbOption
+ }
+ iterator_l->second.clear(); //clear each map in boundary_l
+ }
+ sentencePhraseOption.clear(); //clear the components in sentencePhraseOption
+ }
+ */
+/*
+4.2 Construct sentencePhraseOption using sentenceID
+*/
+void DPR_reordering::constructSentencePhraseOption() const
+{
+ //1. Get the start position of the sentence options
+ const_cast<ifstream&>(sentenceOptionFile).seekg(m_dprOptionStartPOS[sentenceID],ios::beg); //set the offset
+ string eachSentence;
+ getline(const_cast<ifstream&>(sentenceOptionFile) ,eachSentence,'\n');
+
+ //2. Search each separation
+ size_t boundaryFound = eachSentence.find(" ::: "); //find the separation between the boundary and the values
+ size_t boundaryFound_end; //find the end of the boundary
+ int countBoundaryOption=0;
+ while (boundaryFound!=string::npos)
+ {
+ //2.1 Get the boundary (create a phraseOption map)
+ //vector<unsigned short> boundary; //store the boundary
+ unsigned short boundary_int;
+ string tempString; //store the boundary
+ if (countBoundaryOption==0)
+ tempString=eachSentence.substr(0,boundaryFound); //get the boundary string
+ else
+ tempString=eachSentence.substr(boundaryFound_end+5,boundaryFound-boundaryFound_end-5);
+
+ istringstream boundaryString(tempString);
+ //while (boundaryString>>boundary_int)
+ // boundary.push_back(boundary_int);
+ boundaryString>>boundary_int;
+ size_t boundary=boundary_int*1000; //store the boundary
+ boundaryString>>boundary_int;
+ boundary+=boundary_int;
+
+ //2.2 Get the target string (all target translations)
+ boundaryFound_end=eachSentence.find(" ;;;",boundaryFound+5);
+ string targetString=eachSentence.substr(boundaryFound+5,boundaryFound_end-boundaryFound-5);
+ size_t targetFound=targetString.find(" ||| ");
+ size_t probFound=targetString.find(" ||| ",targetFound+5);
+ size_t probFound_prev; //store the previous probs position
+ int countPhraseOption=0;
+ mapTargetProbOption tempTargetProbOption;
+ while (targetFound!=string::npos)
+ {
+ if (probFound==string::npos)
+ probFound=targetString.size();
+ string target; //store each target phrase
+ string tempProbString; //store the probability string
+ vector<float> tempProbs; //store the probabilities
+ float probValue; //store the probability value
+
+ //2.3 Get each target string
+ if (countPhraseOption==0)
+ target = targetString.substr(0,targetFound);
+ else
+ target= targetString.substr(probFound_prev+5,targetFound-probFound_prev-5);
+
+ //2.4 Get the probability vector
+ tempProbString=targetString.substr(targetFound+5,probFound-targetFound-5);
+ istringstream probString(tempProbString);
+ while(probString>>probValue)
+ {
+ if (classSetup==5)
+ probValue=log10(probValue); //get the log probability
+ tempProbs.push_back(probValue);
+ }
+
+ //2.5 Update the information
+ //sentencePhraseOption[boundary][target]=tempProbs;
+ tempTargetProbOption[target] = tempProbs;
+ countPhraseOption++;
+ probFound_prev=probFound;
+ targetFound=targetString.find(" ||| ",probFound+5);
+ if (targetFound!=string::npos)
+ probFound=targetString.find(" ||| ",targetFound+5);
+ }
+ //3. Get the next boundary
+
+ sentencePhraseOption[boundary]=tempTargetProbOption;
+ countBoundaryOption++;
+ boundaryFound=eachSentence.find(" ::: ",boundaryFound_end+5); //Get next boundary found
+ }
+ }
+
+/*
+4.2* Construct sentencePhraseOption using sentenceID (overload function)
+*/
+/*void DPR_reordering::constructSentencePhraseOption() const
+{
+ //1. Get the start position of the sentence options
+ const_cast<ifstream&>(sentenceOptionFile).seekg(m_dprOptionStartPOS[sentenceID],ios::beg); //set the offset
+ string eachSentence;
+ getline(const_cast<ifstream&>(sentenceOptionFile) ,eachSentence,'\n');
+
+ //2. Search each separation
+ size_t boundaryFound = eachSentence.find(" ::: "); //find the separation between the boundary and the values
+ size_t boundaryFound_end; //find the end of the boundary
+ int countBoundaryOption=0;
+ while (boundaryFound!=string::npos)
+ {
+ //2.1 Get the boundary (create a phraseOption map)
+ unsigned short boundary_left;
+ unsigned short boundary_right;
+ string tempString; //store the boundary
+ if (countBoundaryOption==0)
+ tempString=eachSentence.substr(0,boundaryFound); //get the boundary string
+ else
+ tempString=eachSentence.substr(boundaryFound_end+5,boundaryFound-boundaryFound_end-5);
+
+ istringstream boundaryString(tempString);
+ boundaryString>>boundary_left;
+ boundaryString>>boundary_right;
+
+
+
+
+ //2.2 Get the target string (all target translations)
+ boundaryFound_end=eachSentence.find(" ;;;",boundaryFound+5);
+ string targetString=eachSentence.substr(boundaryFound+5,boundaryFound_end-boundaryFound-5);
+ size_t targetFound=targetString.find(" ||| ");
+ size_t probFound=targetString.find(" ||| ",targetFound+5);
+ size_t probFound_prev; //store the previous probs position
+ int countPhraseOption=0;
+ mapTargetProbOption tempTargetProbOption;
+ while (targetFound!=string::npos)
+ {
+ if (probFound==string::npos)
+ probFound=targetString.size();
+ string target; //store each target phrase
+ string tempProbString; //store the probability string
+ vector<float> tempProbs; //store the probabilities
+ float probValue; //store the probability value
+
+ //2.3 Get each target string
+ if (countPhraseOption==0)
+ target = targetString.substr(0,targetFound);
+ else
+ target= targetString.substr(probFound_prev+5,targetFound-probFound_prev-5);
+
+ //2.4 Get the probability vector
+ tempProbString=targetString.substr(targetFound+5,probFound-targetFound-5);
+ istringstream probString(tempProbString);
+ while(probString>>probValue)
+ {
+ if (classSetup==5)
+ probValue=log10(probValue); //get the log probability
+ tempProbs.push_back(probValue);
+ }
+
+ //2.5 Update the information
+ //sentencePhraseOption[boundary_left][boundary_right][target]=tempProbs;
+ tempTargetProbOption[target] = tempProbs;
+ countPhraseOption++;
+ probFound_prev=probFound;
+ targetFound=targetString.find(" ||| ",probFound+5);
+ if (targetFound!=string::npos)
+ probFound=targetString.find(" ||| ",targetFound+5);
+ }
+ //3. Get the next boundary
+ sentencePhraseOption[boundary_left][boundary_right]=tempTargetProbOption;
+ countBoundaryOption++;
+ boundaryFound=eachSentence.find(" ::: ",boundaryFound_end+5); //Get next boundary found
+ }
+ }
+*/
+/*
+4.3 generate the reordering probability
+*/
+
+float DPR_reordering::generateReorderingProb(size_t boundary_left, size_t boundary_right, size_t prev_boundary_right, string targetPhrase) const
+{
+ float reorderingProb;
+ //1. get the distance reordering
+ int reorderDistance = prev_boundary_right+1-boundary_left; //reordering distance
+ int reorderOrientation = createOrientationClass(reorderDistance); //reordering orientation
+ //2. get the boundary vector
+
+ //vector<unsigned short> phrase_boundary;
+ //phrase_boundary.push_back(boundary_left);
+ //phrase_boundary.push_back(boundary_right);
+
+ //ostringstream tempBoundary;
+ //tempBoundary<<boundary_left;
+ //tempBoundary<<" ";
+ //tempBoundary<<boundary_right;
+ //string phrase_boundary=tempBoundary.str();
+ size_t phrase_boundary = boundary_left*1000+boundary_right;
+
+ mapPhraseOption::const_iterator boundaryFound = sentencePhraseOption.find(phrase_boundary);
+
+ //3.1 If no this source phrase (then return equal probability)
+ if (boundaryFound==sentencePhraseOption.end())
+ {
+ if (classSetup==3)
+ {
+ reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=unDetectProb;
+ }
+ }
+ else
+ {
+ mapTargetProbOption::const_iterator targetFound = boundaryFound->second.find(targetPhrase);
+ //3.2 if no this target phrase
+ if (targetFound == boundaryFound->second.end())
+ {
+ if (classSetup==3)
+ {
+ reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=unDetectProb;
+ }
+ }
+ //3.3 else, get normal reordering probability
+ else
+ {
+ if (classSetup ==3)
+ {
+ if (reorderOrientation==1) //special case: monotone
+ {
+ if (targetFound->second[1]>0.5)
+ reorderingProb=0.0;
+ else
+ {
+ float ratio=min(MAXRATIO, 1.0/(3*targetFound->second[1]));
+ reorderingProb=ratio*WDR_cost[1];
+ }
+ }
+ else
+ {
+ float ratio=min(MAXRATIO, 1.0/(3*targetFound->second[reorderOrientation]));
+ reorderingProb=ratio*WDR_cost[abs(reorderDistance)];
+ }
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=targetFound->second[reorderOrientation];
+ }
+ }
+ }
+
+ return reorderingProb;
+ }
+
+
+/*
+4.3* generate the reordering probability (overload)
+*/
+/*float DPR_reordering::generateReorderingProb(size_t boundary_left, size_t boundary_right, size_t prev_boundary_right, string targetPhrase) const
+{
+ float reorderingProb;
+ //1. get the distance reordering
+ int reorderDistance = prev_boundary_right+1-boundary_left; //reordering distance
+ int reorderOrientation = createOrientationClass(reorderDistance); //reordering orientation
+ //2. get the boundary vector
+ mapPhraseOptionLeft::const_iterator boundaryFound_left = sentencePhraseOption.find(boundary_left);
+
+ //3.1 If no this source phrase (then return equal probability)
+ if (boundaryFound_left==sentencePhraseOption.end())
+ {
+ if (classSetup==3)
+ {
+ reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=unDetectProb;
+ }
+ }
+ else
+ {
+ mapPhraseOptionRight::const_iterator boundaryFound_right = boundaryFound_left->second.find(boundary_right);
+ if (boundaryFound_right==boundaryFound_left->second.end())
+ {
+ if (classSetup==3)
+ {
+ reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=unDetectProb;
+ }
+ }
+ else{
+ mapTargetProbOption::const_iterator targetFound = boundaryFound_right->second.find(targetPhrase);
+ //3.2 if no this target phrase
+ if (targetFound == boundaryFound_right->second.end())
+ {
+ if (classSetup==3)
+ {
+ reorderingProb = WDR_cost[abs(reorderDistance)]; //using word-based distance reordering
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=unDetectProb;
+ }
+ }
+ //3.3 else, get normal reordering probability
+ else
+ {
+ if (classSetup ==3)
+ {
+ if (reorderOrientation==1) //special case: monotone
+ {
+ if (targetFound->second[1]>0.5)
+ reorderingProb=0.0;
+ else
+ {
+ float ratio=min(MAXRATIO, 1.0/(3*targetFound->second[1]));
+ reorderingProb=ratio*WDR_cost[1];
+ }
+ }
+ else
+ {
+ float ratio=min(MAXRATIO, 1.0/(3*targetFound->second[reorderOrientation]));
+ reorderingProb=ratio*WDR_cost[abs(reorderDistance)];
+ }
+ }
+ else if (classSetup==5)
+ {
+ reorderingProb=targetFound->second[reorderOrientation];
+ }
+ }
+ }
+ }
+
+ return reorderingProb;
+ }
+*/
+/*
+4.4. int createOrientationClass(int dist,int classSetup) --- the create the orientation class
+*/
+int DPR_reordering::createOrientationClass(int dist) const
+{
+ int orientationClass;
+ //If three-class setup
+ if (classSetup==3)
+ {
+ if (dist<0)
+ orientationClass=0;
+ else if (dist==0)
+ orientationClass=1;
+ else
+ orientationClass=2;
+ }
+ else if (classSetup==5)
+ {
+ if (dist<=-5)
+ orientationClass=0;
+ else if (dist>-5 and dist<0)
+ orientationClass=1;
+ else if (dist==0)
+ orientationClass=2;
+ else if (dist>0 and dist<5)
+ orientationClass=3;
+ else
+ orientationClass=4;
+ }
+ else
+ {
+ cerr<<"Error in DPR_reordering: Currently there is no class setup: "<<classSetup<<" in our model.\n";
+ }
+
+
+ return orientationClass; //return the orientation class
+ }
+
+DPR_reordering::~DPR_reordering()
+{
+ sentenceOptionFile.close();
+ }
+
+} // namespace
diff --git a/moses/src/DPR_reordering.h b/moses/src/DPR_reordering.h
new file mode 100644
index 000000000..4f8a08900
--- /dev/null
+++ b/moses/src/DPR_reordering.h
@@ -0,0 +1,130 @@
+/*
+**Remark: input sentences must not exceed 1000 words.**
+**********************************************************
+Header file ---------- DPR_reordering.h
+The reordering feature function for MOSES
+based on the DPR model proposed in (Ni et al., 2009)
+
+Components:
+ vector<unsigned long long> m_dprOptionStartPOS --- store the start pos for each sentence option
+ ifstream sentenceOptionFile --- the ifstream file of the sentenceOption
+ mutable long int sentenceID --- store the ID of the current sentence to be translated
+ mutable mapPhraseOption sentencePhraseOption --- store the phrase options for each sentence
+ int classSetup --- store the number of orientations
+ float unDetectProb --- the constant reordering prob used when the phrase pair is not in the sentence option
+ vector<float> WDR_cost --- the word-distance reordering cost
+
+Functions:
+0. Constructor: DPR_reordering(ScoreIndexManager &scoreIndexManager, const std::string &filePath, const std::vector<float>& weights)
+
+1. interface functions:
+ GetNumScoreComponents() --- return the number of scores the component used (usually 1)
+ GetScoreProducerDescription() --- return the name of the reordering model
+ GetScoreProducerWeightShortName() --- return the short name of the weight for the score
+2. Score producers:
+ Evaluate() --- to evaluate the reordering scores and add the score to the score component collection
+ EmptyHypothesisState() --- create an empty hypothesis
+
+3. Other functions:
+ constructSentencePhraseOption() --- Construct sentencePhraseOption using sentenceID
+ clearSentencePhraseOption() --- clear the sentence phrase options
+ generateReorderingProb(...) --- generate the reordering probability
+ createOrientationClass(int dist) --- create the orientation class
+**********************************************************
+*/
+#pragma once
+#ifndef DPR_REORDERING_H
+#define DPR_REORDERING_H
+#include <cstdlib>
+#include <map>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <sstream> //using istringstream
+#include <fstream> //using ifstream
+#include <math.h>
+#include "FeatureFunction.h"
+#include "Hypothesis.h"
+#include "WordsRange.h"
+#include "StaticData.h"
+#include "InputType.h"
+#define MAXRATIO 5.0 //the maximum ratio for the 3-class setup
+
+#ifdef __GNUC__
+#include <ext/hash_map>
+#else
+#include <hash_map>
+#endif
+
+//********************************************************************
+//The following definition is for the use of hash_map with string key
+namespace __gnu_cxx
+{
+ template<> struct hash< std::string >
+ {
+ size_t operator()( const std::string& x ) const
+ {
+ return hash< const char* >()( x.c_str() );
+ }
+ };
+}
+//********************************************************************
+
+namespace std{using namespace __gnu_cxx;}
+using namespace std;
+using std::ifstream;
+using std::istringstream;
+using std::vector;
+using std::string;
+using std::ostringstream;
+//for sentencePhraseOption
+//typedef std::hash_map<unsigned short, hash_map<unsigned short, hash_map<string, vector<float> > > > mapPhraseOptionLeft;
+//typedef std::hash_map<unsigned short, hash_map<string, vector<float> > > mapPhraseOptionRight;
+//typedef std::map<vector<unsigned short>, map<string, vector<float> > > mapPhraseOption;
+typedef std::hash_map<size_t, hash_map<string, vector<float> > > mapPhraseOption;
+typedef std::hash_map<string, vector<float> > mapTargetProbOption;
+
+
+namespace Moses
+{
+ using namespace std;
+
+ //define the class DPR_reordering
+ class DPR_reordering : public StatefulFeatureFunction
+ {
+ public:
+ //constructor
+ DPR_reordering(ScoreIndexManager &scoreIndexManager, const string filePath, const string classString, const vector<float>& weights);
+ ~DPR_reordering();
+ public:
+ //interface: include 3 functions
+ size_t GetNumScoreComponents() const; //return the number of scores the component used
+ string GetScoreProducerDescription() const; //return the name of the reordering model
+ string GetScoreProducerWeightShortName() const; //return the short name of the weight for the score
+ public:
+ //The evaluation function and score calculation function
+ // FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator);
+ FFState* Evaluate(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
+
+ const FFState* EmptyHypothesisState() const;
+
+ public:
+ void clearSentencePhraseOption() const; //clear the sentence phrase options
+ void constructSentencePhraseOption() const; //construct sentence phrase options (for a sentence)
+ float generateReorderingProb(size_t boundary_left, size_t boundary_right, size_t prev_boundary_right, string targetPhrase) const; //generate the reordering probability
+ int createOrientationClass(int dist) const; //the create the orientation class
+ private:
+ vector<unsigned long long> m_dprOptionStartPOS; //store the start pos for each sentence option
+ ifstream sentenceOptionFile; //the ifstream file of the sentenceOption
+ mutable long int sentenceID; //store the ID of current sentence needed translation
+ mutable mapPhraseOption sentencePhraseOption; //store the phrase option for each sentence
+ //mutable mapPhraseOptionLeft sentencePhraseOption;
+ int classSetup; //store the number of orientations
+ float unDetectProb; //the constant reordering prob used when the phrase pair is not in the sentence option
+ vector<float> WDR_cost; //the word-distance reordering cost
+ };
+};
+#endif
diff --git a/moses/src/DecodeGraph.cpp b/moses/src/DecodeGraph.cpp
new file mode 100644
index 000000000..2f1c5c859
--- /dev/null
+++ b/moses/src/DecodeGraph.cpp
@@ -0,0 +1,36 @@
+// $Id: TranslationOptionCollection.cpp 1429 2007-07-20 13:03:12Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "DecodeGraph.h"
+#include "DecodeStep.h"
+#include "TypeDef.h"
+#include "Util.h"
+
+namespace Moses
+{
+DecodeGraph::~DecodeGraph()
+{
+ RemoveAllInColl(m_steps);
+}
+
+}
+
diff --git a/moses/src/DecodeGraph.h b/moses/src/DecodeGraph.h
new file mode 100644
index 000000000..d87f78a72
--- /dev/null
+++ b/moses/src/DecodeGraph.h
@@ -0,0 +1,68 @@
+// $Id: TranslationOptionCollection.cpp 1429 2007-07-20 13:03:12Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_DecodeGraph_h
+#define moses_DecodeGraph_h
+
+#include <list>
+#include <iterator>
+
+namespace Moses
+{
+
+class DecodeStep;
+
+//! list of DecodeStep s which factorizes the translation
+class DecodeGraph
+{
+protected:
+ std::list<const DecodeStep*> m_steps;
+ size_t m_position;
+
+public:
+ /**
+ * position: The position of this graph within the decode sequence.
+ **/
+ DecodeGraph(size_t position): m_position(position) {}
+ //! iterators
+ typedef std::list<const DecodeStep*>::iterator iterator;
+ typedef std::list<const DecodeStep*>::const_iterator const_iterator;
+ const_iterator begin() const { return m_steps.begin(); }
+ const_iterator end() const { return m_steps.end(); }
+
+ size_t GetPosition() const
+ {
+ return m_position;
+ }
+
+ ~DecodeGraph();
+
+ //! Add another decode step to the graph
+ void Add(const DecodeStep *decodeStep)
+ {
+ m_steps.push_back(decodeStep);
+ }
+};
+
+
+}
+#endif
diff --git a/moses/src/DecodeStep.cpp b/moses/src/DecodeStep.cpp
new file mode 100644
index 000000000..432d96f6b
--- /dev/null
+++ b/moses/src/DecodeStep.cpp
@@ -0,0 +1,66 @@
+// $Id: DecodeStep.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "DecodeStep.h"
+#include "PhraseDictionaryMemory.h"
+#include "GenerationDictionary.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+DecodeStep::DecodeStep(Dictionary *ptr, const DecodeStep* prev)
+:m_ptr(ptr)
+{
+ FactorMask prevOutputFactors;
+ if (prev) prevOutputFactors = prev->m_outputFactors;
+ m_outputFactors = prevOutputFactors;
+ FactorMask conflictMask = (m_outputFactors & ptr->GetOutputFactorMask());
+ m_outputFactors |= ptr->GetOutputFactorMask();
+ FactorMask newOutputFactorMask = m_outputFactors ^ prevOutputFactors; //xor
+ m_newOutputFactors.resize(newOutputFactorMask.count());
+ m_conflictFactors.resize(conflictMask.count());
+ size_t j=0, k=0;
+ for (size_t i = 0; i < MAX_NUM_FACTORS; i++) {
+ if (newOutputFactorMask[i]) m_newOutputFactors[j++] = i;
+ if (conflictMask[i]) m_conflictFactors[k++] = i;
+ }
+ VERBOSE(2,"DecodeStep():\n\toutputFactors=" << m_outputFactors
+ << "\n\tconflictFactors=" << conflictMask
+ << "\n\tnewOutputFactors=" << newOutputFactorMask << std::endl);
+}
+
+DecodeStep::~DecodeStep() {}
+
+/** returns phrase table (dictionary) for translation step */
+const PhraseDictionary &DecodeStep::GetPhraseDictionary() const
+{
+ return *static_cast<const PhraseDictionary*>(m_ptr);
+}
+
+/** returns generation table (dictionary) for generation step */
+const GenerationDictionary &DecodeStep::GetGenerationDictionary() const
+{
+ return *static_cast<const GenerationDictionary*>(m_ptr);
+}
+
+}
+
+
diff --git a/moses/src/DecodeStep.h b/moses/src/DecodeStep.h
new file mode 100644
index 000000000..2d37b337f
--- /dev/null
+++ b/moses/src/DecodeStep.h
@@ -0,0 +1,113 @@
+// $Id: DecodeStep.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_DecodeStep_h
+#define moses_DecodeStep_h
+
+#include <cassert>
+#include "TypeDef.h"
+#include "Dictionary.h"
+
+namespace Moses
+{
+
+class PhraseDictionary;
+class GenerationDictionary;
+class TranslationOption;
+class TranslationOptionCollection;
+class PartialTranslOptColl;
+class FactorCollection;
+class InputType;
+
+/*! Specification for a decoding step.
+ * The factored translation model consists of Translation and Generation
+ * steps, which consult a Dictionary of phrase translations or word
+ * generations. This class implements the specification for one of these
+ * steps, both the DecodeType and a pointer to the Dictionary
+ **/
+class DecodeStep
+{
+protected:
+ const Dictionary *m_ptr; //!< pointer to translation/generation table
+ FactorMask m_outputFactors; //!< mask of what factors exist on the output side after this decode step
+ std::vector<FactorType> m_conflictFactors; //!< list of the factors that may conflict during this step
+ std::vector<FactorType> m_newOutputFactors; //!< list of the factors that are new in this step, may be empty
+
+public:
+ DecodeStep(); //! not implemented
+ DecodeStep(Dictionary *ptr, const DecodeStep* prevDecodeStep);
+ virtual ~DecodeStep();
+
+ //! mask of factors that are present after this decode step
+ const FactorMask& GetOutputFactorMask() const
+ {
+ return m_outputFactors;
+ }
+
+ //! returns true if this decode step must match some pre-existing factors
+ bool IsFilteringStep() const
+ {
+ return !m_conflictFactors.empty();
+ }
+
+ //! returns true if this decode step produces one or more new factors
+ bool IsFactorProducingStep() const
+ {
+ return !m_newOutputFactors.empty();
+ }
+
+ /*! returns a list (possibly empty) of the (target side) factors that
+ * are produced in this decoding step. For example, if a previous step
+ * generated factor 1, and this step generates 1,2, then only 2 will be
+ * in the returned vector. */
+ const std::vector<FactorType>& GetNewOutputFactors() const
+ {
+ return m_newOutputFactors;
+ }
+
+ /*! returns a list (possibly empty) of the (target side) factors that
+ * are produced BUT ALREADY EXIST and therefore must be checked for
+ * conflict or compatibility */
+ const std::vector<FactorType>& GetConflictFactors() const
+ {
+ return m_conflictFactors;
+ }
+
+ /*! returns phrase table (dictionary) for translation step */
+ const PhraseDictionary &GetPhraseDictionary() const;
+
+ /*! returns generation table (dictionary) for generation step */
+ const GenerationDictionary &GetGenerationDictionary() const;
+
+ /*! returns dictionary in abstract class */
+ const Dictionary* GetDictionaryPtr() const {return m_ptr;}
+
+ /*! Given an input TranslationOption, extend it in some way (put results in outputPartialTranslOptColl) */
+ virtual void Process(const TranslationOption &inputPartialTranslOpt
+ , const DecodeStep &decodeStep
+ , PartialTranslOptColl &outputPartialTranslOptColl
+ , TranslationOptionCollection *toc
+ , bool adhereTableLimit) const = 0;
+
+};
+
+}
+#endif
diff --git a/moses/src/DecodeStepGeneration.cpp b/moses/src/DecodeStepGeneration.cpp
new file mode 100644
index 000000000..5907eb50f
--- /dev/null
+++ b/moses/src/DecodeStepGeneration.cpp
@@ -0,0 +1,176 @@
+// $Id: DecodeStepGeneration.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "DecodeStepGeneration.h"
+#include "GenerationDictionary.h"
+#include "TranslationOption.h"
+#include "TranslationOptionCollection.h"
+#include "PartialTranslOptColl.h"
+#include "FactorCollection.h"
+
+namespace Moses
+{
+DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev)
+: DecodeStep(dict, prev)
+{
+}
+
+const GenerationDictionary &DecodeStepGeneration::GetGenerationDictionary() const
+{
+ return *static_cast<const GenerationDictionary*>(m_ptr);
+}
+
+TranslationOption *DecodeStepGeneration::MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
+ , const ScoreComponentCollection& generationScore) const
+{
+ if (IsFilteringStep()) {
+ if (!oldTO.IsCompatible(mergePhrase, m_conflictFactors))
+ return NULL;
+ }
+
+ TranslationOption *newTransOpt = new TranslationOption(oldTO);
+ newTransOpt->MergeNewFeatures(mergePhrase, generationScore, m_newOutputFactors);
+ return newTransOpt;
+}
+
+// helpers
+typedef pair<Word, ScoreComponentCollection> WordPair;
+typedef list< WordPair > WordList;
+// 1st = word
+// 2nd = score
+typedef list< WordPair >::const_iterator WordListIterator;
+
+/** used in generation: increases iterators when looping through the exponential number of generation expansions */
+inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
+ , const vector< WordList > &wordListVector)
+{
+ for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++)
+ {
+ WordListIterator &iter = wordListIterVector[currPos];
+ iter++;
+ if (iter != wordListVector[currPos].end())
+ { // eg. 4 -> 5
+ return;
+ }
+ else
+ { // eg 9 -> 10
+ iter = wordListVector[currPos].begin();
+ }
+ }
+}
+
+void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
+ , const DecodeStep &decodeStep
+ , PartialTranslOptColl &outputPartialTranslOptColl
+ , TranslationOptionCollection *toc
+ , bool adhereTableLimit) const
+{
+ if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
+ { // word deletion
+
+ TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
+ outputPartialTranslOptColl.Add(newTransOpt);
+
+ return;
+ }
+
+ // normal generation step
+ const GenerationDictionary &generationDictionary = decodeStep.GetGenerationDictionary();
+// const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
+
+ const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
+ size_t targetLength = targetPhrase.GetSize();
+
+ // generation list for each word in phrase
+ vector< WordList > wordListVector(targetLength);
+
+ // create generation list
+ int wordListVectorPos = 0;
+ for (size_t currPos = 0 ; currPos < targetLength ; currPos++) // going through all words
+ {
+ // generatable factors for this word to be put in wordList
+ WordList &wordList = wordListVector[wordListVectorPos];
+ const Word &word = targetPhrase.GetWord(currPos);
+
+ // consult dictionary for possible generations for this word
+ const OutputWordCollection *wordColl = generationDictionary.FindWord(word);
+
+ if (wordColl == NULL)
+ { // word not found in generation dictionary
+ //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
+ return; // can't be part of a phrase, special handling
+ }
+ else
+ {
+ // sort(*wordColl, CompareWordCollScore);
+ OutputWordCollection::const_iterator iterWordColl;
+ for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl)
+ {
+ const Word &outputWord = (*iterWordColl).first;
+ const ScoreComponentCollection& score = (*iterWordColl).second;
+ // enter into word list generated factor(s) and its(their) score(s)
+ wordList.push_back(WordPair(outputWord, score));
+ }
+
+ wordListVectorPos++; // done, next word
+ }
+ }
+
+ // use generation list (wordList)
+ // set up iterators (total number of expansions)
+ size_t numIteration = 1;
+ vector< WordListIterator > wordListIterVector(targetLength);
+ vector< const Word* > mergeWords(targetLength);
+ for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
+ {
+ wordListIterVector[currPos] = wordListVector[currPos].begin();
+ numIteration *= wordListVector[currPos].size();
+ }
+
+ // go thru each possible factor for each word & create hypothesis
+ for (size_t currIter = 0 ; currIter < numIteration ; currIter++)
+ {
+ ScoreComponentCollection generationScore; // total score for this string of words
+
+ // create vector of words with new factors for last phrase
+ for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
+ {
+ const WordPair &wordPair = *wordListIterVector[currPos];
+ mergeWords[currPos] = &(wordPair.first);
+ generationScore.PlusEquals(wordPair.second);
+ }
+
+ // merge with existing trans opt
+ Phrase genPhrase(Output, mergeWords);
+ TranslationOption *newTransOpt = MergeGeneration(inputPartialTranslOpt, genPhrase, generationScore);
+ if (newTransOpt != NULL)
+ {
+ outputPartialTranslOptColl.Add(newTransOpt);
+ }
+
+ // increment iterators
+ IncrementIterators(wordListIterVector, wordListVector);
+ }
+}
+
+}
+
+
diff --git a/moses/src/DecodeStepGeneration.h b/moses/src/DecodeStepGeneration.h
new file mode 100644
index 000000000..18bd7517f
--- /dev/null
+++ b/moses/src/DecodeStepGeneration.h
@@ -0,0 +1,60 @@
+// $Id: DecodeStepGeneration.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_DecodeStepGeneration_h
+#define moses_DecodeStepGeneration_h
+
+#include "DecodeStep.h"
+
+namespace Moses
+{
+
+class GenerationDictionary;
+class Phrase;
+class ScoreComponentCollection;
+
+//! subclass of DecodeStep for generation step
+class DecodeStepGeneration : public DecodeStep
+{
+public:
+ DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev);
+
+ //! returns phrase table (dictionary) for translation step
+ const GenerationDictionary &GetGenerationDictionary() const;
+
+ virtual void Process(const TranslationOption &inputPartialTranslOpt
+ , const DecodeStep &decodeStep
+ , PartialTranslOptColl &outputPartialTranslOptColl
+ , TranslationOptionCollection *toc
+ , bool adhereTableLimit) const;
+
+private:
+ /*! create new TranslationOption from merging oldTO with mergePhrase
+ This function runs IsCompatible() to ensure the two can be merged
+ */
+ TranslationOption *MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
+ , const ScoreComponentCollection& generationScore) const;
+
+};
+
+
+}
+#endif
diff --git a/moses/src/DecodeStepTranslation.cpp b/moses/src/DecodeStepTranslation.cpp
new file mode 100644
index 000000000..4048fcf18
--- /dev/null
+++ b/moses/src/DecodeStepTranslation.cpp
@@ -0,0 +1,136 @@
+// $Id: DecodeStepTranslation.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "DecodeStepTranslation.h"
+#include "PhraseDictionaryMemory.h"
+#include "TranslationOption.h"
+#include "TranslationOptionCollection.h"
+#include "PartialTranslOptColl.h"
+#include "FactorCollection.h"
+
+namespace Moses
+{
+DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev)
+: DecodeStep(dict, prev), m_phraseDictionary(dict)
+{
+}
+
+/*const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
+{
+ return *m_phraseDictionary;
+}*/
+
+TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const
+{
+ if (IsFilteringStep()) {
+ if (!oldTO.IsCompatible(targetPhrase, m_conflictFactors)) return 0;
+ }
+
+ TranslationOption *newTransOpt = new TranslationOption(oldTO);
+ newTransOpt->MergeNewFeatures(targetPhrase, targetPhrase.GetScoreBreakdown(), m_newOutputFactors);
+ return newTransOpt;
+}
+
+
+void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
+ , const DecodeStep &decodeStep
+ , PartialTranslOptColl &outputPartialTranslOptColl
+ , TranslationOptionCollection *toc
+ , bool adhereTableLimit) const
+{
+ if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
+ { // word deletion
+
+ outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
+
+ return;
+ }
+
+ // normal trans step
+ const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
+ const PhraseDictionary &phraseDictionary = decodeStep.GetPhraseDictionary();
+ const size_t currSize = inputPartialTranslOpt.GetTargetPhrase().GetSize();
+ const size_t tableLimit = phraseDictionary.GetTableLimit();
+
+ const TargetPhraseCollection *phraseColl= phraseDictionary.GetTargetPhraseCollection(toc->GetSource(),sourceWordsRange);
+
+ if (phraseColl != NULL)
+ {
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
+
+ for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase)
+ {
+ const TargetPhrase& targetPhrase = **iterTargetPhrase;
+ // skip if the target phrase length does not match the current phrase length
+ if (targetPhrase.GetSize() != currSize) continue;
+
+ TranslationOption *newTransOpt = MergeTranslation(inputPartialTranslOpt, targetPhrase);
+ if (newTransOpt != NULL)
+ {
+ outputPartialTranslOptColl.Add( newTransOpt );
+ }
+ }
+ }
+ else if (sourceWordsRange.GetNumWordsCovered() == 1)
+ { // unknown handler
+ //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
+ }
+}
+
+
+void DecodeStepTranslation::ProcessInitialTranslation(
+ const InputType &source
+ ,PartialTranslOptColl &outputPartialTranslOptColl
+ , size_t startPos, size_t endPos, bool adhereTableLimit) const
+{
+ const size_t tableLimit = m_phraseDictionary->GetTableLimit();
+
+ const WordsRange wordsRange(startPos, endPos);
+ const TargetPhraseCollection *phraseColl = m_phraseDictionary->GetTargetPhraseCollection(source,wordsRange);
+
+ if (phraseColl != NULL)
+ {
+ IFVERBOSE(3) {
+ if(StaticData::Instance().GetInputType() == SentenceInput)
+ TRACE_ERR("[" << source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n");
+ else
+ TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
+ }
+
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
+
+ for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase)
+ {
+ const TargetPhrase &targetPhrase = **iterTargetPhrase;
+ outputPartialTranslOptColl.Add ( new TranslationOption(wordsRange, targetPhrase, source) );
+
+ VERBOSE(3,"\t" << targetPhrase << "\n");
+ }
+ VERBOSE(3,endl);
+ }
+}
+
+}
+
+
+
diff --git a/moses/src/DecodeStepTranslation.h b/moses/src/DecodeStepTranslation.h
new file mode 100644
index 000000000..4ffb70f2f
--- /dev/null
+++ b/moses/src/DecodeStepTranslation.h
@@ -0,0 +1,67 @@
+// $Id: DecodeStepTranslation.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_DecodeStepTranslation_h
+#define moses_DecodeStepTranslation_h
+
+#include "DecodeStep.h"
+#include "PhraseDictionary.h"
+
+namespace Moses
+{
+
+class PhraseDictionary;
+class TargetPhrase;
+
+//! subclass of DecodeStep for translation step
+class DecodeStepTranslation : public DecodeStep
+{
+public:
+ DecodeStepTranslation(); //! not implemented
+ DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev);
+
+ //! returns phrase table (dictionary) for translation step
+ const PhraseDictionary &GetPhraseDictionary() const;
+
+ virtual void Process(const TranslationOption &inputPartialTranslOpt
+ , const DecodeStep &decodeStep
+ , PartialTranslOptColl &outputPartialTranslOptColl
+ , TranslationOptionCollection *toc
+ , bool adhereTableLimit) const;
+
+ /*! initialize list of partial translation options by applying the first translation step
+ * over the source span [startPos, endPos] using this step's phrase dictionary
+ */
+ void ProcessInitialTranslation(
+ const InputType &source
+ , PartialTranslOptColl &outputPartialTranslOptColl
+ , size_t startPos, size_t endPos, bool adhereTableLimit) const;
+private:
+ /*! create new TranslationOption from merging oldTO with mergePhrase
+ This function runs IsCompatible() to ensure the two can be merged
+ */
+ TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const;
+ PhraseDictionary* m_phraseDictionary;
+};
+
+
+}
+#endif
diff --git a/moses/src/Dictionary.cpp b/moses/src/Dictionary.cpp
new file mode 100644
index 000000000..a57178d1a
--- /dev/null
+++ b/moses/src/Dictionary.cpp
@@ -0,0 +1,38 @@
+// $Id: Dictionary.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "Dictionary.h"
+#include "FactorTypeSet.h"
+
+namespace Moses
+{
+// Constructor: record how many score components this dictionary contributes.
+Dictionary::Dictionary(size_t numScoreComponent)
+ :m_numScoreComponent(numScoreComponent)
+{
+}
+
+Dictionary::~Dictionary() {}
+
+// Base implementation is a no-op; subclasses override to release
+// per-sentence temporary storage.
+void Dictionary::CleanUp() {}
+
+}
+
+
diff --git a/moses/src/Dictionary.h b/moses/src/Dictionary.h
new file mode 100644
index 000000000..a74c9aab6
--- /dev/null
+++ b/moses/src/Dictionary.h
@@ -0,0 +1,68 @@
+// $Id: Dictionary.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Dictionary_h
+#define moses_Dictionary_h
+
+#include <vector>
+#include "FactorTypeSet.h"
+#include "ScoreProducer.h"
+
+namespace Moses
+{
+
+/** Abstract class from which PhraseDictionary and GenerationDictionary
+ * are inherited.
+*/
+class Dictionary
+{
+protected:
+
+ const size_t m_numScoreComponent; // number of scores this dictionary produces
+ FactorMask m_inputFactors; // factor types consumed (set by subclasses)
+ FactorMask m_outputFactors; // factor types produced (set by subclasses)
+
+public:
+ //! Constructor
+ Dictionary(size_t numScoreComponent);
+ //!Destructor
+ virtual ~Dictionary();
+
+ //! returns output factor types as specified by the ini file
+ const FactorMask& GetOutputFactorMask() const
+ {
+ return m_outputFactors;
+ }
+ //! returns input factor types as specified by the ini file
+ const FactorMask& GetInputFactorMask() const
+ {
+ return m_inputFactors;
+ }
+
+ //! returns whether this dictionary is to be used for Translate or Generate
+ virtual DecodeType GetDecodeType() const = 0;
+
+ // clean up temporary memory, called after processing each sentence
+ // (base implementation does nothing)
+ virtual void CleanUp();
+};
+
+}
+#endif
diff --git a/moses/src/DummyScoreProducers.cpp b/moses/src/DummyScoreProducers.cpp
new file mode 100644
index 000000000..892dd0d4e
--- /dev/null
+++ b/moses/src/DummyScoreProducers.cpp
@@ -0,0 +1,154 @@
+// $Id: DummyScoreProducers.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+#include <cassert>
+#include "FFState.h"
+#include "StaticData.h"
+#include "DummyScoreProducers.h"
+#include "WordsRange.h"
+#include "TranslationOption.h"
+
+namespace Moses
+{
+
+// Hypothesis state for traditional distortion scoring: remembers the source
+// range last covered and the position of the first uncovered source word.
+struct DistortionState_traditional : public FFState {
+ WordsRange range; // source range covered by the most recent hypothesis extension
+ int first_gap; // first uncovered source position (NOT_FOUND if none)
+ DistortionState_traditional(const WordsRange& wr, int fg) : range(wr), first_gap(fg) {}
+ // NOTE(review): only the end position of 'range' participates in the
+ // comparison; first_gap is deliberately ignored — states differing only
+ // in first_gap compare equal. Confirm this is intended for recombination.
+ int Compare(const FFState& other) const {
+ const DistortionState_traditional& o =
+ static_cast<const DistortionState_traditional&>(other);
+ if (range.GetEndPos() < o.range.GetEndPos()) return -1;
+ if (range.GetEndPos() > o.range.GetEndPos()) return 1;
+ return 0;
+ }
+};
+
+struct DistortionState_MQ2007 : public FFState {
+ //TODO
+};
+
+// Initial state: nothing covered yet (all positions NOT_FOUND).
+const FFState* DistortionScoreProducer::EmptyHypothesisState() const {
+ return new DistortionState_traditional(WordsRange(NOT_FOUND,NOT_FOUND), NOT_FOUND);
+}
+
+// Registers this producer so it receives a slot in the global score vector.
+DistortionScoreProducer::DistortionScoreProducer(ScoreIndexManager &scoreIndexManager)
+{
+ scoreIndexManager.AddScoreProducer(this);
+}
+
+// One score component: the distortion penalty.
+size_t DistortionScoreProducer::GetNumScoreComponents() const
+{
+ return 1;
+}
+
+std::string DistortionScoreProducer::GetScoreProducerDescription() const
+{
+ return "Distortion";
+}
+
+std::string DistortionScoreProducer::GetScoreProducerWeightShortName() const
+{
+ return "d";
+}
+
+// Distortion penalty for moving from source range 'prev' to 'curr'.
+// NOTE(review): USE_OLD is hard-wired to 1, so the function always returns
+// the traditional input-defined distance below; everything after the first
+// return (the Moore & Quirk MT Summit 2007 early-payment scheme) is dead code.
+float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
+ const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
+{
+ const int USE_OLD = 1;
+ if (USE_OLD) {
+ return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
+ }
+
+ // Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
+
+ int prefixEndPos = FirstGap-1;
+ // monotone continuation straight after the covered prefix: free
+ if ((int) curr.GetStartPos() == prefixEndPos+1) {
+ return 0;
+ }
+
+ // jump backwards: pay twice the phrase length
+ if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
+ return (float) -2*curr.GetNumWordsCovered();
+ }
+
+ // previous phrase lies inside the covered prefix: pay gap + phrase length
+ if ((int) prev.GetEndPos() <= prefixEndPos) {
+ int z = curr.GetStartPos()-prefixEndPos;
+ return (float) -2*(z + curr.GetNumWordsCovered());
+ }
+
+ return (float) -2*(curr.GetNumWordsBetween(prev) + curr.GetNumWordsCovered());
+}
+
+// Distortion is a decoder-internal feature; it contributes no input scores.
+size_t DistortionScoreProducer::GetNumInputScores() const { return 0;}
+
+// Scores the transition from the previous hypothesis state to 'hypo' and
+// returns the successor state (caller owns the returned FFState).
+FFState* DistortionScoreProducer::Evaluate(
+ const Hypothesis& hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* out) const {
+ const DistortionState_traditional* prev = static_cast<const DistortionState_traditional*>(prev_state);
+ const float distortionScore = CalculateDistortionScore(
+ hypo,
+ prev->range,
+ hypo.GetCurrSourceWordsRange(),
+ prev->first_gap);
+ out->PlusEquals(this, distortionScore);
+ // NOTE(review): assumes hypo.GetPrevHypo() is non-null; presumably the
+ // initial hypothesis is handled via EmptyHypothesisState() — confirm.
+ DistortionState_traditional* res = new DistortionState_traditional(
+ hypo.GetCurrSourceWordsRange(),
+ hypo.GetPrevHypo()->GetWordsBitmap().GetFirstGapPos());
+ return res;
+}
+
+
+// Registers this producer so it receives a slot in the global score vector.
+WordPenaltyProducer::WordPenaltyProducer(ScoreIndexManager &scoreIndexManager)
+{
+ scoreIndexManager.AddScoreProducer(this);
+}
+
+size_t WordPenaltyProducer::GetNumScoreComponents() const
+{
+ return 1;
+}
+
+std::string WordPenaltyProducer::GetScoreProducerDescription() const
+{
+ return "WordPenalty";
+}
+
+std::string WordPenaltyProducer::GetScoreProducerWeightShortName() const
+{
+ return "w";
+}
+
+size_t WordPenaltyProducer::GetNumInputScores() const { return 0;}
+
+// Word penalty = minus the target phrase length (one unit per emitted word).
+void WordPenaltyProducer::Evaluate(const TargetPhrase& tp, ScoreComponentCollection* out) const
+{
+ out->PlusEquals(this, -static_cast<float>(tp.GetSize()));
+}
+
+// Registers this producer so it receives a slot in the global score vector.
+UnknownWordPenaltyProducer::UnknownWordPenaltyProducer(ScoreIndexManager &scoreIndexManager)
+{
+ scoreIndexManager.AddScoreProducer(this);
+}
+
+size_t UnknownWordPenaltyProducer::GetNumScoreComponents() const
+{
+ return 1;
+}
+
+std::string UnknownWordPenaltyProducer::GetScoreProducerDescription() const
+{
+ return "!UnknownWordPenalty";
+}
+
+std::string UnknownWordPenaltyProducer::GetScoreProducerWeightShortName() const
+{
+ return "u";
+}
+
+size_t UnknownWordPenaltyProducer::GetNumInputScores() const { return 0;}
+
+// The unknown-word penalty is fixed per translation option, so it can be
+// pre-computed when the option is built rather than during search.
+bool UnknownWordPenaltyProducer::ComputeValueInTranslationOption() const {
+ return true;
+}
+
+}
diff --git a/moses/src/DummyScoreProducers.h b/moses/src/DummyScoreProducers.h
new file mode 100644
index 000000000..2bbcc4c4d
--- /dev/null
+++ b/moses/src/DummyScoreProducers.h
@@ -0,0 +1,70 @@
+// $Id: DummyScoreProducers.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_DummyScoreProducers_h
+#define moses_DummyScoreProducers_h
+
+#include "FeatureFunction.h"
+
+namespace Moses
+{
+
+class WordsRange;
+
+/** Calculates Distortion scores
+ */
+class DistortionScoreProducer : public StatefulFeatureFunction {
+public:
+ DistortionScoreProducer(ScoreIndexManager &scoreIndexManager);
+
+ //! distortion penalty for moving from source span 'prev' to 'curr'
+ float CalculateDistortionScore(const Hypothesis& hypo,
+ const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
+
+ size_t GetNumScoreComponents() const;
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const;
+ size_t GetNumInputScores() const;
+
+ //! initial (nothing-covered) state; caller owns the returned object
+ virtual const FFState* EmptyHypothesisState() const;
+
+ //! scores the transition into cur_hypo; returns the successor state
+ virtual FFState* Evaluate(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
+
+};
+
+/** Doesn't do anything but provide a key into the global
+ * score array to store the word penalty in.
+ */
+class WordPenaltyProducer : public StatelessFeatureFunction {
+public:
+ WordPenaltyProducer(ScoreIndexManager &scoreIndexManager);
+
+ size_t GetNumScoreComponents() const;
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const;
+ size_t GetNumInputScores() const;
+
+ //! adds -|phrase| to the accumulator (one unit per target word)
+ virtual void Evaluate(
+ const TargetPhrase& phrase,
+ ScoreComponentCollection* out) const;
+
+};
+
+/** unknown word penalty */
+class UnknownWordPenaltyProducer : public StatelessFeatureFunction {
+public:
+ UnknownWordPenaltyProducer(ScoreIndexManager &scoreIndexManager);
+
+ size_t GetNumScoreComponents() const;
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const;
+ size_t GetNumInputScores() const;
+
+ //! true: the penalty is fixed per option and pre-computed there
+ virtual bool ComputeValueInTranslationOption() const;
+
+};
+
+}
+
+#endif
diff --git a/moses/src/DynSAInclude/fdstream.h b/moses/src/DynSAInclude/fdstream.h
new file mode 100644
index 000000000..2c13e8504
--- /dev/null
+++ b/moses/src/DynSAInclude/fdstream.h
@@ -0,0 +1,147 @@
+/* Class modified by ADL for randlm namespace on Feb 15th, 2008.
+ *
+ * The following code declares classes to read from and write to
+ * file descriptors or file handles.
+ *
+ * See
+ * http://www.josuttis.com/cppcode
+ * for details and the latest version.
+ *
+ * - open:
+ * - integrating BUFSIZ on some systems?
+ * - optimized reading of multiple characters
+ * - stream for reading AND writing
+ * - i18n
+ *
+ * (C) Copyright Nicolai M. Josuttis 2001.
+ * Permission to copy, use, modify, sell and distribute this software
+ * is granted provided this copyright notice appears in all copies.
+ * This software is provided "as is" without express or implied
+ * warranty, and with no claim as to its suitability for any purpose.
+ *
+ * Version: Jul 28, 2002
+ * History:
+ * Jul 28, 2002: bugfix memcpy() => memmove()
+ * fdinbuf::underflow(): cast for return statements
+ * Aug 05, 2001: first public version
+ */
+#ifndef moses_DynSAInclude_fdstream_h
+#define moses_DynSAInclude_fdstream_h
+
+#include <streambuf>
+// for EOF:
+#include <cstdio>
+// for memmove():
+#include <cstring>
+
+
+// low-level read and write functions
+#ifdef _MSC_VER
+# include <io.h>
+#else
+# include <unistd.h>
+//extern "C" {
+// int write (int fd, const char* buf, int num);
+// int read (int fd, char* buf, int num);
+//}
+#endif
+
+
+// BEGIN namespace
+//namespace randlm {
+
+/************************************************************
+ * fdstreambuf
+ * - a stream that reads on a file descriptor
+ ************************************************************/
+
+// streambuf over a raw POSIX file descriptor, with a small putback area.
+// Reading goes through underflow(); writing is unbuffered (overflow/xsputn
+// call write() directly).
+class fdstreambuf : public std::streambuf {
+ protected:
+ int fd; // file descriptor
+ protected:
+ /* data buffer:
+ * - at most, pbSize characters in putback area plus
+ * - at most, bufSize characters in ordinary read buffer
+ */
+ static const int pbSize = 4; // size of putback area
+ static const int bufSize = 1024; // size of the data buffer
+ char buffer[bufSize+pbSize]; // data buffer
+
+ public:
+ /* constructor
+ * - initialize file descriptor
+ * - initialize empty data buffer
+ * - no putback area
+ * => force underflow()
+ */
+ fdstreambuf (int _fd) : fd(_fd) {
+ setg (buffer+pbSize, // beginning of putback area
+ buffer+pbSize, // read position
+ buffer+pbSize); // end position
+ }
+
+ protected:
+ // refill the get area from fd; returns the next character or EOF
+ virtual int_type underflow () {
+#ifndef _MSC_VER
+ using std::memmove;
+#endif
+
+ // is read position before end of buffer?
+ if (gptr() < egptr()) {
+ return traits_type::to_int_type(*gptr());
+ }
+
+ /* process size of putback area
+ * - use number of characters read
+ * - but at most size of putback area
+ */
+ int numPutback;
+ numPutback = gptr() - eback();
+ if (numPutback > pbSize) {
+ numPutback = pbSize;
+ }
+
+ /* copy up to pbSize characters previously read into
+ * the putback area
+ */
+ memmove (buffer+(pbSize-numPutback), gptr()-numPutback,
+ numPutback);
+
+ // read at most bufSize new characters
+ int num;
+ num = read (fd, buffer+pbSize, bufSize);
+ if (num <= 0) {
+ // ERROR or EOF (read() errors are folded into EOF here)
+ return EOF;
+ }
+
+ // reset buffer pointers
+ setg (buffer+(pbSize-numPutback), // beginning of putback area
+ buffer+pbSize, // read position
+ buffer+pbSize+num); // end of buffer
+
+ // return next character
+ return traits_type::to_int_type(*gptr());
+ }
+
+ // write one character straight to fd (no output buffering)
+ virtual int_type overflow (int_type c) {
+ if (c != EOF) {
+ char z = c;
+ if (write (fd, &z, 1) != 1) {
+ return EOF;
+ }
+ }
+ return c;
+ }
+ // write multiple characters; returns write()'s result
+ // NOTE(review): a short or failed write() (< num, possibly negative) is
+ // returned as-is; the stream layer will treat it as failure — confirm
+ // partial writes need no retry loop for this use.
+ virtual
+ std::streamsize xsputn (const char* s,
+ std::streamsize num) {
+ return write(fd,s,num);
+ }
+};
+//} // END namespace
+
+#endif
diff --git a/moses/src/DynSAInclude/file.cpp b/moses/src/DynSAInclude/file.cpp
new file mode 100644
index 000000000..aad34926a
--- /dev/null
+++ b/moses/src/DynSAInclude/file.cpp
@@ -0,0 +1,160 @@
+#include "file.h"
+
+namespace Moses {
+
+ // FileHandler class
+ const std::string FileHandler::kStdInDescriptor = "___stdin___";
+ const std::string FileHandler::kStdOutDescriptor = "___stdout___";
+ // compression commands
+ const FileExtension FileHandler::kGzipped = ".gz";
+ const FileExtension FileHandler::kBzipped2 = ".bz2";
+
+ const std::string FileHandler::kCatCommand = "cat";
+ const std::string FileHandler::kGzipCommand = "gzip -f";
+ const std::string FileHandler::kGunzipCommand = "gunzip -f";
+ const std::string FileHandler::kBzip2Command = "bzip2 -f";
+ const std::string FileHandler::kBunzip2Command = "bunzip2 -f";
+
+ // Opens 'path' (or wraps stdin/stdout); transparently pipes through
+ // gzip/bzip2 when the extension says so. Rejects simultaneous in|out:
+ // flags^(in|out) is zero exactly when both bits (and only those) are set.
+ FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags, bool checkExists)
+ : std::fstream(NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL) {
+ if( !(flags^(std::ios::in|std::ios::out)) ) {
+ fprintf(stderr, "ERROR: FileHandler does not support bidirectional files (%s).\n", path_.c_str());
+ exit(EXIT_FAILURE);
+ }
+ else
+ // NOTE(review): side effect inside assert() — setStreamBuffer() is never
+ // called when compiled with NDEBUG; consider hoisting the call out.
+ assert(setStreamBuffer(flags & std::ios::in));
+ this->precision(32);
+ }
+
+ // Closes the compression pipe (if any) and frees the owned streambuf.
+ // The std::cin/std::cout rdbufs are borrowed, not owned, hence the
+ // path check before delete.
+ FileHandler::~FileHandler() {
+ if( fp_ != 0 )
+ pclose(fp_);
+ if( path_ != FileHandler::kStdInDescriptor &&
+ path_ != FileHandler::kStdOutDescriptor )
+ delete buffer_;
+ if( this->is_open() )
+ this->close();
+ }
+
+ // Spawns 'cmd' via popen ("r" for input, "w" for output) and wraps the
+ // pipe's file descriptor in a new fdstreambuf (ownership passes to caller).
+ // Exits the process on popen failure.
+ fdstreambuf * FileHandler::openCompressedFile(const char * cmd) {
+ //bool isInput = (flags_ & std::ios::in);
+ //open pipe to file with compression/decompression command
+ const char * p_type = (flags_ & std::ios::in ? "r" : "w");
+ fp_ = popen(cmd, p_type);
+ if( fp_ == NULL ) {
+ //fprintf(stderr, "ERROR:Failed to open compressed file at %s\n", path_.c_str());
+ perror("openCompressedFile: ");
+ exit(EXIT_FAILURE);
+ }
+ //open streambuf with file descriptor
+ return new fdstreambuf(fileno(fp_));
+ }
+
+ // Chooses and installs the underlying streambuf: borrows the stdin/stdout
+ // rdbuf for the magic descriptors, opens a compression pipe for .gz/.bz2
+ // paths, or a plain filebuf otherwise. Exits the process on failure.
+ bool FileHandler::setStreamBuffer(bool checkExists) {
+ // redirect stdin or stdout if necessary
+ if (path_ == FileHandler::kStdInDescriptor) {
+ assert(flags_ & std::ios::in);
+ std::streambuf* sb = std::cin.rdbuf();
+ buffer_ = sb;
+ } else if (path_ == FileHandler::kStdOutDescriptor) {
+ assert(flags_ & std::ios::out);
+ std::streambuf* sb = std::cout.rdbuf();
+ buffer_ = sb;
+ } else {
+ // real file
+ if( checkExists && ! fileExists() ) {
+ fprintf(stderr, "ERROR: Failed to find file at %s\n", path_.c_str());
+ exit(EXIT_FAILURE);
+ }
+ std::string cmd = "";
+ if( isCompressedFile(cmd) && (! cmd.empty()) ) {
+ buffer_ = openCompressedFile(cmd.c_str());
+ } else {
+ // open underlying filebuf
+ // NOTE(review): fb->open failure is not detected here; buffer_ stays
+ // non-null and the error only surfaces on first I/O.
+ std::filebuf* fb = new std::filebuf();
+ fb->open(path_.c_str(), flags_);
+ buffer_ = fb;
+ }
+ }
+ if (!buffer_) {
+ fprintf(stderr, "ERROR:Failed to open file at %s\n", path_.c_str());
+ exit(EXIT_FAILURE);
+ }
+ this->init(buffer_);
+ return true;
+ }
+
+ /*
+ * Checks for compression via file extension. Currently checks for
+ * ".gz" and ".bz2". On a match, 'cmd' receives the shell pipeline to
+ * (de)compress the file; returns whether the path looks compressed.
+ */
+ bool FileHandler::isCompressedFile(std::string & cmd)
+ {
+ bool compressed = false, isInput = (flags_ & std::ios::in);
+ cmd = "";
+ unsigned int len = path_.size();
+ // NOTE(review): find() locates the FIRST occurrence of the extension, so
+ // a path like "a.gz.gz" fails the position test and is not detected;
+ // rfind() (true suffix test) would be safer — confirm and align with
+ // getCompressionCmds().
+ if( len > kGzipped.size()
+ && path_.find(kGzipped) == len - kGzipped.size()) {
+ //gzip file command to compress or decompress
+ compressed = true;
+ // cmd = (isInput ? "exec gunzip -cf " : "exec gzip -c > ") + path_;
+ cmd = (isInput ? "exec " + kGunzipCommand + "c "
+ : "exec " + kGzipCommand + "c > ") + path_;
+ } else if( len > kBzipped2.size() &&
+ path_.find(kBzipped2) == len - kBzipped2.size()) {
+ //do bzipped2 file command
+ compressed = true;
+ cmd = (isInput ? "exec " + kBunzip2Command + "c "
+ : "exec " + kBzip2Command + "c > ") + path_;
+ }
+ return compressed;
+ }
+
+ bool FileHandler::fileExists() {
+ bool exists = false;
+ struct stat f_info;
+ if( stat(path_.c_str(), &f_info) == 0 ) //if stat() returns no errors
+ exists = true;
+ return( exists );
+ }
+
+ // static method used during preprocessing compressed files without
+ // opening fstream objects.
+ // Fills in the (de)compression commands and suffix matching 'filepath';
+ // returns true iff the path carries a recognized compressed extension
+ // (.gz or .bz2), otherwise both commands are left as "cat".
+ bool FileHandler::getCompressionCmds(const std::string & filepath, std::string & compressionCmd,
+ std::string & decompressionCmd,
+ std::string & compressionSuffix) {
+ // determine what compression and decompression cmds are suitable from filepath
+ compressionCmd = kCatCommand;
+ decompressionCmd = kCatCommand;
+ // use rfind() so the extension is tested as a true suffix: find() returns
+ // the FIRST occurrence, which made paths like "a.gz.gz" fail the position
+ // comparison and go undetected
+ if (filepath.length() > kGzipped.size() &&
+ filepath.rfind(kGzipped) == filepath.length()
+ - kGzipped.length()) {
+ compressionCmd = kGzipCommand;
+ decompressionCmd = kGunzipCommand;
+ compressionSuffix = kGzipped;
+ } else if (filepath.length() > kBzipped2.size() &&
+ filepath.rfind(kBzipped2) == filepath.length()
+ - kBzipped2.length() ) {
+ compressionCmd = kBzip2Command;
+ decompressionCmd = kBunzip2Command;
+ compressionSuffix = kBzipped2; // stray ';;' (empty statement) removed
+ }
+ return (compressionCmd != kCatCommand && decompressionCmd != kCatCommand);
+ }
+
+ // Rewind to the beginning of the stream. Pipes cannot seek, so a
+ // compressed stream is reopened from scratch instead.
+ bool FileHandler::reset() {
+ // move to beginning of file
+ if (fp_ != 0) {
+ //can't seek on a pipe so reopen
+ pclose(fp_);
+ std::string cmd = "";
+ if (isCompressedFile(cmd) && ! cmd.empty())
+ // NOTE(review): the previous fdstreambuf pointed to by buffer_ is
+ // overwritten without delete — leaks one buffer per reset.
+ buffer_ = openCompressedFile(cmd.c_str());
+ //reinitialize
+ this->init(buffer_);
+ }
+ else
+ buffer_->pubseekoff(0, std::ios_base::beg); //sets both get and put pointers to beginning of stream
+ return true;
+ }
+} //end namespace
diff --git a/moses/src/DynSAInclude/file.h b/moses/src/DynSAInclude/file.h
new file mode 100644
index 000000000..eae5ed8c6
--- /dev/null
+++ b/moses/src/DynSAInclude/file.h
@@ -0,0 +1,61 @@
+#ifndef moses_File_h
+#define moses_File_h
+
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <sys/stat.h>
+#include <string>
+#include <cassert>
+#include "fdstream.h"
+#include "utils.h"
+
+namespace Moses {
+typedef std::string FileExtension;
+
+// fstream wrapper that transparently handles stdin/stdout redirection and
+// gzip/bzip2 (de)compression via external commands, selected by extension.
+class FileHandler: public std::fstream {
+public:
+ // descriptors for stdin and stdout
+ static const std::string kStdInDescriptor; // file name for std::cin
+ static const std::string kStdOutDescriptor; // file name for std::cout
+ // compression commands
+ static const std::string kCatCommand; // i.e. no compression
+ static const std::string kGzipCommand; // gzip -f
+ static const std::string kGunzipCommand; // gunzip -f
+ static const std::string kBzip2Command; // bzip2 -f
+ static const std::string kBunzip2Command; // bunzip2 -f
+
+ // open file or wrap stdin or stdout
+ FileHandler(const std::string & path,
+ std::ios_base::openmode flags = std::ios::in,
+ bool checkExists = true);
+ ~FileHandler();
+ // file utilities
+ // pick (de)compression commands for a path; true iff path is .gz/.bz2
+ static bool getCompressionCmds(const std::string & filepath,
+ std::string & compressionCmd,
+ std::string & decompressionCmd,
+ std::string & compressionSuffix);
+
+ // data accessors
+ std::string getPath() { return path_; }
+ std::ios_base::openmode getFlags() { return flags_; }
+ bool isStdIn() { return path_ == FileHandler::kStdInDescriptor; }
+ bool isStdOut() { return path_ == FileHandler::kStdOutDescriptor; }
+ // rewind to start of stream (reopens the pipe for compressed files)
+ bool reset();
+protected:
+ static const FileExtension kGzipped;
+ static const FileExtension kBzipped2;
+ bool fileExists();
+ bool setStreamBuffer(bool checkExists);
+ bool isCompressedFile(std::string & cmd);
+ fdstreambuf* openCompressedFile(const char* cmd);
+ std::string path_; // file path
+ std::ios_base::openmode flags_; // open flags
+ std::streambuf* buffer_; // owned unless borrowed from std::cin/std::cout
+ std::FILE* fp_; //file pointer to handle pipe data
+};
+
+} // end namespace
+
+#endif
diff --git a/moses/src/DynSAInclude/types.h b/moses/src/DynSAInclude/types.h
new file mode 100644
index 000000000..6f766337c
--- /dev/null
+++ b/moses/src/DynSAInclude/types.h
@@ -0,0 +1,32 @@
+#ifndef moses_DynSAInclude_types_h
+#define moses_DynSAInclude_types_h
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <vector>
+#include <typeinfo>
+#include <stdint.h>
+
+#define iterate(c, i) for(typeof(c.begin()) i = c.begin(); i != c.end(); ++i)
+#define piterate(c, i) for(typeof(c->begin()) i = c->begin(); i != c->end(); ++i)
+#define THREADED false
+#define THREAD_MAX 2
+#define MAX_NGRAM_ORDER 8
+#define MAX_STR_LEN 300
+#define PRIME 8589935681ULL
+#define MAX_HASH_FUNCS 1000
+//#define PRIME 409
+
+using std::string;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+//typedefs for projects
+typedef std::string word_t; // word as string
+typedef unsigned int wordID_t; // word mapped to integer
+typedef std::string date_t; // a date marker
+typedef unsigned int count_t; // for 64-bit to 32-bit compatibility
+
+#endif
diff --git a/moses/src/DynSAInclude/utils.h b/moses/src/DynSAInclude/utils.h
new file mode 100644
index 000000000..8312f109a
--- /dev/null
+++ b/moses/src/DynSAInclude/utils.h
@@ -0,0 +1,81 @@
+#ifndef moses_DynSAInclude_utils_h
+#define moses_DynSAInclude_utils_h
+
+#include <cstdlib>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <cctype>
+#include <cmath>
+#include <cstring>
+
+// Small grab-bag of string/tokenizing/random helpers (all static).
+class Utils {
+public:
+ // Strip leading and trailing dropChars in place.
+ static void trim(std::string& str, const std::string dropChars = " \t\n\r") {
+ str.erase(str.find_last_not_of(dropChars)+1);
+ str.erase(0, str.find_first_not_of(dropChars));
+ }
+ // Strip trailing dropChars in place.
+ static void rtrim(std::string& str, const std::string dropChars = " \t\n\r") {
+ str.erase(str.find_last_not_of(dropChars)+1);
+ }
+ // Strip leading dropChars in place.
+ static void ltrim(std::string& str, const std::string dropChars = " \t\n\r") {
+ str.erase(0, str.find_first_not_of(dropChars));
+ }
+ static std::string IntToStr(int integer) {
+ std::ostringstream stream;
+ stream << integer;
+ return stream.str();
+ }
+ // Tokenize 'str' on any character in 'delm'; fills 'items' and returns
+ // the token count. Tokenizes a private copy: the original code
+ // const_cast'ed the input and let strtok mutate it, which corrupts the
+ // caller's buffer and is undefined behavior for string literals.
+ static int splitToStr(const char * str,
+ std::vector<std::string> & items,
+ const char * delm = "\t") {
+ items.clear();
+ std::vector<char> buff(str, str + strlen(str) + 1); // includes the NUL
+ char * pch = strtok(&buff[0], delm);
+ while( pch != NULL ) {
+ items.push_back(pch);
+ pch = strtok(NULL, delm);
+ }
+ return items.size();
+ }
+ // std::string convenience overload; safe now that the char* version copies.
+ static int splitToStr(std::string buff,
+ std::vector<std::string> & items,
+ std::string delm = "\t") {
+ return splitToStr(buff.c_str(), items, delm.c_str());
+ }
+ // Tokenize then atoi each field; returns the number of fields.
+ static int splitToInt(std::string buff, std::vector<int>& items,
+ std::string delm = ",") {
+ items.clear();
+ std::vector<std::string> tmpVector(0);
+ int i = 0;
+ i = splitToStr(buff.c_str(), tmpVector, delm.c_str());
+ if( i > 0 )
+ for( int j = 0; j < i; j++ )
+ items.push_back(atoi(tmpVector[j].c_str()));
+ return i;
+ }
+ // In-place ASCII lowercasing. The unsigned char cast avoids undefined
+ // behavior when plain char is signed and holds a negative (non-ASCII) value.
+ static void strToLowercase(std::string& str) {
+ for(unsigned i=0; i < str.length(); i++) {
+ str[i] = tolower(static_cast<unsigned char>(str[i]));
+ }
+ }
+ // TODO: interface with decent PRG
+ // Returns a pseudo-random T in [0, mod_bnd) (full range when mod_bnd==0);
+ // 64-bit types are stitched from two 31-bit rand() draws.
+ template<typename T>
+ static T rand(T mod_bnd = 0) {
+ T random = 0;
+ if(sizeof(T) <= 4) {
+ random = static_cast<T>(std::rand());
+ }
+ else if(sizeof(T) == 8) {
+ random = static_cast<T>(std::rand());
+ random <<= 31; random <<= 1;
+ random |= static_cast<T>(std::rand());
+ }
+ if(mod_bnd != 0)
+ return random % mod_bnd;
+ else return random;
+ }
+};
+
+#endif
diff --git a/moses/src/DynSAInclude/vocab.cpp b/moses/src/DynSAInclude/vocab.cpp
new file mode 100644
index 000000000..4aa9ba000
--- /dev/null
+++ b/moses/src/DynSAInclude/vocab.cpp
@@ -0,0 +1,93 @@
+#include <sstream>
+#include "vocab.h"
+
+namespace Moses {
+
+ // Vocab class
+ const wordID_t Vocab::kOOVWordID;
+ const wordID_t Vocab::kBOSWordID;
+ const word_t Vocab::kBOS = "<s>";
+ const word_t Vocab::kEOS = "</s>";
+ const word_t Vocab::kOOVWord = "<unk>";
+
+ // Map a word to its id; when the vocab is open, unseen words are added
+ // with the next sequential id (size()+1, so ids start at 1 and 0 stays
+ // reserved for kOOVWordID). When closed, unseen words map to kOOVWordID.
+ wordID_t Vocab::getWordID(const word_t& word) {
+ // get id and possibly add to vocab
+ if (words2ids_.find(word) == words2ids_.end())
+ if (!closed_) {
+ wordID_t id = words2ids_.size() + 1; // size BEFORE insertion, +1
+ words2ids_[word] = id; // keeps ids dense and 1-based
+ ids2words_[id] = word;
+ }
+ else {
+ return Vocab::kOOVWordID;
+ }
+ wordID_t id = words2ids_[word];
+ return id;
+ }
+
+ // Map an id back to its word; unknown ids yield the <unk> token.
+ word_t Vocab::getWord(wordID_t id) {
+ // get word string given id
+ return (ids2words_.find(id) == ids2words_.end()) ? Vocab::kOOVWord : ids2words_[id];
+ }
+
+ bool Vocab::inVocab(wordID_t id) {
+ return ids2words_.find(id) != ids2words_.end();
+ }
+
+ bool Vocab::inVocab(const word_t & word) {
+ return words2ids_.find(word) != words2ids_.end();
+ }
+
+ // Write vocab to 'vocab_path': first line is the entry count, then one
+ // "word<TAB>id" line per entry.
+ bool Vocab::save(const std::string & vocab_path) {
+ // save vocab as id -> word
+ FileHandler vcbout(vocab_path, std::ios::out);
+ return save(&vcbout);
+ }
+ bool Vocab::save(FileHandler* vcbout) {
+ // then each vcb entry
+ *vcbout << ids2words_.size() << "\n";
+ iterate(ids2words_, iter)
+ *vcbout << iter->second << "\t" << iter->first << "\n";
+ return true;
+ }
+
+ // Convenience overload: open 'vocab_path' and delegate to load(FileHandler*).
+ bool Vocab::load(const std::string & vocab_path, bool closed) {
+ FileHandler vcbin(vocab_path, std::ios::in);
+ std::cerr << "Loading vocab from " << vocab_path << std::endl;
+ return load(&vcbin, closed);
+ }
+ // Load the "count then word<TAB>id per line" format written by save().
+ // Also accepts a bare word list (no id column): such entries get
+ // sequential ids starting at 1. Replaces any existing mapping.
+ bool Vocab::load(FileHandler* vcbin, bool closed) {
+ // load vocab id -> word mapping
+ words2ids_.clear(); // reset mapping
+ ids2words_.clear();
+ std::string line;
+ // read the header line directly: the original wrapped this getline in
+ // assert(), whose side effect disappears under NDEBUG
+ if (!getline(*vcbin, line)) {
+ std::cerr << "Failed to read vocab header line." << std::endl;
+ return false;
+ }
+ std::istringstream first(line.c_str());
+ uint32_t vcbsize(0);
+ first >> vcbsize;
+ uint32_t loadedsize = 0;
+ while (loadedsize++ < vcbsize && getline(*vcbin, line)) {
+ std::istringstream entry(line.c_str());
+ word_t word;
+ // reset id on every line: the original declared it once outside the
+ // loop, so for a bare word list (no id column) the previous line's id
+ // leaked into the next entry (and the first read was indeterminate)
+ wordID_t id = 0;
+ entry >> word;
+ entry >> id;
+ // may be no id (i.e. file may just be a word list)
+ if (id == 0 && word != Vocab::kOOVWord)
+ id = ids2words_.size() + 1; // assign ids sequentially starting from 1
+ assert(ids2words_.count(id) == 0 && words2ids_.count(word) == 0);
+ ids2words_[id] = word;
+ words2ids_[word] = id;
+ }
+ closed_ = closed; // once loaded, optionally freeze the vocab
+ std::cerr << "Loaded vocab with " << ids2words_.size() << " words." << std::endl;
+ return true;
+ }
+ // Debug dump of both maps to stderr (id->word entries then word->id entries).
+ void Vocab::printVocab() {
+ iterate(ids2words_, iter)
+ std::cerr << iter->second << "\t" << iter->first << "\n";
+ iterate(words2ids_, iter)
+ std::cerr << iter->second << "\t" << iter->first << "\n";
+ }
+
+} //end namespace
diff --git a/moses/src/DynSAInclude/vocab.h b/moses/src/DynSAInclude/vocab.h
new file mode 100644
index 000000000..02b1ffa65
--- /dev/null
+++ b/moses/src/DynSAInclude/vocab.h
@@ -0,0 +1,64 @@
+#ifndef moses_DynSAInclude_vocab_h
+#define moses_DynSAInclude_vocab_h
+
+#include <map>
+#include <string>
+#include "types.h"
+#include "file.h"
+#include "utils.h"
+namespace Moses {
+ // Vocab maps between strings and uint32 ids.
+
+ class Vocab {
+ public:
+ typedef std::map<word_t, wordID_t> Word2Id;
+ typedef std::map<wordID_t, word_t> Id2Word;
+
+ static const wordID_t kOOVWordID = 0; // out of vocabulary word id
+ static const wordID_t kBOSWordID = 1;
+ static const word_t kBOS; // beginning of sentence marker
+ static const word_t kEOS; // end of sentence marker
+ static const word_t kOOVWord; // <unk>
+ // Empty open vocab; optionally pre-registers <s> and </s>.
+ Vocab(bool sntMarkers = true):closed_(false) {
+ if(sntMarkers) {
+ getWordID(kBOS); // added in case not observed in corpus
+ getWordID(kEOS);
+ }
+ }
+ // if no file then must allow new words
+ // specify whether more words can be added via 'closed'
+ // assume that if a vocab is loaded from file then it should be closed.
+ // NOTE(review): the load() call happens inside assert(), so under NDEBUG
+ // these constructors leave the vocab empty — move the call out of assert.
+ Vocab(const std::string & vocab_path, bool closed = true) {
+ assert(load(vocab_path, closed));
+ }
+ Vocab(FileHandler* fin, bool closed = true) {
+ assert(load(fin, closed));
+ }
+ ~Vocab() {}
+ // id for word; grows the vocab when open, returns kOOVWordID when closed
+ wordID_t getWordID(const word_t & word);
+ // word for id; returns kOOVWord for unknown ids
+ word_t getWord(wordID_t id);
+ bool inVocab(wordID_t id);
+ bool inVocab(const word_t & word);
+ uint32_t size() { return words2ids_.size(); }
+ void makeClosed() { closed_ = true; }
+ void makeOpen() { closed_ = false; }
+ bool isClosed() { return closed_; }
+ bool save(const std::string & vocab_path);
+ bool save(FileHandler* fout);
+ bool load(const std::string & vocab_path, bool closed = true);
+ bool load(FileHandler* fin, bool closed = true);
+ void printVocab();
+ Word2Id::const_iterator vocabStart() {
+ return words2ids_.begin();
+ }
+ Word2Id::const_iterator vocabEnd() {
+ return words2ids_.end();
+ }
+ private:
+ Word2Id words2ids_; // map from strings to word ids
+ Id2Word ids2words_; // map from ids to strings
+ bool closed_; // can more words be added
+ };
+}
+
+#endif
diff --git a/moses/src/DynSuffixArray.cpp b/moses/src/DynSuffixArray.cpp
new file mode 100644
index 000000000..c2f738519
--- /dev/null
+++ b/moses/src/DynSuffixArray.cpp
@@ -0,0 +1,237 @@
+#include "DynSuffixArray.h"
+#include <iostream>
+namespace Moses {
+DynSuffixArray::DynSuffixArray() {
+ SA_ = new vuint_t();
+ ISA_ = new vuint_t();
+ F_ = new vuint_t();
+ L_ = new vuint_t();
+ std::cerr << "DYNAMIC SUFFIX ARRAY CLASS INSTANTIATED" << std::endl;
+}
+DynSuffixArray::~DynSuffixArray() {
+ delete SA_;
+ delete ISA_;
+ delete F_;
+ delete L_;
+}
+DynSuffixArray::DynSuffixArray(vuint_t* crp) {
+ // make native int array and pass to SA builder
+ corpus_ = crp;
+ int size = corpus_->size();
+ int* tmpArr = new int[size];
+ for(int i=0 ; i < size; ++i) tmpArr[i] = i;
+ qsort(tmpArr, 0, size-1);
+ SA_ = new vuint_t(tmpArr, tmpArr + size);
+ //std::cerr << "printing SA " << std::endl;
+ //for(int i=0; i < size; ++i) std::cerr << SA_->at(i) << std::endl;
+ delete[] tmpArr;
+ std::cerr << "DYNAMIC SUFFIX ARRAY CLASS INSTANTIATED WITH SIZE " << size << std::endl;
+ buildAuxArrays();
+ //printAuxArrays();
+}
+void DynSuffixArray::buildAuxArrays() {
+ int size = SA_->size();
+ ISA_ = new vuint_t(size);
+ F_ = new vuint_t(size);
+ L_ = new vuint_t(size);
+ for(int i=0; i < size; ++i) {
+ ISA_->at(SA_->at(i)) = i;
+ //(*ISA_)[(*SA_)[i]] = i;
+ (*F_)[i] = (*corpus_)[SA_->at(i)];
+ (*L_)[i] = (*corpus_)[(SA_->at(i) == 0 ? size-1 : SA_->at(i)-1)];
+ }
+}
+int DynSuffixArray::rank(unsigned word, unsigned idx) {
+/* use Gerlach's code to make rank faster */
+ // the number of word in L[0..i]
+ int r(0);
+ for(unsigned i=0; i < idx; ++i)
+ if(L_->at(i) == word) ++r;
+ return r;
+}
+/* count function should be implemented
+ * with binary search over suffix array!! */
+int DynSuffixArray::F_firstIdx(unsigned word) {
+  // return index of first row where word is found in F_, or -1 if absent
+  int low = std::lower_bound(F_->begin(), F_->end(), word) - F_->begin();
+  // guard: lower_bound returns end() when word > every element of F_
+  if(low < (int)F_->size() && F_->at(low) == word) return low;
+  else return -1;
+}
+/* uses rank() and c() to obtain the LF function */
+int DynSuffixArray::LF(unsigned L_idx) {
+  int fIdx(-1);
+  unsigned word = L_->at(L_idx);
+  // word absent from F_ -> 0; callers already treat 0 as "new vocab" (avoids falling off the end: UB)
+  return ((fIdx = F_firstIdx(word)) != -1) ? fIdx + rank(word, L_idx) : 0;
+}
+void DynSuffixArray::insertFactor(vuint_t* newSent, unsigned newIndex) {
+ // for sentences
+ //stages 1, 2, 4 stay same from 1char case
+ //(use last word of new text in step 2 and save Ltmp until last insert?)
+ //stage 3...all words of new sentence are inserted backwards
+ // stage 2: k=ISA[newIndex], tmp= L[k], L[k] = newChar
+ assert(newIndex <= SA_->size());
+ int k(-1), kprime(-1);
+ k = (newIndex < SA_->size() ? ISA_->at(newIndex) : ISA_->at(0)); // k is now index of the cycle that starts at newindex
+ int true_pos = LF(k); // track cycle shift (newIndex - 1)
+ int Ltmp = L_->at(k);
+ L_->at(k) = (*newSent)[newSent->size()-1]; // cycle k now ends with correct word
+ for(int j = newSent->size()-1; j > -1; --j) {
+ kprime = LF(k); // find cycle that starts with (newindex - 1)
+ //kprime += ((L_[k] == Ltmp) && (k > isa[k]) ? 1 : 0); // yada yada
+ // only terminal char can be 0 so add new vocab at end
+ kprime = (kprime > 0 ? kprime : SA_->size());
+ true_pos += (kprime <= true_pos ? 1 : 0); // track changes
+ // insert everything
+ F_->insert(F_->begin() + kprime, (*newSent)[j]);
+ int theLWord = (j == 0 ? Ltmp : (*newSent)[j-1]);
+ L_->insert(L_->begin() + kprime, theLWord);
+ piterate(SA_, itr)
+ if(*itr >= newIndex) ++(*itr);
+ SA_->insert(SA_->begin() + kprime, newIndex);
+ piterate(ISA_, itr)
+ if(*itr >= kprime) ++(*itr);
+ ISA_->insert(ISA_->begin() + newIndex, kprime);
+ k = kprime;
+ }
+ // Begin stage 4
+ reorder(true_pos, LF(kprime)); // actual position vs computed position of cycle (newIndex-1)
+}
+void DynSuffixArray::reorder(unsigned j, unsigned jprime) {
+ printf("j=%d\tj'=%d\n", j, jprime);
+ while(j != jprime) {
+ printf("j=%d\tj'=%d\n", j, jprime);
+ int tmp, isaIdx(-1);
+ int new_j = LF(j);
+ // for SA, L, and F, the element at pos j is moved to j'
+ tmp = L_->at(j); // L
+ L_->at(j) = L_->at(jprime);
+ L_->at(jprime) = tmp;
+ tmp = SA_->at(j); // SA
+ SA_->at(j) = SA_->at(jprime);
+ SA_->at(jprime) = tmp;
+ // all ISA values between (j...j'] decremented
+ for(int i = 0; i < ISA_->size(); ++i) {
+ if((ISA_->at(i) == j) && (isaIdx == -1))
+ isaIdx = i; // store index of ISA[i] = j
+ if((ISA_->at(i) > j) && (ISA_->at(i) <= jprime)) --(*ISA_)[i];
+ }
+ // replace j with j' in ISA
+ //isa[isaIdx] = jprime;
+ ISA_->at(isaIdx) = jprime;
+ j = new_j;
+ jprime = LF(jprime);
+ }
+}
+void DynSuffixArray::deleteFactor(unsigned index, unsigned num2del) {
+ int ltmp = L_->at(ISA_->at(index));
+ int true_pos = LF(ISA_->at(index)); // track cycle shift (newIndex - 1)
+ for(int q = 0; q < num2del; ++q) {
+ int row = ISA_->at(index); // gives the position of index in SA and F_
+ std::cerr << "row = " << row << std::endl;
+ std::cerr << "SA[r]/index = " << SA_->at(row) << "/" << index << std::endl;
+ true_pos -= (row <= true_pos ? 1 : 0); // track changes
+ L_->erase(L_->begin() + row);
+ F_->erase(F_->begin() + row);
+ ISA_->erase(ISA_->begin() + index); // order is important
+ piterate(ISA_, itr)
+ if(*itr > row) --(*itr);
+ SA_->erase(SA_->begin() + row);
+ piterate(SA_, itr)
+ if(*itr > index) --(*itr);
+ }
+ L_->at(ISA_->at(index))= ltmp;
+ reorder(LF(ISA_->at(index)), true_pos);
+ printAuxArrays();
+}
+void DynSuffixArray::substituteFactor(vuint_t* newSents, unsigned newIndex) {
+ std::cerr << "NEEDS TO IMPELEMNT SUBSITITUTE FACTOR\n";
+ return;
+}
+bool DynSuffixArray::getCorpusIndex(const vuint_t* phrase, vuint_t* indices) {
+  pair<vuint_t::iterator,vuint_t::iterator> bounds;
+  indices->clear();
+  int phrasesize = phrase->size();
+  // find lower and upper bounds on phrase[0]
+  bounds = std::equal_range(F_->begin(), F_->end(), phrase->at(0));
+  // bounds holds first and (last + 1) index of phrase[0] in SA_
+  int lwrBnd = int(bounds.first - F_->begin());
+  int uprBnd = int(bounds.second - F_->begin());
+  if(uprBnd - lwrBnd == 0) return false; // not found
+  if(phrasesize == 1) {
+    for(int i=lwrBnd; i < uprBnd; ++i) {
+      indices->push_back(SA_->at(i));
+    }
+    return (indices->size() > 0);
+  }
+  //find longer phrases if they exist
+  for(int i = lwrBnd; i < uprBnd; ++i) {
+    int crpIdx = SA_->at(i);
+    if((crpIdx + phrasesize) > corpus_->size()) continue; // would run past end of corpus (last valid start: size - phrasesize)
+    for(int pos = 1; pos < phrasesize; ++pos) { // for all following words
+      if(corpus_->at(crpIdx + pos) != phrase->at(pos)) { // if word doesn't match
+        if(indices->size() > 0) i = uprBnd; // past the phrases since SA is ordered
+        break;
+      }
+      else if(pos == phrasesize-1) { // found phrase
+        indices->push_back(crpIdx + pos); // store rightmost index of phrase
+      }
+    }
+  }
+  //cerr << "Total count of phrase = " << indices->size() << endl;
+  return (indices->size() > 0);
+}
+void DynSuffixArray::save(FILE* fout) {
+ fWriteVector(fout, *SA_);
+}
+void DynSuffixArray::load(FILE* fin) {
+ fReadVector(fin, *SA_);
+}
+int DynSuffixArray::compare(int pos1, int pos2, int max) {
+ for (int i=0; i < max; ++i) {
+ if((pos1 + i < corpus_->size()) && (pos2 + i >= corpus_->size()))
+ return 1;
+ if((pos2 + i < corpus_->size()) && (pos1 + i >= corpus_->size()))
+ return -1;
+
+ int diff = corpus_->at(pos1+i) - corpus_->at(pos2+i);
+ if(diff != 0) return diff;
+ }
+ return 0;
+}
+void DynSuffixArray::qsort(int* array, int begin, int end) {
+ if(end > begin)
+ {
+ int index;
+ {
+ index = begin + (rand() % (end - begin + 1));
+ int pivot = array[index];
+ {
+ int tmp = array[index];
+ array[index] = array[end];
+ array[end] = tmp;
+ }
+ for(int i=index=begin; i < end; ++i) {
+ if (compare(array[i], pivot, 20) <= 0) {
+ {
+ int tmp = array[index];
+ array[index] = array[i];
+ array[i] = tmp;
+ index++;
+ }
+ }
+ }
+ {
+ int tmp = array[index];
+ array[index] = array[end];
+ array[end] = tmp;
+ }
+ }
+ qsort(array, begin, index - 1);
+ qsort(array, index + 1, end);
+ }
+}
+
+
+
+} // end namespace
diff --git a/moses/src/DynSuffixArray.h b/moses/src/DynSuffixArray.h
new file mode 100644
index 000000000..08bf292cb
--- /dev/null
+++ b/moses/src/DynSuffixArray.h
@@ -0,0 +1,50 @@
+#ifndef moses_DynSuffixArray_h
+#define moses_DynSuffixArray_h
+
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <utility>
+#include "Util.h"
+#include "File.h"
+#include "DynSAInclude/types.h"
+
+namespace Moses {
+using std::vector;
+using std::pair;
+typedef std::vector<unsigned> vuint_t;
+
+class DynSuffixArray {
+public:
+ DynSuffixArray();
+ DynSuffixArray(vuint_t*);
+ ~DynSuffixArray();
+ bool getCorpusIndex(const vuint_t*, vuint_t*);
+ void load(FILE*);
+ void save(FILE*);
+private:
+ vuint_t* SA_;
+ vuint_t* ISA_;
+ vuint_t* F_;
+ vuint_t* L_;
+ vuint_t* corpus_;
+ void buildAuxArrays();
+ void qsort(int* array, int begin, int end);
+ int compare(int, int, int);
+ void reorder(unsigned, unsigned);
+ void insertFactor(vuint_t*, unsigned);
+ void deleteFactor(unsigned, unsigned);
+ void substituteFactor(vuint_t*, unsigned);
+ int LF(unsigned);
+ int rank(unsigned, unsigned);
+ int F_firstIdx(unsigned);
+ void printAuxArrays() {
+ std::cerr << "SA\tISA\tF_\tL_\n";
+ for(int i=0; i < SA_->size(); ++i)
+ std::cerr << SA_->at(i) << "\t" << ISA_->at(i) << "\t" << F_->at(i) << "\t" << L_->at(i) << std::endl;
+ }
+};
+
+} //end namespace
+
+#endif
diff --git a/moses/src/FFState.cpp b/moses/src/FFState.cpp
new file mode 100644
index 000000000..7e98c25fb
--- /dev/null
+++ b/moses/src/FFState.cpp
@@ -0,0 +1,8 @@
+#include "FFState.h"
+
+namespace Moses {
+
+FFState::~FFState() {}
+
+}
+
diff --git a/moses/src/FFState.h b/moses/src/FFState.h
new file mode 100644
index 000000000..ab0a171fe
--- /dev/null
+++ b/moses/src/FFState.h
@@ -0,0 +1,13 @@
+#ifndef moses_FFState_h
+#define moses_FFState_h
+
+namespace Moses {
+
+class FFState {
+ public:
+ virtual ~FFState();
+ virtual int Compare(const FFState& other) const = 0;
+};
+
+}
+#endif
diff --git a/moses/src/Factor.cpp b/moses/src/Factor.cpp
new file mode 100644
index 000000000..57fd3304e
--- /dev/null
+++ b/moses/src/Factor.cpp
@@ -0,0 +1,53 @@
+// $Id: Factor.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "Factor.h"
+
+using namespace std;
+
+namespace Moses
+{
+Factor::Factor(FactorDirection direction, FactorType factorType, const std::string *factorString, size_t id)
+://m_direction(direction)
+//,m_factorType(factorType)
+m_ptrString(factorString)
+,m_id(id)
+{}
+
+Factor::Factor(FactorDirection direction, FactorType factorType, const std::string *factorString)
+//:m_direction(direction)
+//,m_factorType(factorType)
+:m_ptrString(factorString)
+,m_id(NOT_FOUND)
+{}
+
+TO_STRING_BODY(Factor)
+
+// friend
+ostream& operator<<(ostream& out, const Factor& factor)
+{
+ out << factor.GetString();
+ return out;
+}
+
+}
+
+
diff --git a/moses/src/Factor.h b/moses/src/Factor.h
new file mode 100644
index 000000000..9215ecd8b
--- /dev/null
+++ b/moses/src/Factor.h
@@ -0,0 +1,147 @@
+// $Id: Factor.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Factor_h
+#define moses_Factor_h
+
+#include <sstream>
+#include <iostream>
+#include <list>
+#include <vector>
+#include <map>
+#include <string>
+#include "TypeDef.h"
+#include "Util.h"
+#include "hash.h"
+
+namespace Moses
+{
+
+class FactorCollection;
+
+/** Represents a factor (word, POS, etc) on the E or F side
+ *
+ * A Factor object is a tuple of direction (Input or Output,
+ * corresponding to French or English), a type (surface form,
+ * POS, stem, etc), and the value of the factor.
+ *
+ * @TODO I find this design problematic- essentially, a factor should
+ * just be a value type and the factor type and "direction"
+ * should be the keys in a larger identification system that
+ * find instances of specific factors.
+ *
+ */
+class Factor
+{
+ friend std::ostream& operator<<(std::ostream&, const Factor&);
+
+ // only these classes are allowed to instantiate this class
+ friend class FactorCollection;
+
+protected:
+
+ //FactorDirection m_direction;
+ //FactorType m_factorType;
+ const std::string *m_ptrString;
+ const size_t m_id;
+
+ //! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
+ Factor(FactorDirection direction, FactorType factorType, const std::string *factorString, size_t id);
+ //! no id set. do not use to create new factors; only used to check whether a factor exists
+ Factor(FactorDirection direction, FactorType factorType, const std::string *factorString);
+
+public:
+ //! returns whether this factor is part of the source ('Input') or target ('Output') language
+ //inline FactorDirection GetFactorDirection() const
+ //{
+ // return m_direction;
+ //}
+ //! index, FactorType. For example, 0=surface, 1=POS. The actual mapping is user defined
+ //inline FactorType GetFactorType() const
+ //{
+ // return m_factorType;
+ //}
+ //! original string representation of the factor
+ inline const std::string &GetString() const
+ {
+ return *m_ptrString;
+ }
+ //! contiguous ID
+ inline size_t GetId() const
+ {
+ return m_id;
+ }
+
+ /*
+ //! Alternative comparison between factors. Not yet used
+ inline unsigned int GetHash() const
+ {
+ unsigned int h=quick_hash((const char*)&m_direction, sizeof(FactorDirection), 0xc7e7f2fd);
+ h=quick_hash((const char*)&m_factorType, sizeof(FactorType), h);
+ h=quick_hash((const char*)&m_ptrString, sizeof(const std::string *), h);
+ return h;
+ }
+ */
+
+ /** transitive comparison between 2 factors.
+ * -1 = less than
+ * +1 = more than
+ * 0 = same
+ * Used by operator< & operator==, as well as other classes
+ */
+ inline int Compare(const Factor &compare) const
+ {
+ if (m_ptrString < compare.m_ptrString)
+ return -1;
+ if (m_ptrString > compare.m_ptrString)
+ return 1;
+/*
+ if (m_direction < compare.m_direction)
+ return -1;
+ if (m_direction > compare.m_direction)
+ return 1;
+
+ if (m_factorType < compare.m_factorType)
+ return -1;
+ if (m_factorType > compare.m_factorType)
+ return 1;
+*/
+ return 0;
+ }
+ //! transitive comparison used for adding objects into FactorCollection
+ inline bool operator<(const Factor &compare) const
+ {
+ return Compare(compare) < 0;
+ }
+
+ // quick equality comparison. Not used
+ inline bool operator==(const Factor &compare) const
+ {
+ return this == &compare;
+ }
+
+ TO_STRING();
+
+};
+
+
+}
+#endif
diff --git a/moses/src/FactorCollection.cpp b/moses/src/FactorCollection.cpp
new file mode 100644
index 000000000..7cb1cb2e1
--- /dev/null
+++ b/moses/src/FactorCollection.cpp
@@ -0,0 +1,117 @@
+// $Id: FactorCollection.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include "FactorCollection.h"
+#include "LanguageModel.h"
+#include "Util.h"
+
+using namespace std;
+
+namespace Moses
+{
+FactorCollection FactorCollection::s_instance;
+
+void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &filePath)
+{
+ ifstream inFile(filePath.c_str());
+
+ string line;
+#ifdef WITH_THREADS
+ boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
+ boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
+#endif
+ while( !getline(inFile, line, '\n').eof())
+ {
+ vector<string> token = Tokenize( line );
+ if (token.size() < 2)
+ {
+ continue;
+ }
+ // looks like good line
+ AddFactor(direction, factorType, token[1]);
+ }
+}
+
+bool FactorCollection::Exists(FactorDirection direction, FactorType factorType, const string &factorString)
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> lock(m_accessLock);
+#endif
+ // find string id
+ const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
+
+ FactorSet::const_iterator iterFactor;
+ Factor search(direction, factorType, ptrString); // id not used for searching
+
+ iterFactor = m_collection.find(search);
+ return iterFactor != m_collection.end();
+}
+
+const Factor *FactorCollection::AddFactor(FactorDirection direction
+ , FactorType factorType
+ , const string &factorString)
+{
+#ifdef WITH_THREADS
+ boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
+ boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
+#endif
+ // find string id
+ const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
+ pair<FactorSet::iterator, bool> ret = m_collection.insert( Factor(direction, factorType, ptrString, m_factorId) );
+ if (ret.second)
+ ++m_factorId; // new factor, make sure next new factor has different id
+
+ const Factor *factor = &(*ret.first);
+ return factor;
+}
+
+FactorCollection::~FactorCollection()
+{
+ //FactorSet::iterator iter;
+ //for (iter = m_collection.begin() ; iter != m_collection.end() ; iter++)
+ //{
+ // delete (*iter);
+ //}
+}
+
+TO_STRING_BODY(FactorCollection);
+
+// friend
+ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
+{
+ FactorSet::const_iterator iterFactor;
+
+ for (iterFactor = factorCollection.m_collection.begin() ; iterFactor != factorCollection.m_collection.end() ; ++iterFactor)
+ {
+ const Factor &factor = *iterFactor;
+ out << factor;
+ }
+
+ return out;
+}
+
+}
+
+
diff --git a/moses/src/FactorCollection.h b/moses/src/FactorCollection.h
new file mode 100644
index 000000000..9d1f3ba21
--- /dev/null
+++ b/moses/src/FactorCollection.h
@@ -0,0 +1,91 @@
+// $Id: FactorCollection.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_FactorCollection_h
+#define moses_FactorCollection_h
+
+#include <set>
+#include <string>
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#endif
+
+#include "Factor.h"
+
+namespace Moses
+{
+
+class LanguageModel;
+
+typedef std::set<Factor> FactorSet;
+typedef std::set<std::string> StringSet;
+
+/** collection of factors
+ *
+ * All Factors in moses are accessed and created by a FactorCollection.
+ * By enforcing this strict creation processes (ie, forbidding factors
+ * from being created on the stack, etc), their memory addresses can
+ * be used as keys to uniquely identify them.
+ * Only 1 FactorCollection object should be created.
+ */
+class FactorCollection
+{
+ friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
+
+protected:
+ static FactorCollection s_instance;
+#ifdef WITH_THREADS
+ //reader-writer lock
+ boost::shared_mutex m_accessLock;
+#endif
+
+ size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
+ FactorSet m_collection; /**< collection of all factors */
+ StringSet m_factorStringCollection; /**< collection of unique string used by factors */
+
+ //! constructor. only the 1 static variable can be created
+ FactorCollection()
+ :m_factorId(0)
+ {}
+
+public:
+ static FactorCollection& Instance() { return s_instance; }
+
+ //! Destructor
+ ~FactorCollection();
+
+ //! Test to see whether a factor exists
+ bool Exists(FactorDirection direction, FactorType factorType, const std::string &factorString);
+ /** returns a factor with the same direction, factorType and factorString.
+ * If a factor already exists in the collection, return the existing factor; if not, create a new one
+ */
+ const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString);
+ //! Load list of factors. Deprecated
+ void LoadVocab(FactorDirection direction, FactorType factorType, const std::string &filePath);
+
+ TO_STRING();
+
+};
+
+
+}
+#endif
diff --git a/moses/src/FactorTypeSet.cpp b/moses/src/FactorTypeSet.cpp
new file mode 100644
index 000000000..0be3a76a9
--- /dev/null
+++ b/moses/src/FactorTypeSet.cpp
@@ -0,0 +1,59 @@
+// $Id: FactorTypeSet.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "FactorTypeSet.h"
+
+using namespace std;
+
+namespace Moses
+{
+FactorMask::FactorMask(const vector<FactorType> &factors)
+{
+ vector<FactorType>::const_iterator iter;
+ for (iter = factors.begin() ; iter != factors.end() ; ++iter)
+ {
+ this->set(*iter);
+ }
+}
+
+TO_STRING_BODY(FactorMask);
+
+// friend
+std::ostream& operator<<(std::ostream& out, const FactorMask& fm)
+{
+ out << "FactorMask<";
+ bool first = true;
+ for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ {
+ if (fm[currFactor])
+ {
+ if (first) { first = false; } else { out << ","; }
+ out << currFactor;
+ }
+ }
+ out << ">";
+
+ return out;
+}
+
+}
+
+
diff --git a/moses/src/FactorTypeSet.h b/moses/src/FactorTypeSet.h
new file mode 100644
index 000000000..1911d3a68
--- /dev/null
+++ b/moses/src/FactorTypeSet.h
@@ -0,0 +1,53 @@
+// $Id: FactorTypeSet.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_FactorTypeSet_h
+#define moses_FactorTypeSet_h
+
+#include <iostream>
+#include <bitset>
+#include <vector>
+#include "TypeDef.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+/** set of unique FactorTypes. Used to store what factor types are used in phrase tables etc
+*/
+class FactorMask : public std::bitset<MAX_NUM_FACTORS>
+{
+ friend std::ostream& operator<<(std::ostream&, const FactorMask&);
+
+public:
+ //! construct object from list of FactorType.
+ explicit FactorMask(const std::vector<FactorType> &factors);
+ //! default constructor
+ inline FactorMask() {}
+ //! copy constructor
+ FactorMask(const std::bitset<MAX_NUM_FACTORS>& rhs) : std::bitset<MAX_NUM_FACTORS>(rhs) { }
+
+
+ TO_STRING();
+};
+
+}
+#endif
diff --git a/moses/src/FeatureFunction.cpp b/moses/src/FeatureFunction.cpp
new file mode 100644
index 000000000..5acfb9edf
--- /dev/null
+++ b/moses/src/FeatureFunction.cpp
@@ -0,0 +1,22 @@
+#include "FeatureFunction.h"
+
+#include <cassert>
+
+namespace Moses {
+
+FeatureFunction::~FeatureFunction() {}
+
+bool StatelessFeatureFunction::IsStateless() const { return true; }
+bool StatelessFeatureFunction::ComputeValueInTranslationOption() const {
+ return false;
+}
+void StatelessFeatureFunction::Evaluate(
+ const TargetPhrase& cur_hypo,
+ ScoreComponentCollection* accumulator) const {
+ assert(!"Please implement Evaluate or set ComputeValueInTranslationOption to true");
+}
+
+bool StatefulFeatureFunction::IsStateless() const { return false; }
+
+}
+
diff --git a/moses/src/FeatureFunction.h b/moses/src/FeatureFunction.h
new file mode 100644
index 000000000..927ae7106
--- /dev/null
+++ b/moses/src/FeatureFunction.h
@@ -0,0 +1,64 @@
+#ifndef moses_FeatureFunction_h
+#define moses_FeatureFunction_h
+
+#include <vector>
+
+#include "ScoreProducer.h"
+
+namespace Moses {
+
+class TargetPhrase;
+class Hypothesis;
+class FFState;
+class ScoreComponentCollection;
+
+class FeatureFunction: public ScoreProducer {
+
+public:
+ virtual bool IsStateless() const = 0;
+ virtual ~FeatureFunction();
+
+};
+
+class StatelessFeatureFunction: public FeatureFunction {
+
+public:
+ //! Evaluate for stateless feature functions. Implement this.
+ virtual void Evaluate(
+ const TargetPhrase& cur_hypo,
+ ScoreComponentCollection* accumulator) const;
+
+ // If true, this value is expected to be included in the
+ // ScoreBreakdown in the TranslationOption once it has been
+ // constructed.
+ // Default: true
+ virtual bool ComputeValueInTranslationOption() const;
+
+ bool IsStateless() const;
+};
+
+class StatefulFeatureFunction: public FeatureFunction {
+
+public:
+
+ /**
+ * \brief This interface should be implemented.
+ * Notes: When evaluating the value of this feature function, you should avoid
+ * calling hypo.GetPrevHypo(). If you need something from the "previous"
+ * hypothesis, you should store it in an FFState object which will be passed
+ * in as prev_state. If you don't do this, you will get in trouble.
+ */
+ virtual FFState* Evaluate(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const = 0;
+
+ //! return the state associated with the empty hypothesis
+ virtual const FFState* EmptyHypothesisState() const = 0;
+
+ bool IsStateless() const;
+};
+
+}
+
+#endif
diff --git a/moses/src/File.cpp b/moses/src/File.cpp
new file mode 100644
index 000000000..624416d10
--- /dev/null
+++ b/moses/src/File.cpp
@@ -0,0 +1,4 @@
+
+#include "File.h"
+
+
diff --git a/moses/src/File.h b/moses/src/File.h
new file mode 100644
index 000000000..14ea54ee5
--- /dev/null
+++ b/moses/src/File.h
@@ -0,0 +1,122 @@
+// $Id: File.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/* ---------------------------------------------------------------- */
+/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
+/* Richard Zens */
+/* ---------------------------------------------------------------- */
+#ifndef moses_File_h
+#define moses_File_h
+
+#include <cstdio>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include "UserMessage.h"
+#include "TypeDef.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+#ifdef WIN32
+#define OFF_T __int64
+#define FTELLO(file) _ftelli64(file)
+#define FSEEKO(file, offset, origin) _fseeki64(file, offset, origin)
+
+#else
+#define OFF_T off_t
+#define FTELLO(f) ftello(f)
+#define FSEEKO(file, offset, origin) fseeko(file, offset, origin)
+#endif
+
+static const OFF_T InvalidOffT=-1;
+
+// WARNING:
+// these functions work only for bitwise read/write-able types
+
+template<typename T> inline size_t fWrite(FILE* f,const T& t) {
+ if(fwrite(&t,sizeof(t),1,f)!=1) {TRACE_ERR("ERROR:: fwrite!\n");abort();}
+ return sizeof(t);
+}
+
+template<typename T> inline void fRead(FILE* f,T& t) {
+ if(fread(&t,sizeof(t),1,f)!=1) {TRACE_ERR("ERROR: fread!\n");abort();}
+}
+
+template<typename T> inline size_t fWrite(FILE* f,const T* b,const T* e) {
+ UINT32 s=std::distance(b,e);size_t rv=fWrite(f,s);
+ if(fwrite(b,sizeof(T),s,f)!=s) {TRACE_ERR("ERROR: fwrite!\n");abort();}
+ return rv+sizeof(T)*s;
+}
+
+template<typename T> inline size_t fWrite(FILE* f,const T b,const T e) {
+ UINT32 s=std::distance(b,e);size_t rv=fWrite(f,s);
+ if(fwrite(&(*b),sizeof(T),s,f)!=s) {TRACE_ERR("ERROR: fwrite!\n");abort();}
+ return rv+sizeof(T)*s;
+}
+
+template<typename C> inline size_t fWriteVector(FILE* f,const C& v) {
+ UINT32 s=v.size();
+ size_t rv=fWrite(f,s);
+ if(fwrite(&v[0],sizeof(typename C::value_type),s,f)!=s) {TRACE_ERR("ERROR: fwrite!\n");abort();}
+ return rv+sizeof(typename C::value_type)*s;
+}
+
+template<typename C> inline void fReadVector(FILE* f, C& v) {
+ UINT32 s;fRead(f,s);
+ v.resize(s);
+ size_t r=fread(&(*v.begin()),sizeof(typename C::value_type),s,f);
+ if(r!=s) {TRACE_ERR("ERROR: freadVec! "<<r<<" "<<s<<"\n");abort();}
+}
+
+inline size_t fWriteString(FILE* f,const char* e, UINT32 s) {
+ size_t rv=fWrite(f,s);
+ if(fwrite(e,sizeof(char),s,f)!=s) {TRACE_ERR("ERROR:: fwrite!\n");abort();}
+ return rv+sizeof(char)*s;
+}
+
+inline void fReadString(FILE* f,std::string& e) {
+ UINT32 s;fRead(f,s);
+ char* a=new char[s+1];
+ if(fread(a,sizeof(char),s,f)!=s) {TRACE_ERR("ERROR: fread!\n");abort();}
+ a[s]='\0';
+ e.assign(a);
+}
+
+inline size_t fWriteStringVector(FILE* f,const std::vector<std::string>& v) {
+ UINT32 s=v.size();
+ size_t totrv=fWrite(f,s);
+ for (size_t i=0;i<s;i++){ totrv+=fWriteString(f,v.at(i).c_str(),v.at(i).size()); }
+ return totrv;
+}
+
+inline void fReadStringVector(FILE* f, std::vector<std::string>& v) {
+ UINT32 s;fRead(f,s);v.resize(s);
+
+ for (size_t i=0;i<s;i++){ fReadString(f,v.at(i)); }
+}
+
+inline OFF_T fTell(FILE* f) {return FTELLO(f);}
+
+inline void fSeek(FILE* f,OFF_T o) {
+ if(FSEEKO(f,o,SEEK_SET)<0) {
+ TRACE_ERR("ERROR: could not fseeko position "<<o<<"\n");
+ if(o==InvalidOffT) TRACE_ERR("You tried to seek for 'InvalidOffT'!\n");
+ abort();
+ }
+}
+
+inline FILE* fOpen(const char* fn,const char* m) {
+ if(FILE* f=fopen(fn,m))
+ return f;
+ else {
+ UserMessage::Add(std::string("ERROR: could not open file ") + fn + " with mode " + m + "\n");
+ assert(false);
+ return NULL;
+ }
+}
+inline void fClose(FILE* f) {fclose(f);} // for consistent function names only
+
+}
+
+#endif
diff --git a/moses/src/FilePtr.h b/moses/src/FilePtr.h
new file mode 100644
index 000000000..8876e2449
--- /dev/null
+++ b/moses/src/FilePtr.h
@@ -0,0 +1,55 @@
+// $Id: FilePtr.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/* ---------------------------------------------------------------- */
+/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
+/* Richard Zens */
+/* ---------------------------------------------------------------- */
+
+#ifndef moses_FilePtr_h
+#define moses_FilePtr_h
+
+#include "File.h"
+
+namespace Moses
+{
+
+// smart pointer for on-demand loading from file
+// requirement: T has a constructor T(FILE*)
+
// Smart pointer that lazily constructs a T from a file position the first
// time it is dereferenced. Requirement: T has a constructor T(FILE*).
template<typename T> class FilePtr {
public:
  typedef T* Ptr;
private:
  FILE* f;        // file the object is loaded from (not owned)
  OFF_T pos;      // offset of the serialized object, or InvalidOffT
  mutable Ptr t;  // cached object; mutable so const access can trigger load()
public:
  FilePtr(FILE* f_=0,OFF_T p=0) : f(f_),pos(p),t(0) {}
  // NOTE(review): the destructor deliberately does NOT delete t -- callers
  // must invoke free() themselves. Presumably this allows FilePtr to be
  // copied by value in containers without double-frees; confirm ownership
  // policy at the call sites.
  ~FilePtr() {}

  // Re-point at a (file, offset) pair; does not release a loaded object.
  void set(FILE* f_,OFF_T p) {f=f_;pos=p;}
  // Release the cached object; the next dereference reloads it from file.
  void free() {delete t; t=0;}

  // All dereferences load on demand.
  T& operator* () {load();return *t;}
  Ptr operator->() {load();return t;}
  operator Ptr () {load();return t;}

  const T& operator* () const {load();return *t;}
  Ptr operator->() const {load();return t;}
  operator Ptr () const {load();return t;}

  // direct access to pointer, use with care! (does not trigger a load)
  Ptr getPtr() {return t;}
  Ptr getPtr() const {return t;}

  // True when a valid (file, offset) source is configured.
  operator bool() const {return (f && pos!=InvalidOffT);}

  // Seek to the stored offset and construct T from the stream (once).
  void load() const {
    if(t) return;
    if(f && pos!=InvalidOffT) {fSeek(f,pos); t=new T(f);}
  }
};
+
+}
+
+#endif
diff --git a/moses/src/FloydWarshall.cpp b/moses/src/FloydWarshall.cpp
new file mode 100644
index 000000000..b1b8c0bce
--- /dev/null
+++ b/moses/src/FloydWarshall.cpp
@@ -0,0 +1,34 @@
+#include <cassert>
+#include <climits>
+#include <vector>
+
+#define MAX_DIST (INT_MAX / 2)
+
+//#include "FloydWarshall.h"
+
+// All-pairs shortest path algorithm
+void floyd_warshall(const std::vector<std::vector<bool> >& edges, std::vector<std::vector<int> >& dist)
+{
+ assert(edges.size() == edges.front().size());
+ dist.clear();
+ dist.resize(edges.size(), std::vector<int>(edges.size(), 0));
+
+ size_t num_edges = edges.size();
+
+ for (size_t i=0; i<num_edges; ++i) {
+ for (size_t j=0; j<num_edges; ++j) {
+ if (edges[i][j])
+ dist[i][j] = 1;
+ else
+ dist[i][j] = MAX_DIST;
+ if (i == j) dist[i][j] = MAX_DIST;
+ }
+ }
+
+ for (size_t k=0; k<num_edges; ++k)
+ for (size_t i=0; i<num_edges; ++i)
+ for (size_t j=0; j<num_edges; ++j)
+ if (dist[i][j] > (dist[i][k] + dist[k][j]))
+ dist[i][j] = dist[i][k] + dist[k][j];
+}
+
diff --git a/moses/src/FloydWarshall.h b/moses/src/FloydWarshall.h
new file mode 100644
index 000000000..96845caf6
--- /dev/null
+++ b/moses/src/FloydWarshall.h
@@ -0,0 +1,12 @@
+#ifndef moses_FloydWarshall_h
+#define moses_FloydWarshall_h
+
+#include <vector>
+
+/**
+ * Floyd-Warshall all-pairs shortest path algorithm
+ * See CLR (1990). Introduction to Algorithms, p. 558-565
+ */
+void floyd_warshall(const std::vector<std::vector<bool> >& edges, std::vector<std::vector<int> >& distances);
+
+#endif
diff --git a/moses/src/GenerationDictionary.cpp b/moses/src/GenerationDictionary.cpp
new file mode 100644
index 000000000..f29a18e81
--- /dev/null
+++ b/moses/src/GenerationDictionary.cpp
@@ -0,0 +1,164 @@
+// $Id: GenerationDictionary.cpp 2087 2009-02-06 15:43:06Z redpony $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <fstream>
+#include <string>
+#include "GenerationDictionary.h"
+#include "FactorCollection.h"
+#include "Word.h"
+#include "Util.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+#include "UserMessage.h"
+
+using namespace std;
+
+namespace Moses
+{
// Construct with the number of feature scores declared in the ini file and
// register this dictionary as a score producer so its features get slots in
// the global score vector.
GenerationDictionary::GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager)
  : Dictionary(numFeatures)
{
  scoreIndexManager.AddScoreProducer(this);
}
+
// Load the generation table from a whitespace-separated text file.
// Each line: <input factors> <output factors> <score_1> ... <score_k>,
// with factor values joined by '|'. Returns false (after reporting via
// UserMessage) on an unreadable file or too few score columns.
// Ownership: inputWord keys stored in m_collection are deleted in the
// destructor; duplicates are deleted here immediately.
bool GenerationDictionary::Load(const std::vector<FactorType> &input
                  , const std::vector<FactorType> &output
                  , const std::string &filePath
                  , FactorDirection direction)
{
  FactorCollection &factorCollection = FactorCollection::Instance();

  const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();

  //factors
  m_inputFactors = FactorMask(input);
  m_outputFactors = FactorMask(output);
  VERBOSE(2,"GenerationDictionary: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);

  // data from file
  InputFileStream inFile(filePath);
  if (!inFile.good()) {
    UserMessage::Add(string("Couldn't read ") + filePath);
    return false;
  }

  m_filePath = filePath;
  string line;
  size_t lineNum = 0;
  while(getline(inFile, line))
  {
    ++lineNum;
    vector<string> token = Tokenize( line );

    // add each line in generation file into class
    Word *inputWord = new Word(); // deleted in destructor
    Word outputWord;

    // create word with certain factors filled out

    // inputs
    // NOTE(review): token[0]/token[1] and factorString[i] are unchecked;
    // a malformed line with too few fields will crash -- confirm the
    // generation files are trusted/validated upstream.
    vector<string> factorString = Tokenize( token[0], "|" );
    for (size_t i = 0 ; i < input.size() ; i++)
    {
      FactorType factorType = input[i];
      const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
      inputWord->SetFactor(factorType, factor);
    }

    factorString = Tokenize( token[1], "|" );
    for (size_t i = 0 ; i < output.size() ; i++)
    {
      FactorType factorType = output[i];

      const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
      outputWord.SetFactor(factorType, factor);
    }

    // remaining columns are feature scores; require at least as many as
    // the config declared (extras are ignored)
    size_t numFeaturesInFile = token.size() - 2;
    if (numFeaturesInFile < numFeatureValuesInConfig) {
      stringstream strme;
      strme << filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig
            << " feature values, but found " << numFeaturesInFile << std::endl;
      UserMessage::Add(strme.str());
      return false;
    }
    std::vector<float> scores(numFeatureValuesInConfig, 0.0f);
    for (size_t i = 0; i < numFeatureValuesInConfig; i++)
      scores[i] = FloorScore(TransformScore(Scan<float>(token[2+i])));

    Collection::iterator iterWord = m_collection.find(inputWord);
    if (iterWord == m_collection.end())
    {
      m_collection[inputWord][outputWord].Assign(this, scores);
    }
    else
    { // source word already in there. delete input word to avoid mem leak
      (iterWord->second)[outputWord].Assign(this, scores);
      delete inputWord;
    }
  }

  inFile.Close();
  return true;
}
+
// Release the heap-allocated Word keys stored by Load(); the mapped
// OutputWordCollection values are owned by the map itself.
GenerationDictionary::~GenerationDictionary()
{
  Collection::const_iterator iter;
  for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter)
  {
    delete iter->first;
  }
}
+
// Number of feature scores this dictionary contributes (set at construction
// from the ini file, stored by the Dictionary base class).
size_t GenerationDictionary::GetNumScoreComponents() const
{
  return m_numScoreComponent;
}
+
// Human-readable label for logs/n-best output, including the table path.
std::string GenerationDictionary::GetScoreProducerDescription() const
{
  return "Generation score, file=" + m_filePath;
}
+
+const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
+{
+ const OutputWordCollection *ret;
+
+ Collection::const_iterator iter = m_collection.find(&word);
+ if (iter == m_collection.end())
+ { // can't find source phrase
+ ret = NULL;
+ }
+ else
+ {
+ ret = &iter->second;
+ }
+ return ret;
+}
+
// Generation scores are precomputed and cached in the translation option
// rather than recomputed during search.
bool GenerationDictionary::ComputeValueInTranslationOption() const {
  return true;
}
+
+
+}
+
diff --git a/moses/src/GenerationDictionary.h b/moses/src/GenerationDictionary.h
new file mode 100644
index 000000000..c202fda28
--- /dev/null
+++ b/moses/src/GenerationDictionary.h
@@ -0,0 +1,96 @@
+// $Id: GenerationDictionary.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_GenerationDictionary_h
+#define moses_GenerationDictionary_h
+
+#include <list>
+#include <map>
+#include <vector>
+#include "ScoreComponentCollection.h"
+#include "Phrase.h"
+#include "TypeDef.h"
+#include "Dictionary.h"
+#include "FeatureFunction.h"
+
+namespace Moses
+{
+
+class FactorCollection;
+
+typedef std::map < Word , ScoreComponentCollection > OutputWordCollection;
+ // 1st = output phrase
+ // 2nd = log probability (score)
+
+/** Implementation of a generation table in a trie.
+ */
/** Implementation of a generation table in a trie.
 *  Maps an input Word to the collection of output Words it can generate,
 *  each with a vector of feature scores. Acts as a stateless feature
 *  function whose values are cached in the translation option.
 */
class GenerationDictionary : public Dictionary, public StatelessFeatureFunction
{
  // Keys are heap-allocated by Load() and owned by this object (freed in
  // the destructor); lookup compares by value via WordComparer.
  typedef std::map<const Word* , OutputWordCollection, WordComparer> Collection;
protected:
  Collection m_collection;
  // 1st = source
  // 2nd = target
  std::string m_filePath;  // path of the loaded table, used in descriptions

public:
  /** constructor.
  * \param numFeatures number of score components, as specified in ini file
  */
  GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager);
  virtual ~GenerationDictionary();

  // returns Generate
  DecodeType GetDecodeType() const
  {
    return Generate;
  }

  //! load data file; returns false on parse/IO failure
  bool Load(const std::vector<FactorType> &input
            , const std::vector<FactorType> &output
            , const std::string &filePath
            , FactorDirection direction);

  size_t GetNumScoreComponents() const;
  std::string GetScoreProducerDescription() const;
  std::string GetScoreProducerWeightShortName() const
  {
    return "g";
  }

  /** number of unique input entries in the generation table.
  * NOT the number of lines in the generation table
  */
  size_t GetSize() const
  {
    return m_collection.size();
  }
  /** returns a bag of output words, OutputWordCollection, for a particular input word.
  * Or NULL if the input word isn't found. The search function used is the WordComparer functor
  */
  const OutputWordCollection *FindWord(const Word &word) const;
  virtual bool ComputeValueInTranslationOption() const;
};
+
+
+}
+#endif
diff --git a/moses/src/GlobalLexicalModel.cpp b/moses/src/GlobalLexicalModel.cpp
new file mode 100644
index 000000000..bec324a3f
--- /dev/null
+++ b/moses/src/GlobalLexicalModel.cpp
@@ -0,0 +1,185 @@
+#include <fstream>
+#include "GlobalLexicalModel.h"
+#include "StaticData.h"
+#include "InputFileStream.h"
+
+namespace Moses
+{
// Construct the model: register it as a single-weight score producer,
// load the feature table from file, and create the **BIAS** pseudo input
// word that contributes a per-target-word prior.
GlobalLexicalModel::GlobalLexicalModel(const string &filePath,
                                       const float weight,
                                       const vector< FactorType >& inFactors,
                                       const vector< FactorType >& outFactors)
{
  std::cerr << "Creating global lexical model...\n";

  // register as score producer
  // NOTE(review): const_cast mutates global StaticData during construction;
  // presumably safe because models are built before decoding starts.
  const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
  std::vector< float > weights;
  weights.push_back( weight );
  const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);

  // load model
  LoadData( filePath, inFactors, outFactors );

  // define bias word
  FactorCollection &factorCollection = FactorCollection::Instance();
  m_bias = new Word();
  const Factor* factor = factorCollection.AddFactor( Input, inFactors[0], "**BIAS**" );
  m_bias->SetFactor( inFactors[0], factor );

  // per-sentence cache; allocated in InitializeForInput
  m_cache = NULL;
}
+
+GlobalLexicalModel::~GlobalLexicalModel(){
+ // delete words in the hash data structure
+ DoubleHash::const_iterator iter;
+ for(iter = m_hash.begin(); iter != m_hash.end(); iter++ )
+ {
+ map< const Word*, float, WordComparer >::const_iterator iter2;
+ for(iter2 = iter->second.begin(); iter2 != iter->second.end(); iter2++ )
+ {
+ delete iter2->first; // delete input word
+ }
+ delete iter->first; // delete output word
+ }
+ if (m_cache != NULL) delete m_cache;
+}
+
// Parse the model file into m_hash. Each line has exactly three
// space-separated fields: <output word> <input word> <score>; factor
// values within a word are joined by the configured factor delimiter.
// Aborts on a malformed line. Word keys are heap-allocated here and
// freed in the destructor; duplicate output words are deleted on the spot.
void GlobalLexicalModel::LoadData(const string &filePath,
                                  const vector< FactorType >& inFactors,
                                  const vector< FactorType >& outFactors)
{
  FactorCollection &factorCollection = FactorCollection::Instance();
  const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();

  VERBOSE(2, "Loading global lexical model from file " << filePath << endl);

  m_inputFactors = FactorMask(inFactors);
  m_outputFactors = FactorMask(outFactors);
  InputFileStream inFile(filePath);

  // reading in data one line at a time
  size_t lineNum = 0;
  string line;
  while(getline(inFile, line))
  {
    ++lineNum;
    vector<string> token = Tokenize<string>(line, " ");

    if (token.size() != 3) // format checking
    {
      stringstream errorMessage;
      errorMessage << "Syntax error at " << filePath << ":" << lineNum << endl << line << endl;
      UserMessage::Add(errorMessage.str());
      abort();
    }

    // create the output word
    Word *outWord = new Word();
    vector<string> factorString = Tokenize( token[0], factorDelimiter );
    for (size_t i=0 ; i < outFactors.size() ; i++)
    {
      const FactorDirection& direction = Output;
      const FactorType& factorType = outFactors[i];
      const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
      outWord->SetFactor( factorType, factor );
    }

    // create the input word
    Word *inWord = new Word();
    factorString = Tokenize( token[1], factorDelimiter );
    for (size_t i=0 ; i < inFactors.size() ; i++)
    {
      const FactorDirection& direction = Input;
      const FactorType& factorType = inFactors[i];
      const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
      inWord->SetFactor( factorType, factor );
    }

    // maximum entropy feature score
    float score = Scan<float>(token[2]);

    // std::cerr << "storing word " << *outWord << " " << *inWord << " " << score << endl;

    // store feature in hash, keyed by output word then input word
    DoubleHash::iterator keyOutWord = m_hash.find( outWord );
    if( keyOutWord == m_hash.end() )
    {
      m_hash[outWord][inWord] = score;
    }
    else // already have hash for outword, delete the word to avoid leaks
    {
      (keyOutWord->second)[inWord] = score;
      delete outWord;
    }
  }
}
+
+void GlobalLexicalModel::InitializeForInput( Sentence const& in )
+{
+ m_input = &in;
+ if (m_cache != NULL) delete m_cache;
+ m_cache = new map< const TargetPhrase*, float >;
+}
+
// Score a target phrase against the whole current input sentence.
// For each target word: sum the bias feature plus the feature for every
// distinct input word, then squash through a sigmoid and take the floored
// log; the phrase score is the sum over target words.
float GlobalLexicalModel::ScorePhrase( const TargetPhrase& targetPhrase ) const
{
  float score = 0;
  for(size_t targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ )
  {
    float sum = 0;
    const Word& targetWord = targetPhrase.GetWord( targetIndex );
    VERBOSE(2,"glm " << targetWord << ": ");
    const DoubleHash::const_iterator targetWordHash = m_hash.find( &targetWord );
    if( targetWordHash != m_hash.end() )
    {
      // bias feature fires for every known target word
      SingleHash::const_iterator inputWordHash = targetWordHash->second.find( m_bias );
      if( inputWordHash != targetWordHash->second.end() )
      {
        VERBOSE(2,"*BIAS* " << inputWordHash->second);
        sum += inputWordHash->second;
      }

      set< const Word*, WordComparer > alreadyScored; // do not score a word twice
      for(size_t inputIndex = 0; inputIndex < m_input->GetSize(); inputIndex++ )
      {
        const Word& inputWord = m_input->GetWord( inputIndex );
        if ( alreadyScored.find( &inputWord ) == alreadyScored.end() )
        {
          SingleHash::const_iterator inputWordHash = targetWordHash->second.find( &inputWord );
          if( inputWordHash != targetWordHash->second.end() )
          {
            VERBOSE(2," " << inputWord << " " << inputWordHash->second);
            sum += inputWordHash->second;
          }
          alreadyScored.insert( &inputWord );
        }
      }
    }
    // Hal Daume says: 1/( 1 + exp [ - sum_i w_i * f_i ] )
    VERBOSE(2," p=" << FloorScore( log(1/(1+exp(-sum))) ) << endl);
    score += FloorScore( log(1/(1+exp(-sum))) );
  }
  return score;
}
+
// Memoized front-end to ScorePhrase: reuse the score if this exact
// TargetPhrase pointer was scored for the current sentence, otherwise
// compute and cache it.
float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const
{
  map< const TargetPhrase*, float >::const_iterator query = m_cache->find( &targetPhrase );
  if ( query != m_cache->end() )
  {
    return query->second;
  }

  float score = ScorePhrase( targetPhrase );
  m_cache->insert( pair<const TargetPhrase*, float>(&targetPhrase, score) );
  // NOTE(review): this prints unconditionally on every cache miss; looks
  // like leftover debug output that should be guarded by VERBOSE like the
  // rest of this file -- confirm before silencing.
  std::cerr << "add to cache " << targetPhrase << ": " << score << endl;
  return score;
}
+
// Stateless-feature hook: add this model's (cached) phrase score to the
// hypothesis score breakdown.
void GlobalLexicalModel::Evaluate(const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator) const
{
  accumulator->PlusEquals( this, GetFromCacheOrScorePhrase( targetPhrase ) );
}
+
+}
diff --git a/moses/src/GlobalLexicalModel.h b/moses/src/GlobalLexicalModel.h
new file mode 100644
index 000000000..c98d4735b
--- /dev/null
+++ b/moses/src/GlobalLexicalModel.h
@@ -0,0 +1,76 @@
+#ifndef moses_GlobalLexicalModel_h
+#define moses_GlobalLexicalModel_h
+
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "Phrase.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "WordsRange.h"
+#include "ScoreProducer.h"
+#include "FeatureFunction.h"
+#include "FactorTypeSet.h"
+#include "Sentence.h"
+
+namespace Moses
+{
+
+class Factor;
+class Phrase;
+class Hypothesis;
+class InputType;
+
+using namespace std;
+
+/** Discriminatively trained global lexicon model
+ * This is a implementation of Mauser et al., 2009's model that predicts
+ * each output word from _all_ the input words. The intuition behind this
+ * feature is that it uses context words for disambiguation
+ */
+
class GlobalLexicalModel : public StatelessFeatureFunction {
  // feature table: output word -> (input word -> score); keys owned here
  typedef map< const Word*, map< const Word*, float, WordComparer >, WordComparer > DoubleHash;
  typedef map< const Word*, float, WordComparer > SingleHash;
private:
  DoubleHash m_hash;                            // loaded model parameters
  map< const TargetPhrase*, float > *m_cache;   // per-sentence score cache
  const Sentence *m_input;                      // current input sentence (not owned)
  Word *m_bias;                                 // pseudo input word "**BIAS**"

  FactorMask m_inputFactors;
  FactorMask m_outputFactors;

  // Parse the model file into m_hash; aborts on malformed lines.
  void LoadData(const string &filePath,
                const vector< FactorType >& inFactors,
                const vector< FactorType >& outFactors);

  // Score a phrase against the whole input sentence (sigmoid-log per word).
  float ScorePhrase( const TargetPhrase& targetPhrase ) const;
  // Memoized wrapper around ScorePhrase for the current sentence.
  float GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const;

public:
  GlobalLexicalModel(const string &filePath,
                     const float weight,
                     const vector< FactorType >& inFactors,
                     const vector< FactorType >& outFactors);
  virtual ~GlobalLexicalModel();

  // single feature weight
  virtual size_t GetNumScoreComponents() const {
    return 1;
  };

  virtual string GetScoreProducerDescription() const {
    return "GlobalLexicalModel";
  };

  virtual string GetScoreProducerWeightShortName() const {
    return "lex";
  };

  // Must be called once per sentence before Evaluate; resets the cache.
  void InitializeForInput( Sentence const& in );

  void Evaluate(const TargetPhrase&, ScoreComponentCollection* ) const;
};
+
+}
+#endif
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
new file mode 100644
index 000000000..be46fe7f0
--- /dev/null
+++ b/moses/src/Hypothesis.cpp
@@ -0,0 +1,512 @@
+// $Id: Hypothesis.cpp 2929 2010-02-22 23:42:35Z bhaddow $
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <iostream>
+#include <limits>
+#include <vector>
+#include <algorithm>
+
+#include "FFState.h"
+#include "TranslationOption.h"
+#include "TranslationOptionCollection.h"
+#include "DummyScoreProducers.h"
+#include "Hypothesis.h"
+#include "Util.h"
+#include "SquareMatrix.h"
+#include "LexicalReordering.h"
+#include "StaticData.h"
+#include "InputType.h"
+#include "LMList.h"
+#include "Manager.h"
+#include "hash.h"
+
+using namespace std;
+
+namespace Moses
+{
// Global counter used to hand out per-sentence hypothesis ids
// (reset to 1 by the seed-hypothesis constructor).
unsigned int Hypothesis::s_HypothesesCreated = 0;

#ifdef USE_HYPO_POOL
  // Pool allocator for hypotheses; avoids per-hypothesis heap traffic.
  ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
+
// Seed constructor: the empty hypothesis that starts decoding a sentence.
// Covers no source words, has no predecessor, and holds each stateful
// feature function's empty-hypothesis state.
Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget)
  : m_prevHypo(NULL)
  , m_targetPhrase(emptyTarget)
  , m_sourcePhrase(0)
  , m_sourceCompleted(source.GetSize())
  , m_sourceInput(source)
  , m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
  , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
  , m_wordDeleted(false)
  , m_ffStates(StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions().size())
  , m_arcList(NULL)
  , m_transOpt(NULL)
  , m_manager(manager)

  , m_id(0)
{ // used for initial seeding of trans process
  // initialize scores
  //_hash_computed = false;
  // restart the global id counter for this sentence
  s_HypothesesCreated = 1;
  ResetScore();
  const vector<const StatefulFeatureFunction*>& ffs = StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions();
  for (unsigned i = 0; i < ffs.size(); ++i)
    m_ffStates[i] = ffs[i]->EmptyHypothesisState();
}
+
/***
 * continue prevHypo by appending the phrases in transOpt
 */
// Extension constructor: copies the predecessor's coverage bitmap and
// score breakdown, then marks the new source span as covered. Feature
// states are sized but not filled here -- CalcScore computes them.
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
  : m_prevHypo(&prevHypo)
  , m_targetPhrase(transOpt.GetTargetPhrase())
  , m_sourcePhrase(transOpt.GetSourcePhrase())
  , m_sourceCompleted (prevHypo.m_sourceCompleted )
  , m_sourceInput (prevHypo.m_sourceInput)
  , m_currSourceWordsRange (transOpt.GetSourceWordsRange())
  , m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
                             ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
  , m_wordDeleted(false)
  , m_totalScore(0.0f)
  , m_futureScore(0.0f)
  , m_scoreBreakdown (prevHypo.m_scoreBreakdown)
  , m_ffStates(prevHypo.m_ffStates.size())
  , m_arcList(NULL)
  , m_transOpt(&transOpt)
  , m_manager(prevHypo.GetManager())
  , m_id(s_HypothesesCreated++)
{
  // assert that we are not extending our hypothesis by retranslating something
  // that this hypothesis has already translated!
  assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));

  //_hash_computed = false;
  m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
  m_wordDeleted = transOpt.IsDeletionOption();
}
+
// Destructor: releases the feature states this hypothesis owns and, if it
// was the winner of a recombination, every losing hypothesis on its arc
// list (FREEHYPO also returns pool storage when USE_HYPO_POOL is set).
Hypothesis::~Hypothesis()
{
  for (unsigned i = 0; i < m_ffStates.size(); ++i)
    delete m_ffStates[i];

  if (m_arcList)
  {
    ArcList::iterator iter;
    for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter)
    {
      FREEHYPO(*iter);
    }
    m_arcList->clear();

    delete m_arcList;
    m_arcList = NULL;
  }
}
+
// Record loserHypo as a recombined-away alternative of this hypothesis.
// Takes ownership of loserHypo and of everything on its arc list; the
// merge copies raw Hypothesis* entries, which is safe for a vector of
// pointers.
void Hypothesis::AddArc(Hypothesis *loserHypo)
{
  if (!m_arcList) {
    if (loserHypo->m_arcList) // we don't have an arcList, but loser does
    {
      this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
      loserHypo->m_arcList = 0; // prevent a double deletion
    }
    else
    { this->m_arcList = new ArcList(); }
  } else {
    if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser
      size_t my_size = m_arcList->size();
      size_t add_size = loserHypo->m_arcList->size();
      this->m_arcList->resize(my_size + add_size, 0);
      std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
      delete loserHypo->m_arcList;
      loserHypo->m_arcList = 0;
    } else { // loserHypo doesn't have any arcs
      // DO NOTHING
    }
  }
  m_arcList->push_back(loserHypo);
}
+
/***
 * return the subclass of Hypothesis most appropriate to the given translation option
 */
// Convenience wrapper: extend this hypothesis with transOpt, optionally
// constrained to match a reference phrase (may return NULL -- see Create).
Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const
{
  return Create(*this, transOpt, constraint);
}
+
/***
 * return the subclass of Hypothesis most appropriate to the given translation option
 */
// Factory for extending prevHypo with transOpt. When constrainingPhrase is
// non-NULL (constrained decoding), the extension is only created if the
// proposed target words are compatible with the corresponding span of the
// constraint; otherwise NULL is returned and the caller must handle it.
Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constrainingPhrase)
{

  // This method includes code for constraint decoding

  bool createHypothesis = true;

  if (constrainingPhrase != NULL)
  {

    size_t constraintSize = constrainingPhrase->GetSize();

    // first target position this extension would fill
    size_t start = 1 + prevHypo.GetCurrTargetWordsRange().GetEndPos();

    const Phrase &transOptPhrase = transOpt.GetTargetPhrase();
    size_t transOptSize = transOptPhrase.GetSize();

    size_t endpoint = start + transOptSize - 1;


    if (endpoint < constraintSize)
    {
      // compare the proposed words against the matching constraint span
      WordsRange range(start, endpoint);
      Phrase relevantConstraint = constrainingPhrase->GetSubString(range);

      if ( ! relevantConstraint.IsCompatible(transOptPhrase) )
      {
        createHypothesis = false;

      }
    }
    else
    {
      // extension would run past the end of the constraint
      createHypothesis = false;
    }

  }


  if (createHypothesis)
  {

    #ifdef USE_HYPO_POOL
    Hypothesis *ptr = s_objectPool.getPtr();
    return new(ptr) Hypothesis(prevHypo, transOpt);
    #else
    return new Hypothesis(prevHypo, transOpt);
    #endif

  }
  else
  {
    // If the previous hypothesis plus the proposed translation option
    // fail to match the provided constraint,
    // return a null hypothesis.
    return NULL;
  }

}
/***
 * return the subclass of Hypothesis most appropriate to the given target phrase
 */
// Factory for the empty seed hypothesis that starts decoding m_source.
Hypothesis* Hypothesis::Create(Manager& manager, InputType const& m_source, const TargetPhrase &emptyTarget)
{
#ifdef USE_HYPO_POOL
  Hypothesis *ptr = s_objectPool.getPtr();
  return new(ptr) Hypothesis(manager, m_source, emptyTarget);
#else
  return new Hypothesis(manager, m_source, emptyTarget);
#endif
}
+
/** check, if two hypothesis can be recombined.
    this is actually a sorting function that allows us to
    keep an ordered list of hypotheses. This makes recombination
    much quicker.
*/
int Hypothesis::RecombineCompare(const Hypothesis &compare) const
{ // -1 = this < compare
  // +1 = this > compare
  // 0 = this ==compare
  // order first by source coverage bitmap ...
  int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
  if (comp != 0)
    return comp;

  // ... then by each stateful feature function's state
  for (unsigned i = 0; i < m_ffStates.size(); ++i) {
    if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
      // NOTE(review): pointer subtraction gives an arbitrary but stable
      // ordering when a state is NULL (and truncates ptrdiff_t to int);
      // only its sign/zero-ness appears to matter here -- confirm.
      comp = m_ffStates[i] - compare.m_ffStates[i];
    } else {
      comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
    }
    if (comp != 0) return comp;
  }

  return 0;
}
+
// Zero the score breakdown and both cached totals.
void Hypothesis::ResetScore()
{
  m_scoreBreakdown.ZeroAll();
  m_futureScore = m_totalScore = 0.0f;
}
+
/***
 * calculate the logarithm of our total translation score (sum up components)
 */
// Combines: scores cached in the translation option, stateless features
// not cached there, stateful features (which also produce this hypothesis'
// feature states), and the precomputed future-cost estimate.
void Hypothesis::CalcScore(const SquareMatrix &futureScore)
{
  // some stateless score producers cache their values in the translation
  // option: add these here
  // language model scores for n-grams completely contained within a target
  // phrase are also included here
  m_scoreBreakdown.PlusEquals(m_transOpt->GetScoreBreakdown());

  const StaticData &staticData = StaticData::Instance();
  clock_t t=0; // used to track time

  // compute values of stateless feature functions that were not
  // cached in the translation option-- there is no principled distinction
  const vector<const StatelessFeatureFunction*>& sfs =
    staticData.GetScoreIndexManager().GetStatelessFeatureFunctions();
  for (unsigned i = 0; i < sfs.size(); ++i) {
    sfs[i]->Evaluate(m_targetPhrase, &m_scoreBreakdown);
  }

  // stateful features: each returns the state this hypothesis carries on
  const vector<const StatefulFeatureFunction*>& ffs =
    staticData.GetScoreIndexManager().GetStatefulFeatureFunctions();
  for (unsigned i = 0; i < ffs.size(); ++i) {
    m_ffStates[i] = ffs[i]->Evaluate(
      *this,
      m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
      &m_scoreBreakdown);
  }

  IFVERBOSE(2) { t = clock(); } // track time excluding LM

  // FUTURE COST
  m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );

  // TOTAL
  m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;

  IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
}
+
/** Calculates the expected score of extending this hypothesis with the
 * specified translation option. Includes actual costs for everything
 * except for expensive actual language model score.
 * This function is used by early discarding.
 * /param transOpt - translation option being considered
 */
// NOTE(review): currently aborts unconditionally via assert(!"...") --
// the distortion-score code is unfinished, so this path must not be
// reached in production.
float Hypothesis::CalcExpectedScore( const SquareMatrix &futureScore ) {
  const StaticData &staticData = StaticData::Instance();
  clock_t t=0;
  IFVERBOSE(2) { t = clock(); } // track time excluding LM

  assert(!"Need to add code to get the distortion scores");
  //CalcDistortionScore();

  // LANGUAGE MODEL ESTIMATE (includes word penalty cost)
  float estimatedLMScore = m_transOpt->GetFutureScore() - m_transOpt->GetScoreBreakdown().InnerProduct(staticData.GetAllWeights());

  // FUTURE COST
  m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );

  //LEXICAL REORDERING COST
  const std::vector<LexicalReordering*> &reorderModels = staticData.GetReorderModels();
  for(unsigned int i = 0; i < reorderModels.size(); i++)
  {
    m_scoreBreakdown.PlusEquals(reorderModels[i], reorderModels[i]->CalcScore(this));
  }

  // TOTAL
  float total = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore + estimatedLMScore;

  IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeEstimateScore( clock()-t ); }
  return total;
}
+
// Completes scoring for a hypothesis whose estimate was accepted by early
// discarding (adds LM and word penalty).
// NOTE(review): aborts unconditionally via assert(!"...") -- the LM-score
// code is unfinished, so this path must not be reached in production.
void Hypothesis::CalcRemainingScore()
{
  const StaticData &staticData = StaticData::Instance();
  clock_t t=0; // used to track time

  // LANGUAGE MODEL COST
  assert(!"Need to add code to get the LM score(s)");
  //CalcLMScore(staticData.GetAllLM());

  IFVERBOSE(2) { t = clock(); } // track time excluding LM

  // WORD PENALTY
  m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetNumWordsCovered());

  // TOTAL
  m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;

  IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
}
+
// Predecessor in the derivation, or NULL for the seed hypothesis.
const Hypothesis* Hypothesis::GetPrevHypo()const{
  return m_prevHypo;
}
+
/**
 * print hypothesis information for pharaoh-style logging
 */
// Emits, via TRACE_ERR: the predecessor's trailing words, base score,
// covered source span, its translation, and the score decomposition.
// No-op (beyond a marker) for the seed hypothesis.
void Hypothesis::PrintHypothesis() const
{
  if (!m_prevHypo) { TRACE_ERR(endl << "NULL hypo" << endl); return; }
  TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
  // show up to the last two target words of the predecessor as context
  int end = (int)(m_prevHypo->m_targetPhrase.GetSize()-1);
  int start = end-1;
  if ( start < 0 ) start = 0;
  if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
    TRACE_ERR( "<s> ");
  }
  else {
    TRACE_ERR( "... ");
  }
  if (end>=0) {
    WordsRange range(start, end);
    TRACE_ERR( m_prevHypo->m_targetPhrase.GetSubString(range) << " ");
  }
  TRACE_ERR( ")"<<endl);
  TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
  TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
            << *m_sourcePhrase <<endl);
  TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];

  if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
  //	TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
  //	TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
  //	TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
  TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
  TRACE_ERR( "\tunweighted feature scores: " << m_scoreBreakdown << endl);
  //PrintLMScores();
}
+
// After recombination: mark this hypothesis as its own winner, optionally
// prune the arc list down to what n-best extraction needs (frees the
// pruned losers), and point every surviving arc at this winner.
void Hypothesis::CleanupArcList()
{
  // point this hypo's main hypo to itself
  SetWinningHypo(this);

  if (!m_arcList) return;

  /* keep only number of arcs we need to create all n-best paths.
   * However, may not be enough if only unique candidates are needed,
   * so we'll keep all of arc list if nedd distinct n-best list
   */
  const StaticData &staticData = StaticData::Instance();
  size_t nBestSize = staticData.GetNBestSize();
  bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.UseLatticeMBR() ;

  if (!distinctNBest && m_arcList->size() > nBestSize * 5)
  { // prune arc list only if there too many arcs
    // partial sort: best nBestSize arcs to the front
    nth_element(m_arcList->begin()
                , m_arcList->begin() + nBestSize - 1
                , m_arcList->end()
                , CompareHypothesisTotalScore());

    // delete bad ones
    ArcList::iterator iter;
    for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter)
    {
      Hypothesis *arc = *iter;
      FREEHYPO(arc);
    }
    m_arcList->erase(m_arcList->begin() + nBestSize
                     , m_arcList->end());
  }

  // set all arc's main hypo variable to this hypo
  ArcList::iterator iter = m_arcList->begin();
  for (; iter != m_arcList->end() ; ++iter)
  {
    Hypothesis *arc = *iter;
    arc->SetWinningHypo(this);
  }
}
+
+TO_STRING_BODY(Hypothesis)
+
// friend
// Streams the hypothesis (target words so far), its coverage bitmap,
// total score, and score breakdown.
ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
{
  hypothesis.ToStream(out);
  // words bitmap
  out << "[" << hypothesis.m_sourceCompleted << "] ";

  // scores
  out << " [total=" << hypothesis.GetTotalScore() << "]";
  out << " " << hypothesis.GetScoreBreakdown();

  // alignment

  return out;
}
+
+
// String form of the source phrase this hypothesis translated, showing
// only the requested factors. Empty for the seed hypothesis.
std::string Hypothesis::GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
{
  if (!m_prevHypo) { return ""; }
  return m_sourcePhrase->GetStringRep(factorsToPrint);
#if 0
  // dead alternative kept from an earlier revision: derive the string from
  // the source range instead of the stored phrase pointer
  if(m_sourcePhrase)
  {
    return m_sourcePhrase->GetSubString(m_currSourceWordsRange).GetStringRep(factorsToPrint);
  }
  else
  {
    return m_sourceInput.GetSubString(m_currSourceWordsRange).GetStringRep(factorsToPrint);
  }
#endif
}
// String form of this hypothesis' target phrase, showing only the
// requested factors. Empty for the seed hypothesis.
std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
{
  if (!m_prevHypo) { return ""; }
  return m_targetPhrase.GetStringRep(factorsToPrint);
}
+
+std::string Hypothesis::GetSourcePhraseStringRep() const
+{
+ vector<FactorType> allFactors;
+ const size_t maxSourceFactors = StaticData::Instance().GetMaxNumFactors(Input);
+ for(size_t i=0; i < maxSourceFactors; i++)
+ {
+ allFactors.push_back(i);
+ }
+ return GetSourcePhraseStringRep(allFactors);
+}
+std::string Hypothesis::GetTargetPhraseStringRep() const
+{
+ vector<FactorType> allFactors;
+ const size_t maxTargetFactors = StaticData::Instance().GetMaxNumFactors(Output);
+ for(size_t i=0; i < maxTargetFactors; i++)
+ {
+ allFactors.push_back(i);
+ }
+ return GetTargetPhraseStringRep(allFactors);
+}
+
+
// Reordering scores precomputed and cached in the translation option.
const ScoreComponentCollection &Hypothesis::GetCachedReorderingScore() const
{
  return m_transOpt->GetReorderingScore();
}
+
+}
+
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
new file mode 100644
index 000000000..8e2dbc5ec
--- /dev/null
+++ b/moses/src/Hypothesis.h
@@ -0,0 +1,322 @@
+// $Id: Hypothesis.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Hypothesis_h
+#define moses_Hypothesis_h
+
+#include <iostream>
+#include <vector>
+#include "Phrase.h"
+#include "TypeDef.h"
+#include "WordsBitmap.h"
+#include "Sentence.h"
+#include "Phrase.h"
+#include "PhraseDictionaryMemory.h"
+#include "GenerationDictionary.h"
+#include "LanguageModelSingleFactor.h"
+#include "ScoreComponentCollection.h"
+#include "LexicalReordering.h"
+#include "InputType.h"
+#include "ObjectPool.h"
+
+namespace Moses
+{
+
+class SquareMatrix;
+class StaticData;
+class TranslationOption;
+class WordsRange;
+class Hypothesis;
+class FFState;
+class Manager;
+
+typedef std::vector<Hypothesis*> ArcList;
+
+/** Used to store a state in the beam search
+ for the best translation. With its link back to the previous hypothesis
+ m_prevHypo, we can trace back to the sentence start to read of the
+ (partial) translation to this point.
+
+ The expansion of hypotheses is handled in the class Manager, which
+ stores active hypothesis in the search in hypothesis stacks.
+***/
+class Hypothesis
+{
+ friend std::ostream& operator<<(std::ostream&, const Hypothesis&);
+
+protected:
+ static ObjectPool<Hypothesis> s_objectPool;
+
+ const Hypothesis* m_prevHypo; /*! backpointer to previous hypothesis (from which this one was created) */
+// const Phrase &m_targetPhrase; /*! target phrase being created at the current decoding step */
+ const TargetPhrase &m_targetPhrase; /*! target phrase being created at the current decoding step */
+ Phrase const* m_sourcePhrase; /*! input sentence */
+ WordsBitmap m_sourceCompleted; /*! keeps track of which words have been translated so far */
+ //TODO: how to integrate this into confusion network framework; what if
+ //it's a confusion network in the end???
+ InputType const& m_sourceInput;
+ WordsRange m_currSourceWordsRange; /*! source word positions of the last phrase that was used to create this hypothesis */
+ WordsRange m_currTargetWordsRange; /*! target word positions of the last phrase that was used to create this hypothesis */
+ bool m_wordDeleted; /*! NOTE(review): presumably set when a source word was dropped -- confirm against Hypothesis.cpp */
+ float m_totalScore; /*! score so far */
+ float m_futureScore; /*! estimated future cost to translate rest of sentence */
+ ScoreComponentCollection m_scoreBreakdown; /*! detailed score break-down by components (for instance language model, word penalty, etc) */
+ std::vector<const FFState*> m_ffStates; /*! per-feature-function states used for recombination */
+ const Hypothesis *m_winningHypo; /*! equivalent hypothesis that won recombination (if this one lost) */
+ ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
+ const TranslationOption *m_transOpt; /*! option applied in this decoding step */
+ Manager& m_manager;
+
+ int m_id; /*! numeric ID of this hypothesis, used for logging */
+ static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
+
+ /*! used by initial seeding of the translation process */
+ Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
+ /*! used when creating a new hypothesis using a translation option (phrase translation) */
+ Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
+
+public:
+ static ObjectPool<Hypothesis> &GetObjectPool()
+ {
+  return s_objectPool;
+ }
+
+ ~Hypothesis();
+
+ /** return the subclass of Hypothesis most appropriate to the given translation option */
+ static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constraint);
+
+ static Hypothesis* Create(Manager& manager, const WordsBitmap &initialCoverage);
+
+ /** return the subclass of Hypothesis most appropriate to the given target phrase */
+ static Hypothesis* Create(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
+
+ /** return the subclass of Hypothesis most appropriate to the given translation option */
+ Hypothesis* CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const;
+
+ /** debug dump of this hypothesis to stderr/trace output */
+ void PrintHypothesis() const;
+
+ const InputType& GetInput() const {return m_sourceInput;}
+
+ /** return target phrase used to create this hypothesis */
+// const Phrase &GetCurrTargetPhrase() const
+ const TargetPhrase &GetCurrTargetPhrase() const
+ {
+  return m_targetPhrase;
+ }
+
+ // void PrintLMScores(const LMList &lmListInitial, const LMList &lmListEnd) const;
+
+ /** return input positions covered by the translation option (phrasal translation) used to create this hypothesis */
+ inline const WordsRange &GetCurrSourceWordsRange() const
+ {
+  return m_currSourceWordsRange;
+ }
+
+ inline const WordsRange &GetCurrTargetWordsRange() const
+ {
+  return m_currTargetWordsRange;
+ }
+
+ Manager& GetManager() const
+ {
+  return m_manager;
+ }
+
+ /** output length of the translation option used to create this hypothesis */
+ inline size_t GetCurrTargetLength() const
+ {
+  return m_currTargetWordsRange.GetNumWordsCovered();
+ }
+
+ void ResetScore();
+
+ void CalcScore(const SquareMatrix &futureScore);
+
+ float CalcExpectedScore( const SquareMatrix &futureScore );
+ void CalcRemainingScore();
+
+ int GetId()const
+ {
+  return m_id;
+ }
+
+ const Hypothesis* GetPrevHypo() const;
+
+ /** length of the partial translation (from the start of the sentence) */
+ inline size_t GetSize() const
+ {
+  return m_currTargetWordsRange.GetEndPos() + 1;
+ }
+
+ inline const Phrase* GetSourcePhrase() const
+ {
+  return m_sourcePhrase;
+ }
+
+ std::string GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const;
+ std::string GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const;
+ // NOTE(review): returns by value, i.e. copies the TargetPhrase on every call.
+ inline const TargetPhrase GetTargetPhrase() const { return m_targetPhrase; }
+ std::string GetSourcePhraseStringRep() const;
+ std::string GetTargetPhraseStringRep() const;
+
+ /** curr - pos is relative from CURRENT hypothesis's starting index
+  * (ie, start of sentence would be some negative number, which is
+  * not allowed- USE WITH CAUTION) */
+ inline const Word &GetCurrWord(size_t pos) const
+ {
+  return m_targetPhrase.GetWord(pos);
+ }
+ inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const
+ {
+  return m_targetPhrase.GetFactor(pos, factorType);
+ }
+ /** recursive - pos is relative from start of sentence */
+ inline const Word &GetWord(size_t pos) const
+ {
+  // walk backpointers until we reach the hypothesis whose target span
+  // contains pos, then index into that hypothesis's phrase
+  const Hypothesis *hypo = this;
+  while (pos < hypo->GetCurrTargetWordsRange().GetStartPos())
+  {
+   hypo = hypo->GetPrevHypo();
+   assert(hypo != NULL);
+  }
+  return hypo->GetCurrWord(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
+ }
+ inline const Factor* GetFactor(size_t pos, FactorType factorType) const
+ {
+  return GetWord(pos)[factorType];
+ }
+
+ /***
+  * \return The bitmap of source words we cover
+  */
+ inline const WordsBitmap &GetWordsBitmap() const
+ {
+  return m_sourceCompleted;
+ }
+
+ inline bool IsSourceCompleted() const {
+  return m_sourceCompleted.IsComplete();
+ }
+
+ /** three-way comparison used to decide whether two hypotheses can be
+  * recombined (same coverage, same relevant feature-function states) */
+ int RecombineCompare(const Hypothesis &compare) const;
+
+ /** recursively print the full partial translation (prev hypos first) */
+ void ToStream(std::ostream& out) const
+ {
+  if (m_prevHypo != NULL)
+  {
+   m_prevHypo->ToStream(out);
+  }
+  out << (Phrase) GetCurrTargetPhrase();
+ }
+
+ inline bool PrintAlignmentInfo() const{ return GetCurrTargetPhrase().PrintAlignmentInfo(); }
+
+
+
+
+
+ TO_STRING();
+
+ inline void SetWinningHypo(const Hypothesis *hypo)
+ {
+  m_winningHypo = hypo;
+ }
+ inline const Hypothesis *GetWinningHypo() const
+ {
+  return m_winningHypo;
+ }
+
+ void AddArc(Hypothesis *loserHypo);
+ void CleanupArcList();
+
+ //! returns a list alternative previous hypotheses (or NULL if n-best support is disabled)
+ inline const ArcList* GetArcList() const
+ {
+  return m_arcList;
+ }
+ const ScoreComponentCollection& GetScoreBreakdown() const
+ {
+  return m_scoreBreakdown;
+ }
+ float GetTotalScore() const { return m_totalScore; }
+ // score excluding the future-cost estimate
+ float GetScore() const { return m_totalScore-m_futureScore; }
+
+
+
+
+ //! target span that trans opt would populate if applied to this hypo. Used for alignment check
+ size_t GetNextStartPos(const TranslationOption &transOpt) const;
+
+ // always returns NULL in this implementation (LM stats not collected here)
+ std::vector<std::vector<unsigned int> > *GetLMStats() const { return NULL; }
+
+ static unsigned int GetHypothesesCreated()
+ {
+  return s_HypothesesCreated;
+ }
+
+ const ScoreComponentCollection &GetCachedReorderingScore() const;
+
+ const TranslationOption &GetTranslationOption() const
+ { return *m_transOpt; }
+};
+
+std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
+
+// sorting helper
+// sorting helper
+// Strict-weak ordering: higher total score first (descending sort).
+struct CompareHypothesisTotalScore
+{
+ bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const
+ {
+  return hypo1->GetTotalScore() > hypo2->GetTotalScore();
+ }
+};
+
+#ifdef USE_HYPO_POOL
+
+#define FREEHYPO(hypo) \
+{ \
+ ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
+ pool.freeObject(hypo); \
+} \
+
+#else
+#define FREEHYPO(hypo) delete hypo
+#endif
+
+/** defines less-than relation on hypotheses.
+* The particular order is not important for us, we need just to figure out
+* which hypothesis are equal based on:
+* the last n-1 target words are the same
+* and the covers (source words translated) are the same
+*/
+// Comparator used by the stack's std::set: two hypotheses compare equal
+// (neither less than the other) exactly when RecombineCompare says they can
+// be recombined, so set insertion detects recombinable hypotheses.
+class HypothesisRecombinationOrderer
+{
+public:
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
+ {
+  return hypoA->RecombineCompare(*hypoB) < 0;
+ }
+};
+
+}
+#endif
diff --git a/moses/src/HypothesisStack.cpp b/moses/src/HypothesisStack.cpp
new file mode 100644
index 000000000..0a413902a
--- /dev/null
+++ b/moses/src/HypothesisStack.cpp
@@ -0,0 +1,31 @@
+
+#include "HypothesisStack.h"
+
+namespace Moses
+{
+// Destructor: destroys every remaining hypothesis (pool-free or delete,
+// depending on how FREEHYPO is configured).
+HypothesisStack::~HypothesisStack()
+{
+ // delete all hypos
+ while (m_hypos.begin() != m_hypos.end())
+ {
+  Remove(m_hypos.begin());
+ }
+}
+
+/** Remove hypothesis pointed to by iterator but don't delete the object. */
+void HypothesisStack::Detach(const HypothesisStack::iterator &iter)
+{
+ m_hypos.erase(iter);
+}
+
+
+// Remove the hypothesis from the stack AND destroy the object.
+void HypothesisStack::Remove(const HypothesisStack::iterator &iter)
+{
+ Hypothesis *h = *iter;
+ Detach(iter);
+ FREEHYPO(h);
+}
+
+
+}
+
diff --git a/moses/src/HypothesisStack.h b/moses/src/HypothesisStack.h
new file mode 100644
index 000000000..cf019f528
--- /dev/null
+++ b/moses/src/HypothesisStack.h
@@ -0,0 +1,48 @@
+#ifndef moses_HypothesisStack_h
+#define moses_HypothesisStack_h
+
+#include <vector>
+#include <set>
+#include "Hypothesis.h"
+#include "WordsBitmap.h"
+
+namespace Moses
+{
+
+ class Manager;
+
+// Abstract base class for decoder hypothesis stacks: owns a set of
+// Hypothesis* ordered for recombination, and defines the add/prune interface
+// implemented by HypothesisStackNormal and HypothesisStackCubePruning.
+class HypothesisStack
+{
+
+protected:
+ typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
+ _HCType m_hypos; /**< contains hypotheses */
+ Manager& m_manager;
+
+public:
+ HypothesisStack(Manager& manager): m_manager(manager) {}
+ typedef _HCType::iterator iterator;
+ typedef _HCType::const_iterator const_iterator;
+ //! iterators
+ const_iterator begin() const { return m_hypos.begin(); }
+ const_iterator end() const { return m_hypos.end(); }
+ size_t size() const { return m_hypos.size(); }
+ // FIX(review): qualified std::numeric_limits -- the unqualified name only
+ // compiled via a "using namespace std" leaked from another header; <limits>
+ // is likewise pulled in transitively and should be #included by this header.
+ // Base default: no lower bound on admissible scores (-infinity).
+ virtual float GetWorstScore() const { return -std::numeric_limits<float>::infinity(); }
+ virtual float GetWorstScoreForBitmap( WordsBitmapID ) { return -std::numeric_limits<float>::infinity(); }
+ virtual float GetWorstScoreForBitmap( WordsBitmap ) { return -std::numeric_limits<float>::infinity(); }
+
+ virtual ~HypothesisStack();
+ /** add hypo if good enough, possibly recombining; returns false if discarded */
+ virtual bool AddPrune(Hypothesis *hypothesis) = 0;
+ virtual const Hypothesis *GetBestHypothesis() const = 0;
+ virtual std::vector<const Hypothesis*> GetSortedList() const = 0;
+
+ //! remove hypothesis pointed to by iterator but don't delete the object
+ virtual void Detach(const HypothesisStack::iterator &iter);
+ /** destroy Hypothesis pointed to by iterator (object pool version) */
+ virtual void Remove(const HypothesisStack::iterator &iter);
+
+};
+
+}
+
+#endif
diff --git a/moses/src/HypothesisStackCubePruning.cpp b/moses/src/HypothesisStackCubePruning.cpp
new file mode 100644
index 000000000..5a707d2b3
--- /dev/null
+++ b/moses/src/HypothesisStackCubePruning.cpp
@@ -0,0 +1,315 @@
+// $Id: HypothesisStackCubePruning.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <algorithm>
+#include <set>
+#include <queue>
+#include "HypothesisStackCubePruning.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "StaticData.h"
+#include "Manager.h"
+
+using namespace std;
+
+namespace Moses
+{
+// Constructor: best/worst scores start at -infinity; n-best tracking mirrors
+// the global StaticData setting.
+HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
+ HypothesisStack(manager)
+{
+ m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
+ m_bestScore = -std::numeric_limits<float>::infinity();
+ m_worstScore = -std::numeric_limits<float>::infinity();
+}
+
+/** delete all BitmapContainers held by the bitmap accessor.
+ * NOTE: despite its name, this does NOT remove the hypotheses themselves --
+ * those are freed by the HypothesisStack base destructor. */
+void HypothesisStackCubePruning::RemoveAll()
+{
+ // delete all bitmap accessors;
+ _BMType::iterator iter;
+ for (iter = m_bitmapAccessor.begin(); iter != m_bitmapAccessor.end(); ++iter)
+ {
+  delete iter->second;
+ }
+}
+
+// Insert hypo into the recombination set.  On a genuinely new hypothesis,
+// update the best/worst admission scores and lazily prune once the stack
+// grows past twice its limit.  ret.second==false means an equivalent
+// (recombinable) hypothesis already exists.
+pair<HypothesisStackCubePruning::iterator, bool> HypothesisStackCubePruning::Add(Hypothesis *hypo)
+{
+ std::pair<iterator, bool> ret = m_hypos.insert(hypo);
+
+ if (ret.second)
+ { // equiv hypo doesn't exists
+  VERBOSE(3,"added hyp to stack");
+
+  // Update best score, if this hypothesis is new best
+  if (hypo->GetTotalScore() > m_bestScore)
+  {
+   VERBOSE(3,", best on stack");
+   m_bestScore = hypo->GetTotalScore();
+   // this may also affect the worst score
+   if ( m_bestScore + m_beamWidth > m_worstScore )
+    m_worstScore = m_bestScore + m_beamWidth;
+  }
+
+  // Prune only if stack is twice as big as needed (lazy pruning)
+  VERBOSE(3,", now size " << m_hypos.size());
+  if (m_hypos.size() > 2*m_maxHypoStackSize-1)
+  {
+   PruneToSize(m_maxHypoStackSize);
+  }
+  else {
+   VERBOSE(3,std::endl);
+  }
+ }
+
+ return ret;
+}
+
+// Try to add hypo to the stack.  Returns true only when hypo survives as a
+// NEW entry; on recombination or discard the loser is either kept as an arc
+// (n-best enabled) or destroyed, and false is returned.  In all cases
+// ownership of hypo passes to this function.
+bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
+{
+ if (hypo->GetTotalScore() < m_worstScore)
+ { // too bad for stack. don't bother adding hypo into collection
+  m_manager.GetSentenceStats().AddDiscarded();
+  VERBOSE(3,"discarded, too bad for stack" << std::endl);
+  FREEHYPO(hypo);
+  return false;
+ }
+
+ // over threshold, try to add to collection
+ std::pair<iterator, bool> addRet = Add(hypo);
+ if (addRet.second)
+ { // nothing found. add to collection
+  return true;
+ }
+
+ // equiv hypo exists, recombine with other hypo
+ iterator &iterExisting = addRet.first;
+ Hypothesis *hypoExisting = *iterExisting;
+ assert(iterExisting != m_hypos.end());
+
+ m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
+
+ // found existing hypo with same target ending.
+ // keep the best 1
+ if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
+ { // incoming hypo is better than the one we have
+  VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
+  if (m_nBestIsEnabled) {
+   hypo->AddArc(hypoExisting);
+   Detach(iterExisting);
+  } else {
+   Remove(iterExisting);
+  }
+
+  // re-insert must succeed now that the equivalent entry is gone
+  bool added = Add(hypo).second;
+  if (!added)
+  {
+   iterExisting = m_hypos.find(hypo);
+   TRACE_ERR("Offending hypo = " << **iterExisting << endl);
+   assert(false);
+  }
+  return false;
+ }
+ else
+ { // already storing the best hypo. discard current hypo
+  VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
+  if (m_nBestIsEnabled) {
+   hypoExisting->AddArc(hypo);
+  } else {
+   FREEHYPO(hypo);
+  }
+  return false;
+ }
+}
+
+// Seed the stack with the initial (empty-coverage) hypothesis and create its
+// BitmapContainer; insertion must succeed since the stack is empty.
+void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
+{
+ std::pair<iterator, bool> addRet = Add(hypo);
+ assert (addRet.second);
+
+ const WordsBitmap &bitmap = hypo->GetWordsBitmap();
+ m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this);
+}
+
+/** Prune the stack down to newSize hypotheses (threshold pruning).
+ * Scores above m_bestScore+m_beamWidth are pushed onto a max-heap; the
+ * newSize-th best of those becomes the deletion threshold, which also
+ * becomes the new worst-score admission bound for the stack.
+ */
+void HypothesisStackCubePruning::PruneToSize(size_t newSize)
+{
+ if (m_hypos.size() > newSize) // ok, if not over the limit
+ {
+  priority_queue<float> bestScores;
+
+  // push all scores to a heap
+  // (but never push scores below m_bestScore+m_beamWidth)
+  for (iterator iter = m_hypos.begin(); iter != m_hypos.end(); ++iter)
+  {
+   float score = (*iter)->GetTotalScore();
+   if (score > m_bestScore+m_beamWidth)
+   {
+    bestScores.push(score);
+   }
+  }
+
+  // BUG FIX: if no hypothesis beat the beam threshold the heap is empty and
+  // the original code called top() on an empty priority_queue -- undefined
+  // behaviour.  Nothing qualified for the heap => no score-based pruning.
+  if (bestScores.empty())
+   return;
+
+  // pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)
+  // ensure to never pop beyond heap size
+  size_t minNewSizeHeapSize = newSize > bestScores.size() ? bestScores.size() : newSize;
+  for (size_t i = 1 ; i < minNewSizeHeapSize ; i++)
+   bestScores.pop();
+
+  // and remember the threshold
+  float scoreThreshold = bestScores.top();
+
+  // delete all hypos under score threshold
+  iterator iter = m_hypos.begin();
+  while (iter != m_hypos.end())
+  {
+   Hypothesis *hypo = *iter;
+   float score = hypo->GetTotalScore();
+   if (score < scoreThreshold)
+   {
+    iterator iterRemove = iter++;
+    Remove(iterRemove);
+    m_manager.GetSentenceStats().AddPruning();
+   }
+   else
+   {
+    ++iter;
+   }
+  }
+  VERBOSE(3,", pruned to size " << size() << endl);
+
+  IFVERBOSE(3)
+  {
+   TRACE_ERR("stack now contains: ");
+   for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
+   {
+    Hypothesis *hypo = *iter;
+    TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") ");
+   }
+   TRACE_ERR( endl);
+  }
+
+  // set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
+  m_worstScore = scoreThreshold;
+ }
+}
+
+// Linear scan for the highest-scoring hypothesis; NULL if the stack is empty.
+const Hypothesis *HypothesisStackCubePruning::GetBestHypothesis() const
+{
+ if (!m_hypos.empty())
+ {
+  const_iterator iter = m_hypos.begin();
+  Hypothesis *bestHypo = *iter;
+  while (++iter != m_hypos.end())
+  {
+   Hypothesis *hypo = *iter;
+   if (hypo->GetTotalScore() > bestHypo->GetTotalScore())
+    bestHypo = hypo;
+  }
+  return bestHypo;
+ }
+ return NULL;
+}
+
+// Copy of all hypotheses sorted by descending total score (used for n-best).
+vector<const Hypothesis*> HypothesisStackCubePruning::GetSortedList() const
+{
+ vector<const Hypothesis*> ret; ret.reserve(m_hypos.size());
+ std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
+ sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
+
+ return ret;
+}
+
+
+// Repoint every recombination arc at its winning hypothesis; no-op unless
+// n-best output is enabled.
+void HypothesisStackCubePruning::CleanupArcList()
+{
+ // only necessary if n-best calculations are enabled
+ if (!m_nBestIsEnabled) return;
+
+ iterator iter;
+ for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)
+ {
+  Hypothesis *mainHypo = *iter;
+  mainHypo->CleanupArcList();
+ }
+}
+
+// Ensure a BitmapContainer exists for newBitmap (creating one on first use)
+// and attach a new BackwardsEdge connecting it to bitmapContainer with the
+// given translation options and future-score estimates.
+void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
+           , HypothesisStackCubePruning &stack
+           , const WordsRange &range
+           , BitmapContainer &bitmapContainer
+           , const SquareMatrix &futureScore
+           , const TranslationOptionList &transOptList)
+{
+ _BMType::iterator bcExists = m_bitmapAccessor.find(newBitmap);
+
+ BitmapContainer *bmContainer;
+ if (bcExists == m_bitmapAccessor.end()) {
+  bmContainer = new BitmapContainer(newBitmap, stack);
+  m_bitmapAccessor[newBitmap] = bmContainer;
+ }
+ else {
+  bmContainer = bcExists->second;
+ }
+
+ BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
+           , *bmContainer
+           , transOptList
+           , futureScore,
+           m_manager.GetSource());
+ bmContainer->AddBackwardsEdge(edge);
+}
+
+
+TO_STRING_BODY(HypothesisStackCubePruning);
+
+
+// friend
+// Streams every hypothesis in the stack, one per line.
+std::ostream& operator<<(std::ostream& out, const HypothesisStackCubePruning& hypoColl)
+{
+ HypothesisStackCubePruning::const_iterator iter;
+
+ for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter)
+ {
+  const Hypothesis &hypo = **iter;
+  out << hypo << endl;
+
+ }
+ return out;
+}
+
+// Register every hypothesis with the BitmapContainer for its coverage bitmap.
+void
+HypothesisStackCubePruning::AddHypothesesToBitmapContainers()
+{
+ HypothesisStackCubePruning::const_iterator iter;
+ for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)
+ {
+  Hypothesis *h = *iter;
+  const WordsBitmap &bitmap = h->GetWordsBitmap();
+  // NOTE(review): operator[] default-constructs a NULL pointer if no
+  // container exists for this bitmap, which would crash below -- presumably
+  // SetBitmapAccessor/AddInitial guarantee one exists; verify against callers.
+  BitmapContainer *container = m_bitmapAccessor[bitmap];
+  container->AddHypothesis(h);
+ }
+}
+
+}
+
diff --git a/moses/src/HypothesisStackCubePruning.h b/moses/src/HypothesisStackCubePruning.h
new file mode 100644
index 000000000..20f7ef724
--- /dev/null
+++ b/moses/src/HypothesisStackCubePruning.h
@@ -0,0 +1,154 @@
+// $Id: HypothesisStackCubePruning.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_HypothesisStackCubePruning_h
+#define moses_HypothesisStackCubePruning_h
+
+#include <limits>
+#include <map>
+#include <set>
+#include "Hypothesis.h"
+#include "BitmapContainer.h"
+#include "HypothesisStack.h"
+
+namespace Moses
+{
+
+class BitmapContainer;
+class TranslationOptionList;
+class Manager;
+
+typedef std::map<WordsBitmap, BitmapContainer*> _BMType;
+
+/** Stack for instances of Hypothesis, includes functions for pruning. */
+class HypothesisStackCubePruning : public HypothesisStack
+{
+public:
+ friend std::ostream& operator<<(std::ostream&, const HypothesisStackCubePruning&);
+
+protected:
+ _BMType m_bitmapAccessor;
+
+ float m_bestScore; /**< score of the best hypothesis in collection */
+ float m_worstScore; /**< score of the worse hypthesis in collection */
+ float m_beamWidth; /**< minimum score due to threashold pruning */
+ size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
+ bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
+
+ /** add hypothesis to stack. Prune if necessary.
+ * Returns false if equiv hypo exists in collection, otherwise returns true
+ */
+ std::pair<HypothesisStackCubePruning::iterator, bool> Add(Hypothesis *hypothesis);
+
+ /** destroy all instances of Hypothesis in this collection */
+ void RemoveAll();
+
+public:
+ HypothesisStackCubePruning(Manager& manager);
+ ~HypothesisStackCubePruning()
+ {
+ RemoveAll();
+ m_bitmapAccessor.clear();
+ }
+
+ /** adds the hypo, but only if within thresholds (beamThr, stackSize).
+ * This function will recombine hypotheses silently! There is no record
+ * (could affect n-best list generation...TODO)
+ * Call stack for adding hypothesis is
+ AddPrune()
+ Add()
+ AddNoPrune()
+ */
+ bool AddPrune(Hypothesis *hypothesis);
+
+ void AddInitial(Hypothesis *hypo);
+
+ /** set maximum number of hypotheses in the collection
+ * \param maxHypoStackSize maximum number (typical number: 100)
+ */
+ inline void SetMaxHypoStackSize(size_t maxHypoStackSize)
+ {
+ m_maxHypoStackSize = maxHypoStackSize;
+ }
+
+ inline size_t GetMaxHypoStackSize() const
+ {
+ return m_maxHypoStackSize;
+ }
+
+ /** set beam threshold, hypotheses in the stack must not be worse than
+ * this factor times the best score to be allowed in the stack
+ * \param beamThreshold minimum factor (typical number: 0.03)
+ */
+ inline void SetBeamWidth(float beamWidth)
+ {
+ m_beamWidth = beamWidth;
+ }
+
+ /** return score of the best hypothesis in the stack */
+ inline float GetBestScore() const
+ {
+ return m_bestScore;
+ }
+
+ /** return worst score allowed for the stack */
+ inline float GetWorstScore() const
+ {
+ return m_worstScore;
+ }
+
+ void AddHypothesesToBitmapContainers();
+
+ const _BMType& GetBitmapAccessor() const
+ {
+ return m_bitmapAccessor;
+ }
+
+ void SetBitmapAccessor(const WordsBitmap &newBitmap
+ , HypothesisStackCubePruning &stack
+ , const WordsRange &range
+ , BitmapContainer &bitmapContainer
+ , const SquareMatrix &futureScore
+ , const TranslationOptionList &transOptList);
+
+ /** pruning, if too large.
+ * Pruning algorithm: find a threshold and delete all hypothesis below it.
+ * The threshold is chosen so that exactly newSize top items remain on the
+ * stack in fact, in situations where some of the hypothesis fell below
+ * m_beamWidth, the stack will contain less items.
+ * \param newSize maximum size */
+ void PruneToSize(size_t newSize);
+
+ //! return the hypothesis with best score. Used to get the translated at end of decoding
+ const Hypothesis *GetBestHypothesis() const;
+ //! return all hypothesis, sorted by descending score. Used in creation of N best list
+ std::vector<const Hypothesis*> GetSortedList() const;
+
+ /** make all arcs in point to the equiv hypothesis that contains them.
+ * Ie update doubly linked list be hypo & arcs
+ */
+ void CleanupArcList();
+
+ TO_STRING();
+};
+
+}
+#endif
diff --git a/moses/src/HypothesisStackNormal.cpp b/moses/src/HypothesisStackNormal.cpp
new file mode 100644
index 000000000..be7ffa904
--- /dev/null
+++ b/moses/src/HypothesisStackNormal.cpp
@@ -0,0 +1,303 @@
+// $Id: HypothesisStackNormal.cpp 1511 2007-11-12 20:21:44Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <algorithm>
+#include <set>
+#include <queue>
+#include "HypothesisStackNormal.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "StaticData.h"
+#include "Manager.h"
+
+using namespace std;
+
+namespace Moses
+{
+// Constructor: best/worst scores start at -infinity; n-best tracking mirrors
+// the global StaticData setting.
+HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
+ HypothesisStack(manager)
+{
+ m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
+ m_bestScore = -std::numeric_limits<float>::infinity();
+ m_worstScore = -std::numeric_limits<float>::infinity();
+}
+
+/** remove all hypotheses from the collection */
+void HypothesisStackNormal::RemoveAll()
+{
+ while (m_hypos.begin() != m_hypos.end())
+ {
+  Remove(m_hypos.begin());
+ }
+}
+
+// Insert hypo into the recombination set.  On a genuinely new hypothesis,
+// update best/worst admission scores (including the per-bitmap diversity
+// score when m_minHypoStackDiversity==1) and lazily prune once the stack
+// grows past its tolerated size.
+pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesis *hypo)
+{
+ std::pair<iterator, bool> ret = m_hypos.insert(hypo);
+ if (ret.second)
+ { // equiv hypo doesn't exists
+  VERBOSE(3,"added hyp to stack");
+
+  // Update best score, if this hypothesis is new best
+  if (hypo->GetTotalScore() > m_bestScore)
+  {
+   VERBOSE(3,", best on stack");
+   m_bestScore = hypo->GetTotalScore();
+   // this may also affect the worst score
+   if ( m_bestScore + m_beamWidth > m_worstScore )
+    m_worstScore = m_bestScore + m_beamWidth;
+  }
+  // update best/worst score for stack diversity 1
+  if ( m_minHypoStackDiversity == 1 &&
+    hypo->GetTotalScore() > GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) )
+  {
+   SetWorstScoreForBitmap( hypo->GetWordsBitmap().GetID(), hypo->GetTotalScore() );
+  }
+
+  VERBOSE(3,", now size " << m_hypos.size());
+
+  // prune only if stack is twice as big as needed (lazy pruning)
+  size_t toleratedSize = 2*m_maxHypoStackSize-1;
+  // add in room for stack diversity
+  if (m_minHypoStackDiversity)
+   toleratedSize += m_minHypoStackDiversity << StaticData::Instance().GetMaxDistortion();
+  if (m_hypos.size() > toleratedSize)
+  {
+   PruneToSize(m_maxHypoStackSize);
+  }
+  else {
+   VERBOSE(3,std::endl);
+  }
+ }
+
+ return ret;
+}
+
+// Try to add hypo to the stack.  Returns true only when hypo survives as a
+// NEW entry.  A hypothesis below the worst score may still be admitted when
+// stack diversity requires its coverage bitmap to be represented.  Ownership
+// of hypo passes to this function in all cases.
+bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
+{
+ // too bad for stack. don't bother adding hypo into collection
+ if (!StaticData::Instance().GetDisableDiscarding() &&
+   hypo->GetTotalScore() < m_worstScore
+   && ! ( m_minHypoStackDiversity > 0
+     && hypo->GetTotalScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) )
+ {
+  m_manager.GetSentenceStats().AddDiscarded();
+  VERBOSE(3,"discarded, too bad for stack" << std::endl);
+  FREEHYPO(hypo);
+  return false;
+ }
+
+ // over threshold, try to add to collection
+ std::pair<iterator, bool> addRet = Add(hypo);
+ if (addRet.second)
+ { // nothing found. add to collection
+  return true;
+ }
+
+ // equiv hypo exists, recombine with other hypo
+ iterator &iterExisting = addRet.first;
+ Hypothesis *hypoExisting = *iterExisting;
+ assert(iterExisting != m_hypos.end());
+
+ m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
+
+ // found existing hypo with same target ending.
+ // keep the best 1
+ if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
+ { // incoming hypo is better than the one we have
+  VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
+  if (m_nBestIsEnabled) {
+   hypo->AddArc(hypoExisting);
+   Detach(iterExisting);
+  } else {
+   Remove(iterExisting);
+  }
+
+  // re-insert must succeed now that the equivalent entry is gone
+  bool added = Add(hypo).second;
+  if (!added)
+  {
+   iterExisting = m_hypos.find(hypo);
+   TRACE_ERR("Offending hypo = " << **iterExisting << endl);
+   abort();
+  }
+  return false;
+ }
+ else
+ { // already storing the best hypo. discard current hypo
+  VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
+  if (m_nBestIsEnabled) {
+   hypoExisting->AddArc(hypo);
+  } else {
+   FREEHYPO(hypo);
+  }
+  return false;
+ }
+}
+
+/** Prune to newSize, honouring minimum stack diversity: the best
+ * m_minHypoStackDiversity hypotheses per coverage bitmap are kept first,
+ * then the remainder is filled with the best hypotheses above the beam;
+ * everything else is destroyed.
+ */
+void HypothesisStackNormal::PruneToSize(size_t newSize)
+{
+ if ( size() <= newSize ) return; // ok, if not over the limit
+
+ // we need to store a temporary list of hypotheses
+ vector< Hypothesis* > hypos = GetSortedListNOTCONST();
+ // FIX(review): std::vector<bool> replaces an unchecked malloc'd bool array
+ // (the malloc return was never NULL-checked and required manual free()).
+ vector<bool> included(hypos.size(), false);
+
+ // clear out original set
+ for( iterator iter = m_hypos.begin(); iter != m_hypos.end(); )
+ {
+  iterator removeHyp = iter++;
+  Detach(removeHyp);
+ }
+
+ // add best hyps for each coverage according to minStackDiversity
+ if ( m_minHypoStackDiversity > 0 )
+ {
+  map< WordsBitmapID, size_t > diversityCount;
+  for(size_t i=0; i<hypos.size(); i++)
+  {
+   Hypothesis *hyp = hypos[i];
+   WordsBitmapID coverage = hyp->GetWordsBitmap().GetID();
+   if (diversityCount.find( coverage ) == diversityCount.end())
+    diversityCount[ coverage ] = 0;
+
+   if (diversityCount[ coverage ] < m_minHypoStackDiversity)
+   {
+    m_hypos.insert( hyp );
+    included[i] = true;
+    diversityCount[ coverage ]++;
+    if (diversityCount[ coverage ] == m_minHypoStackDiversity)
+     SetWorstScoreForBitmap( coverage, hyp->GetTotalScore());
+   }
+  }
+ }
+
+ // only add more if stack not full after satisfying minStackDiversity
+ if ( size() < newSize ) {
+
+  // add best remaining hypotheses (hypos is score-sorted, so we can stop
+  // as soon as one falls below the beam)
+  for(size_t i=0; i<hypos.size()
+     && size() < newSize
+     && hypos[i]->GetTotalScore() > m_bestScore+m_beamWidth; i++)
+  {
+   if (! included[i])
+   {
+    m_hypos.insert( hypos[i] );
+    included[i] = true;
+    if (size() == newSize)
+     m_worstScore = hypos[i]->GetTotalScore();
+   }
+  }
+ }
+
+ // delete hypotheses that have not been included
+ for(size_t i=0; i<hypos.size(); i++)
+ {
+  if (! included[i])
+  {
+   FREEHYPO( hypos[i] );
+   m_manager.GetSentenceStats().AddPruning();
+  }
+ }
+
+ // some reporting....
+ VERBOSE(3,", pruned to size " << size() << endl);
+ IFVERBOSE(3)
+ {
+  TRACE_ERR("stack now contains: ");
+  for(iterator iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
+  {
+   Hypothesis *hypo = *iter;
+   TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") ");
+  }
+  TRACE_ERR( endl);
+ }
+}
+
+// Linear scan for the highest-scoring hypothesis; NULL if the stack is empty.
+const Hypothesis *HypothesisStackNormal::GetBestHypothesis() const
+{
+ if (!m_hypos.empty())
+ {
+  const_iterator iter = m_hypos.begin();
+  Hypothesis *bestHypo = *iter;
+  while (++iter != m_hypos.end())
+  {
+   Hypothesis *hypo = *iter;
+   if (hypo->GetTotalScore() > bestHypo->GetTotalScore())
+    bestHypo = hypo;
+  }
+  return bestHypo;
+ }
+ return NULL;
+}
+
+// Copy of all hypotheses sorted by descending total score (const view).
+vector<const Hypothesis*> HypothesisStackNormal::GetSortedList() const
+{
+ vector<const Hypothesis*> ret; ret.reserve(m_hypos.size());
+ std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
+ sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
+
+ return ret;
+}
+
+// Same as GetSortedList but with mutable pointers; used internally by
+// PruneToSize, which needs to re-insert/destroy the hypotheses.
+vector<Hypothesis*> HypothesisStackNormal::GetSortedListNOTCONST()
+{
+ vector<Hypothesis*> ret; ret.reserve(m_hypos.size());
+ std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
+ sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
+
+ return ret;
+}
+
+// Repoint every recombination arc at its winning hypothesis; no-op unless
+// n-best output is enabled.
+void HypothesisStackNormal::CleanupArcList()
+{
+ // only necessary if n-best calculations are enabled
+ if (!m_nBestIsEnabled) return;
+
+ iterator iter;
+ for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)
+ {
+  Hypothesis *mainHypo = *iter;
+  mainHypo->CleanupArcList();
+ }
+}
+
+TO_STRING_BODY(HypothesisStackNormal);
+
+
+// friend
+// Streams every hypothesis in the stack, one per line.
+std::ostream& operator<<(std::ostream& out, const HypothesisStackNormal& hypoColl)
+{
+ HypothesisStackNormal::const_iterator iter;
+
+ for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter)
+ {
+  const Hypothesis &hypo = **iter;
+  out << hypo << endl;
+
+ }
+ return out;
+}
+
+
+}
+
diff --git a/moses/src/HypothesisStackNormal.h b/moses/src/HypothesisStackNormal.h
new file mode 100644
index 000000000..d6ad5d67d
--- /dev/null
+++ b/moses/src/HypothesisStackNormal.h
@@ -0,0 +1,137 @@
+// $Id: HypothesisStackNormal.h 1511 2007-11-12 20:21:44Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_HypothesisStackNormal_h
+#define moses_HypothesisStackNormal_h
+
+#include <limits>
+#include <set>
+#include "Hypothesis.h"
+#include "HypothesisStack.h"
+#include "WordsBitmap.h"
+
+namespace Moses
+{
+ // class WordsBitmap;
+ // typedef size_t WordsBitmapID;
+
+/** Stack for instances of Hypothesis, includes functions for pruning. */
/** Stack for instances of Hypothesis, includes functions for pruning. */
class HypothesisStackNormal: public HypothesisStack
{
public:
	friend std::ostream& operator<<(std::ostream&, const HypothesisStackNormal&);

protected:
	float m_bestScore; /**< score of the best hypothesis in collection */
	float m_worstScore; /**< score of the worst hypothesis in collection */
	map< WordsBitmapID, float > m_diversityWorstScore; /**< score of worst hypothesis for particular source word coverage */
	float m_beamWidth; /**< minimum score due to threshold pruning */
	size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
	size_t m_minHypoStackDiversity; /**< minimum number of hypothesis with different source word coverage */
	bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */

	/** add hypothesis to stack. Prune if necessary.
	 * Returns false if equiv hypo exists in collection, otherwise returns true
	 */
	std::pair<HypothesisStackNormal::iterator, bool> Add(Hypothesis *hypothesis);

	/** destroy all instances of Hypothesis in this collection */
	void RemoveAll();

	/** record the worst surviving score for a given coverage bitmap
	 *  (used by the diversity constraint during pruning) */
	void SetWorstScoreForBitmap( WordsBitmapID id, float worstScore ) {
		m_diversityWorstScore[ id ] = worstScore;
	}

public:
	/** worst recorded score for this coverage bitmap;
	 *  -infinity when no hypothesis with this coverage has been seen yet */
	float GetWorstScoreForBitmap( WordsBitmapID id ) {
		if (m_diversityWorstScore.find( id ) == m_diversityWorstScore.end())
			return -numeric_limits<float>::infinity();
		return m_diversityWorstScore[ id ];
	}
	/** convenience overload taking the coverage bitmap itself */
	float GetWorstScoreForBitmap( const WordsBitmap &coverage ) {
		return GetWorstScoreForBitmap( coverage.GetID() );
	}

	HypothesisStackNormal(Manager& manager);

	/** adds the hypo, but only if within thresholds (beamThr, stackSize).
	 * This function will recombine hypotheses silently! There is no record
	 * (could affect n-best list generation...TODO)
	 * Call stack for adding hypothesis is
			AddPrune()
				Add()
					AddNoPrune()
	 */
	bool AddPrune(Hypothesis *hypothesis);

	/** set maximum number of hypotheses in the collection
	 * \param maxHypoStackSize maximum number (typical number: 100)
	 * \param minHypoStackDiversity minimum number of hypotheses with distinct coverage (default: 0)
	 */
	inline void SetMaxHypoStackSize(size_t maxHypoStackSize, size_t minHypoStackDiversity)
	{
		m_maxHypoStackSize = maxHypoStackSize;
		m_minHypoStackDiversity = minHypoStackDiversity;
	}

	/** set beam threshold, hypotheses in the stack must not be worse than
	 * this factor times the best score to be allowed in the stack
	 * \param beamWidth minimum factor (typical number: 0.03)
	 */
	inline void SetBeamWidth(float beamWidth)
	{
		m_beamWidth = beamWidth;
	}
	/** return score of the best hypothesis in the stack */
	inline float GetBestScore() const
	{
		return m_bestScore;
	}
	/** return worst allowable score */
	inline float GetWorstScore() const
	{
		return m_worstScore;
	}

	/** pruning, if too large.
	 * Pruning algorithm: find a threshold and delete all hypothesis below it.
	 * The threshold is chosen so that exactly newSize top items remain on the
	 * stack in fact, in situations where some of the hypothesis fell below
	 * m_beamWidth, the stack will contain less items.
	 * \param newSize maximum size */
	void PruneToSize(size_t newSize);

	//! return the hypothesis with best score. Used to get the translated at end of decoding
	const Hypothesis *GetBestHypothesis() const;
	//! return all hypothesis, sorted by descending score. Used in creation of N best list
	std::vector<const Hypothesis*> GetSortedList() const;
	std::vector<Hypothesis*> GetSortedListNOTCONST();

	/** make all arcs in point to the equiv hypothesis that contains them.
	 * Ie update doubly linked list be hypo & arcs
	 */
	void CleanupArcList();

	TO_STRING();
};
+
+}
+#endif
diff --git a/moses/src/InputFileStream.cpp b/moses/src/InputFileStream.cpp
new file mode 100644
index 000000000..954d36967
--- /dev/null
+++ b/moses/src/InputFileStream.cpp
@@ -0,0 +1,62 @@
+// $Id: InputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "InputFileStream.h"
+#include "gzfilebuf.h"
+#include <iostream>
+
+using namespace std;
+
+namespace Moses
+{
+InputFileStream::InputFileStream(const std::string &filePath)
+: std::istream(NULL)
+, m_streambuf(NULL)
+{
+ if (filePath.size() > 3 &&
+ filePath.substr(filePath.size() - 3, 3) == ".gz")
+ {
+ m_streambuf = new gzfilebuf(filePath.c_str());
+ } else {
+ std::filebuf* fb = new std::filebuf();
+ fb = fb->open(filePath.c_str(), std::ios::in);
+ if (! fb) {
+ cerr << "Can't read " << filePath.c_str() << endl;
+ exit(1);
+ }
+ m_streambuf = fb;
+ }
+ this->init(m_streambuf);
+}
+
// Destructor: releases the owned stream buffer (plain filebuf or gzfilebuf).
InputFileStream::~InputFileStream()
{
	delete m_streambuf;
	m_streambuf = NULL;
}
+
// Intentionally a no-op: the underlying buffer is closed and freed by
// the destructor. Kept for interface compatibility with callers.
void InputFileStream::Close()
{
}
+
+
+}
+
diff --git a/moses/src/InputFileStream.h b/moses/src/InputFileStream.h
new file mode 100644
index 000000000..a0254e1eb
--- /dev/null
+++ b/moses/src/InputFileStream.h
@@ -0,0 +1,48 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_InputFileStream_h
+#define moses_InputFileStream_h
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+namespace Moses
+{
+
/** Used in place of std::istream, can read zipped files if it ends in .gz
*/
class InputFileStream : public std::istream
{
protected:
	std::streambuf *m_streambuf; // owned; plain filebuf or gzip buffer, freed in dtor
public:

	/** open filePath; a ".gz" suffix selects transparent decompression */
	InputFileStream(const std::string &filePath);
	~InputFileStream();

	/** no-op; cleanup happens in the destructor */
	void Close();
};
+
+}
+
+#endif
diff --git a/moses/src/InputType.cpp b/moses/src/InputType.cpp
new file mode 100644
index 000000000..9bb719404
--- /dev/null
+++ b/moses/src/InputType.cpp
@@ -0,0 +1,59 @@
+// $Id: InputType.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cstdlib>
+
+#include "InputType.h"
+
+namespace Moses
+{
+
// Construct with a (default 0) translation id; other members are set by subclasses.
InputType::InputType(long translationId) : m_translationId(translationId) {}
InputType::~InputType() {}
+
+TO_STRING_BODY(InputType);
+
+std::ostream& operator<<(std::ostream& out,InputType const& x)
+{
+ x.Print(out); return out;
+}
+
+// default implementation is one column equals one word
+int InputType::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const
+{
+ int dist = 0;
+ if (prev.GetNumWordsCovered() == 0) {
+ dist = current.GetStartPos();
+ } else {
+ dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
+ }
+ return abs(dist);
+}
+
// Default implementation: a plain sentence is fully connected, so every
// span is reachable. Word-lattice subclasses override this with real
// connectivity checks (see header comment).
bool InputType::CanIGetFromAToB(size_t start, size_t end) const
{
	return true;
}
+
+}
+
+
diff --git a/moses/src/InputType.h b/moses/src/InputType.h
new file mode 100644
index 000000000..24e4fab53
--- /dev/null
+++ b/moses/src/InputType.h
@@ -0,0 +1,132 @@
+// $Id: InputType.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_InputType_h
+#define moses_InputType_h
+
+#include <string>
+#include "TypeDef.h"
+#include "Phrase.h"
+#include "TargetPhraseCollection.h"
+#include "ReorderingConstraint.h"
+
+namespace Moses
+{
+
+class WordsRange;
+class Factor;
+class PhraseDictionary;
+class TranslationOptionCollection;
+
//! base class for sentences and confusion networks
class InputType
{
protected:
	long m_translationId; 	//< contiguous Id
	bool m_hasMetaData;
	long m_segId;
	ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */

public:

	InputType(long translationId = 0);
	virtual ~InputType();

	//! which concrete input this is (sentence, confusion net, ...)
	virtual InputTypeEnum GetType() const = 0;

	long GetTranslationId() const
	{
		return m_translationId;
	}
	void SetTranslationId(long translationId)
	{
		m_translationId = translationId;
	}
	//! returns the number of words moved
	virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;

	//! In a word lattice, tells you if there's a path from node start to node end
	virtual bool CanIGetFromAToB(size_t start, size_t end) const;

	//! is there a path covering [range] (lattice only, otherwise true)
	inline bool IsCoveragePossible(const WordsRange& range) const
	{
		return CanIGetFromAToB(range.GetStartPos(), range.GetEndPos() + 1);
	}

	//! In a word lattice, you can't always get from node A to node B
	inline bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const
	{
		//	return ComputeDistortionDistance(prev, current) < 100000;
		// First check connectivity between the end of prev and the end of current,
		// whichever order they occur in.
		size_t t = prev.GetEndPos()+1; // 2
		size_t l = current.GetEndPos()+1; //l=1
		size_t r = l;
		if (l<t) { r = t; } else { l = t; } //r=2
		if (!CanIGetFromAToB(l,r)) return false;

		// there's another check here: a current span may end at a place that previous could get to,
		// but it may not *START* at a place it can get to.  We'll also have to check if we're going left or right

		r = current.GetStartPos();
		l = prev.GetEndPos()+1;
		if (l == r) return true;
		if (prev.GetEndPos() > current.GetStartPos()) {
			// jumping backwards: check reachability in the reverse direction
			r = prev.GetStartPos();
			l = current.GetEndPos()+1;
			if (r == l) return true;
		}
		return CanIGetFromAToB(l,r);
	}

	//! number of words in this sentence/confusion network
	virtual size_t GetSize() const =0;

	//! populate this InputType with data from in stream
	virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;

	//! Output debugging info to stream out
	virtual void Print(std::ostream&) const =0;

	//! create trans options specific to this InputType
	virtual TranslationOptionCollection* CreateTranslationOptionCollection() const=0;

	//! return substring. Only valid for Sentence class. TODO - get rid of this fn
	virtual Phrase GetSubString(const WordsRange&) const =0;

	//! return substring at a particular position. Only valid for Sentence class. TODO - get rid of this fn
	virtual const Word& GetWord(size_t pos) const=0;

	//! Returns the reordering constraints
	const ReorderingConstraint& GetReorderingConstraint() const
	{
		return m_reorderingConstraint;
	};

	TO_STRING();

};
+
+std::ostream& operator<<(std::ostream&,InputType const&);
+
+}
+
+#endif
diff --git a/moses/src/LMList.cpp b/moses/src/LMList.cpp
new file mode 100644
index 000000000..44219f776
--- /dev/null
+++ b/moses/src/LMList.cpp
@@ -0,0 +1,54 @@
+// $Id: LMList.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "LMList.h"
+#include "Phrase.h"
+#include "LanguageModelSingleFactor.h"
+#include "ScoreComponentCollection.h"
+
+using namespace std;
+
+namespace Moses
+{
+void LMList::CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const
+{
+ const_iterator lmIter;
+ for (lmIter = begin(); lmIter != end(); ++lmIter)
+ {
+ const LanguageModel &lm = **lmIter;
+ const float weightLM = lm.GetWeight();
+
+ float fullScore, nGramScore;
+
+ // do not process, if factors not defined yet (happens in partial translation options)
+ if (!lm.Useable(phrase))
+ continue;
+
+ lm.CalcScore(phrase, fullScore, nGramScore);
+
+ breakdown->Assign(&lm, nGramScore); // I'm not sure why += doesn't work here- it should be 0.0 right?
+ retFullScore += fullScore * weightLM;
+ retNGramScore += nGramScore * weightLM;
+ }
+}
+
+}
+
diff --git a/moses/src/LMList.h b/moses/src/LMList.h
new file mode 100644
index 000000000..7860383db
--- /dev/null
+++ b/moses/src/LMList.h
@@ -0,0 +1,23 @@
+#ifndef moses_LMList_h
+#define moses_LMList_h
+
+#include <list>
+#include "LanguageModel.h"
+
+namespace Moses
+{
+
+class Phrase;
+class ScoreColl;
+class ScoreComponentCollection;
+
//! List of language models
class LMList : public std::list < LanguageModel* >
{
public:
	/** sum the weighted scores of all LMs over `phrase` into
	 *  retFullScore/retNGramScore; per-LM unweighted n-gram scores are
	 *  written to `breakdown` */
	void CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const;

};
+
+}
+#endif
diff --git a/moses/src/LVoc.cpp b/moses/src/LVoc.cpp
new file mode 100644
index 000000000..6091c722f
--- /dev/null
+++ b/moses/src/LVoc.cpp
@@ -0,0 +1,7 @@
#include<limits>
#include "LVoc.h"

// LVoc itself is a header-only template; this file only defines its
// sentinel constants.

// Largest representable LabelId marks "symbol not in vocabulary".
const LabelId InvalidLabelId = std::numeric_limits<LabelId>::max();
// Reserved id for the epsilon (empty) label, one below the invalid sentinel.
const LabelId Epsilon = InvalidLabelId-1;
diff --git a/moses/src/LVoc.h b/moses/src/LVoc.h
new file mode 100644
index 000000000..3f4b40439
--- /dev/null
+++ b/moses/src/LVoc.h
@@ -0,0 +1,68 @@
+#ifndef moses_LVoc_h
+#define moses_LVoc_h
+
+#include<map>
+#include<vector>
+#include<iostream>
+#include<fstream>
+#include <sstream>
+
+typedef unsigned LabelId;
+extern const LabelId InvalidLabelId;
+extern const LabelId Epsilon;
+
+typedef std::vector<LabelId> IPhrase;
+
// A = type of things to numberize, ie, std::string
// B = map type to use, might consider using hash_map for better performance
/** Bidirectional symbol<->id vocabulary: `m` maps symbols to dense ids,
 *  `data` maps ids back to symbols. Ids are assigned in insertion order.
 */
template<typename A,typename B=std::map<A,LabelId> >
class LVoc {
  typedef A Key;
  typedef B M;
  typedef std::vector<Key> V;
  M m;      // symbol -> id
  V data;   // id -> symbol
public:
  LVoc() {}

  bool isKnown(const Key& k) const {return m.find(k)!=m.end();}
  // id of k, or InvalidLabelId when k has never been added
  LabelId index(const Key& k) const {
    typename M::const_iterator i=m.find(k);
    return i!=m.end()? i->second : InvalidLabelId;}
  // insert k if new; returns its (possibly pre-existing) id
  LabelId add(const Key& k) {
    std::pair<typename M::iterator,bool> p
      =m.insert(std::make_pair(k,data.size()));
    if(p.second) data.push_back(k);
    assert(static_cast<size_t>(p.first->second)<data.size());
    return p.first->second;
  }
  // symbol for id i; i must be a valid id
  Key const& symbol(LabelId i) const {
    assert(static_cast<size_t>(i)<data.size());
    return data[i];}

  typedef typename V::const_iterator const_iterator;
  const_iterator begin() const {return data.begin();}
  const_iterator end() const {return data.end();}

  // Serialize as "<id> <symbol>" lines. Written highest id first so that
  // Read() can size the table with a single resize.
  void Write(const std::string& fname) const {
    std::ofstream out(fname.c_str()); Write(out);}
  void Write(std::ostream& out) const {
    for(int i=data.size()-1;i>=0;--i)
      out<<i<<' '<<data[i]<<'\n';
  }
  void Read(const std::string& fname) {
    std::ifstream in(fname.c_str());Read(in);}
  // Parse "<id> <symbol>" lines; malformed lines are silently skipped.
  void Read(std::istream& in) {
    Key k;size_t i;std::string line;
    while(getline(in,line)) {
      std::istringstream is(line);
      if(is>>i>>k) {
        if(i>=data.size()) data.resize(i+1);
        data[i]=k;
        m[k]=i;
      }
    }
  }
};
+
+#endif
diff --git a/moses/src/LanguageModel.cpp b/moses/src/LanguageModel.cpp
new file mode 100644
index 000000000..6347d1649
--- /dev/null
+++ b/moses/src/LanguageModel.cpp
@@ -0,0 +1,191 @@
+// $Id: LanguageModel.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <sstream>
+
+#include "FFState.h"
+#include "LanguageModel.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "Manager.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "StaticData.h"
+
+using namespace std;
+
+namespace Moses
+{
// registerScore is false only when this LM is a component inside a
// composite LM (e.g. LanguageModelJoint), in which case the wrapper
// registers with the score index manager instead.
LanguageModel::LanguageModel(bool registerScore, ScoreIndexManager &scoreIndexManager)
{
	if (registerScore)
		scoreIndexManager.AddScoreProducer(this);
}
LanguageModel::~LanguageModel() {}
+
// don't inline virtual funcs...
// Each LM contributes exactly one score component.
size_t LanguageModel::GetNumScoreComponents() const
{
	return 1;
}
+
/** Score a contiguous phrase with this LM (unweighted, natural log).
 *  fullScore collects every partial n-gram anchored at the phrase start
 *  plus all full n-grams; ngramScore collects only full n-grams of order
 *  m_nGramOrder. Caller must have checked Useable(phrase) first.
 */
void LanguageModel::CalcScore(const Phrase &phrase
														, float &fullScore
														, float &ngramScore) const
{
	fullScore	= 0;
	ngramScore	= 0;

	size_t phraseSize = phrase.GetSize();
	vector<const Word*> contextFactor;
	contextFactor.reserve(m_nGramOrder);

	// start of sentence: score the growing partial n-grams
	// (unigram, bigram, ... up to order-1)
	for (size_t currPos = 0 ; currPos < m_nGramOrder - 1 && currPos < phraseSize ; currPos++)
	{
		contextFactor.push_back(&phrase.GetWord(currPos));
		fullScore += GetValue(contextFactor);
	}

	// first full n-gram, if the phrase is long enough to contain one
	if (phraseSize >= m_nGramOrder)
	{
		contextFactor.push_back(&phrase.GetWord(m_nGramOrder - 1));
		ngramScore = GetValue(contextFactor);
	}

	// main loop: slide the order-sized window across the rest of the phrase
	for (size_t currPos = m_nGramOrder; currPos < phraseSize ; currPos++)
	{ // used by hypo to speed up lm score calc
		// shift the window left by one word
		for (size_t currNGramOrder = 0 ; currNGramOrder < m_nGramOrder - 1 ; currNGramOrder++)
		{
			contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
		}
		contextFactor[m_nGramOrder - 1] = &phrase.GetWord(currPos);
		float partScore = GetValue(contextFactor);
		ngramScore += partScore;
	}
	// fullScore includes the full n-gram scores as well as the partial ones
	fullScore += ngramScore;
}
+
+LanguageModel::State LanguageModel::GetState(const std::vector<const Word*> &contextFactor, unsigned int* len) const
+{
+ State state;
+ unsigned int dummy;
+ if (!len) len = &dummy;
+ GetValue(contextFactor,&state,len);
+ return state;
+}
+
// FFState wrapper around an opaque LM-internal state pointer.
// Compare() orders by raw pointer value only — i.e. state identity,
// not structural equality of the underlying LM contexts.
struct LMState : public FFState {
  const void* lmstate;
  LMState(const void* lms) { lmstate = lms; }
  virtual int Compare(const FFState& o) const {
    const LMState& other = static_cast<const LMState&>(o);
    if (other.lmstate > lmstate) return 1;
    else if (other.lmstate < lmstate) return -1;
    return 0;
  }
};
+
// Initial hypothesis state: no LM context yet (NULL internal state).
// Caller owns the returned object.
const FFState* LanguageModel::EmptyHypothesisState() const {
  return new LMState(NULL);
}
+
/** Score the n-grams that cross the boundary into this hypothesis's newly
 *  translated target words, add the result to `out`, and return the new LM
 *  state. Phrase-internal n-gram scores are assumed to be precomputed in
 *  the translation option, so only boundary-overlapping n-grams are scored
 *  here. Caller owns the returned FFState.
 */
FFState* LanguageModel::Evaluate(
    const Hypothesis& hypo,
    const FFState* ps,
    ScoreComponentCollection* out) const {
  // In this function, we only compute the LM scores of n-grams that overlap a
  // phrase boundary. Phrase-internal scores are taken directly from the
  // translation option. In the unigram case, there is no overlap, so we don't
  // need to do anything.
  if(m_nGramOrder <= 1)
    return NULL;

  clock_t t=0;
  IFVERBOSE(2) { t  = clock(); } // track time
  const void* prevlm = ps ? (static_cast<const LMState *>(ps)->lmstate) : NULL;
  LMState* res = new LMState(prevlm);
  // Nothing translated in this step: state is unchanged.
  if (hypo.GetCurrTargetLength() == 0)
    return res;
  const size_t currEndPos = hypo.GetCurrTargetWordsRange().GetEndPos();
  const size_t startPos = hypo.GetCurrTargetWordsRange().GetStartPos();

  // 1st n-gram: context reaches back across the phrase boundary,
  // padding with <s> before the sentence start.
  vector<const Word*> contextFactor(m_nGramOrder);
  size_t index = 0;
  for (int currPos = (int) startPos - (int) m_nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
  {
    if (currPos >= 0)
      contextFactor[index++] = &hypo.GetWord(currPos);
    else
      contextFactor[index++] = &GetSentenceStartArray();
  }
  float lmScore = GetValue(contextFactor);
  //cout<<"context factor: "<<GetValue(contextFactor)<<endl;

  // main loop: remaining n-grams that still straddle the boundary
  // (at most order-2 of them)
  size_t endPos = std::min(startPos + m_nGramOrder - 2
      , currEndPos);
  for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++)
  {
    // shift all args down 1 place
    for (size_t i = 0 ; i < m_nGramOrder - 1 ; i++)
      contextFactor[i] = contextFactor[i + 1];

    // add last factor
    contextFactor.back() = &hypo.GetWord(currPos);

    lmScore += GetValue(contextFactor);
  }

  // end of sentence: score the final </s> n-gram and capture the LM state
  if (hypo.IsSourceCompleted())
  {
    const size_t size = hypo.GetSize();
    contextFactor.back() = &GetSentenceEndArray();

    for (size_t i = 0 ; i < m_nGramOrder - 1 ; i ++)
    {
      int currPos = (int)(size - m_nGramOrder + i + 1);
      if (currPos < 0)
        contextFactor[i] = &GetSentenceStartArray();
      else
        contextFactor[i] = &hypo.GetWord((size_t)currPos);
    }
    lmScore += GetValue(contextFactor, &res->lmstate);
  } else {
    // Not at sentence end: advance the context window to the last words of
    // the hypothesis (no scoring — these n-grams were phrase-internal) and
    // record the resulting LM state for the next extension.
    // NOTE(review): when currEndPos <= endPos the loop body never runs and
    // the state is taken from the last boundary n-gram — appears intended,
    // but worth confirming against the reference implementation.
    for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
      for (size_t i = 0 ; i < m_nGramOrder - 1 ; i++)
        contextFactor[i] = contextFactor[i + 1];
      contextFactor.back() = &hypo.GetWord(currPos);
    }
    res->lmstate = GetState(contextFactor);
  }
  out->PlusEquals(this, lmScore);
  IFVERBOSE(2) { hypo.GetManager().GetSentenceStats().AddTimeCalcLM( clock()-t ); }
  return res;
}
+
+}
diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h
new file mode 100644
index 000000000..1304a412b
--- /dev/null
+++ b/moses/src/LanguageModel.h
@@ -0,0 +1,146 @@
+// $Id: LanguageModel.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModel_h
+#define moses_LanguageModel_h
+
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "FeatureFunction.h"
+#include "Word.h"
+
+namespace Moses
+{
+
+class FactorCollection;
+class Factor;
+class Phrase;
+
//! Abstract base class which represent a language model on a contiguous phrase
class LanguageModel : public StatefulFeatureFunction
{
protected:	
	float				m_weight; //! scoring weight. Shouldn't this now be superseded by ScoreProducer???
	std::string	m_filePath; //! for debugging purposes
	size_t			m_nGramOrder; //! max n-gram length contained in this LM
	Word m_sentenceStartArray, m_sentenceEndArray; //! Contains factors which represents the beginning and end words for this LM. 
																								//! Usually <s> and </s>

	/** constructor to be called by inherited class
	 * \param registerScore whether this LM will be directly used to score sentence. 
	 * 						Usually true, except where LM is a component in a composite LM, eg. LanguageModelJoint
	 */
	LanguageModel(bool registerScore, ScoreIndexManager &scoreIndexManager);

public:
  /* Returned from LM implementations which points at the state used. For example, if a trigram score was requested
   * but the LM backed off to using the trigram, the State pointer will point to the bigram.
   * Used for more aggressive pruning of hypothesis
   */
	typedef const void* State;	

	virtual ~LanguageModel();

	//! see ScoreProducer.h
	size_t GetNumScoreComponents() const;

	//! Single or multi-factor
	virtual LMType GetLMType() const = 0;

	/* whether this LM can be used on a particular phrase. 
	 * Should return false if phrase size = 0 or factor types required don't exists
	 */
	virtual bool Useable(const Phrase &phrase) const = 0;

	/* calc total unweighted LM score of this phrase and return score via arguments.
	 * Return scores should always be in natural log, regardless of representation with LM implementation.
	 * Uses GetValue() of inherited class.
	 * Useable() should be called beforehand on the phrase
	 * \param fullScore scores of all unigram, bigram... of contiguous n-gram of the phrase
	 * \param ngramScore score of only n-gram of order m_nGramOrder
	 */
	void CalcScore(const Phrase &phrase
							, float &fullScore
							, float &ngramScore) const;
	/* get score of n-gram. n-gram should not be bigger than m_nGramOrder
	 * Specific implementation can return State and len data to be used in hypothesis pruning
	 * \param contextFactor n-gram to be scored
	 * \param finalState state used by LM. Return arg
	 * \param len ???
	 */
	virtual float GetValue(const std::vector<const Word*> &contextFactor
										, State* finalState = 0
										, unsigned int* len = 0) const = 0;
	//! get State for a particular n-gram
	State GetState(const std::vector<const Word*> &contextFactor, unsigned int* len = 0) const;

	//! max n-gram order of LM
	size_t GetNGramOrder() const
	{
		return m_nGramOrder;
	}

	//! Contains factors which represents the beginning and end words for this LM. Usually <s> and </s>
	const Word &GetSentenceStartArray() const
	{
		return m_sentenceStartArray;
	}
	const Word &GetSentenceEndArray() const
	{
		return m_sentenceEndArray;
	}
	
	//! scoring weight. Shouldn't this now be superseded by ScoreProducer???
	float GetWeight() const
	{
		return m_weight;
	}
	void SetWeight(float weight)
	{
		m_weight = weight;
	}
	
	virtual std::string GetScoreProducerDescription() const = 0;

	std::string GetScoreProducerWeightShortName() const
	{
		return "lm";
	}
  
	//! overrideable functions for IRST LM to cleanup. Maybe something to do with on demand/cache loading/unloading
	virtual void InitializeBeforeSentenceProcessing(){};
	virtual void CleanUpAfterSentenceProcessing() {};

	//! initial (empty-context) state for a new hypothesis
	virtual const FFState* EmptyHypothesisState() const;

	//! score boundary-crossing n-grams of cur_hypo and return the new LM state
	virtual FFState* Evaluate(
		const Hypothesis& cur_hypo,
		const FFState* prev_state,
		ScoreComponentCollection* accumulator) const;

};
+
+}
+
+#endif
diff --git a/moses/src/LanguageModelFactory.cpp b/moses/src/LanguageModelFactory.cpp
new file mode 100644
index 000000000..12d337d82
--- /dev/null
+++ b/moses/src/LanguageModelFactory.cpp
@@ -0,0 +1,151 @@
+// $Id: LanguageModelFactory.cpp 2180 2009-02-18 11:35:41Z hieuhoang1972 $
+
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <iostream>
+#include "LanguageModelFactory.h"
+#include "UserMessage.h"
+#include "TypeDef.h"
+#include "FactorCollection.h"
+
+// include appropriate header
+#ifdef LM_SRI
+# include "LanguageModelSRI.h"
+#endif
+#ifdef LM_IRST
+# include "LanguageModelIRST.h"
+#endif
+#ifdef LM_RAND
+# include "LanguageModelRandLM.h"
+#endif
+#ifdef LM_REMOTE
+# include "LanguageModelRemote.h"
+#endif
+
+#include "LanguageModelInternal.h"
+#include "LanguageModelSkip.h"
+#include "LanguageModelJoint.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+namespace LanguageModelFactory
+{
+
+ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
+ , const std::vector<FactorType> &factorTypes
+ , size_t nGramOrder
+ , const std::string &languageModelFile
+ , float weight
+ , ScoreIndexManager &scoreIndexManager
+ , int dub)
+ {
+ LanguageModel *lm = NULL;
+ switch (lmImplementation)
+ {
+ case RandLM:
+ #ifdef LM_RAND
+ lm = new LanguageModelRandLM(true,
+ scoreIndexManager);
+ #endif
+ break;
+ case Remote:
+ #ifdef LM_REMOTE
+ lm = new LanguageModelRemote(true,scoreIndexManager);
+ #endif
+ break;
+
+ case SRI:
+ #ifdef LM_SRI
+ lm = new LanguageModelSRI(true, scoreIndexManager);
+ #elif LM_INTERNAL
+ lm = new LanguageModelInternal(true, scoreIndexManager);
+ #endif
+ break;
+ case IRST:
+ #ifdef LM_IRST
+ lm = new LanguageModelIRST(true, scoreIndexManager, dub);
+ #endif
+ break;
+ case Skip:
+ #ifdef LM_SRI
+ lm = new LanguageModelSkip(new LanguageModelSRI(false, scoreIndexManager)
+ , true
+ , scoreIndexManager);
+ #elif LM_INTERNAL
+ lm = new LanguageModelSkip(new LanguageModelInternal(false, scoreIndexManager)
+ , true
+ , scoreIndexManager);
+ #endif
+ break;
+ case Joint:
+ #ifdef LM_SRI
+ lm = new LanguageModelJoint(new LanguageModelSRI(false, scoreIndexManager)
+ , true
+ , scoreIndexManager);
+ #elif LM_INTERNAL
+ lm = new LanguageModelJoint(new LanguageModelInternal(false, scoreIndexManager)
+ , true
+ , scoreIndexManager);
+ #endif
+ break;
+ case Internal:
+ #ifdef LM_INTERNAL
+ lm = new LanguageModelInternal(true, scoreIndexManager);
+ #endif
+ break;
+ }
+
+ if (lm == NULL)
+ {
+ UserMessage::Add("Language model type unknown. Probably not compiled into library");
+ }
+ else
+ {
+ switch (lm->GetLMType())
+ {
+ case SingleFactor:
+ if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], weight, nGramOrder))
+ {
+ cerr << "single factor model failed" << endl;
+ delete lm;
+ lm = NULL;
+ }
+ break;
+ case MultiFactor:
+ if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, weight, nGramOrder))
+ {
+ cerr << "multi factor model failed" << endl;
+ delete lm;
+ lm = NULL;
+ }
+ break;
+ }
+ }
+
+ return lm;
+ }
+}
+
+}
+
diff --git a/moses/src/LanguageModelFactory.h b/moses/src/LanguageModelFactory.h
new file mode 100644
index 000000000..c0aea8f88
--- /dev/null
+++ b/moses/src/LanguageModelFactory.h
@@ -0,0 +1,34 @@
+// $Id: LanguageModelFactory.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_LanguageModelFactory_h
+#define moses_LanguageModelFactory_h
+
+#include <string>
+#include <vector>
+#include "TypeDef.h"
+
+namespace Moses
+{
+
+class LanguageModel;
+class ScoreIndexManager;
+
+namespace LanguageModelFactory {
+
+ /**
+ * creates a language model that will use the appropriate
+ * language model toolkit as its underlying implementation
+ */
+ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
+ , const std::vector<FactorType> &factorTypes
+ , size_t nGramOrder
+ , const std::string &languageModelFile
+ , float weight
+ , ScoreIndexManager &scoreIndexManager
+ , int dub);
+
+};
+
+}
+
+#endif
diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp
new file mode 100644
index 000000000..1e8d206f2
--- /dev/null
+++ b/moses/src/LanguageModelIRST.cpp
@@ -0,0 +1,236 @@
+// $Id: LanguageModelIRST.cpp 2650 2010-01-09 19:00:37Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <fstream>
+#include "dictionary.h"
+#include "n_gram.h"
+#include "lmtable.h"
+#include "lmmacro.h"
+
+
+#include "LanguageModelIRST.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub)
+:LanguageModelSingleFactor(registerScore, scoreIndexManager)
+,m_lmtb(0),m_lmtb_dub(dub)
+{
+}
+
+LanguageModelIRST::~LanguageModelIRST()
+{
+ delete m_lmtb;
+ delete m_lmtb_ng;
+}
+
+
+bool LanguageModelIRST::Load(const std::string &filePath,
+ FactorType factorType,
+ float weight,
+ size_t nGramOrder)
+{
+ char *SepString = " \t\n";
+ cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n";
+
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ m_factorType = factorType;
+ m_weight = weight;
+ m_nGramOrder = nGramOrder;
+
+ // get name of LM file and, if any, of the micro-macro map file
+ char *filenamesOrig = strdup(filePath.c_str());
+ char *filenames = filenamesOrig;
+ m_filePath = strsep(&filenames, SepString);
+
+ // Open the input file (possibly gzipped)
+ InputFileStream inp(m_filePath);
+
+ if (filenames) {
+ // case LMfile + MAPfile: create an object of lmmacro class and load both LM file and map
+ cerr << "Loading LM file + MAP\n";
+ m_mapFilePath = strsep(&filenames, SepString);
+ if (!FileExists(m_mapFilePath)) {
+ cerr << "ERROR: Map file <" << m_mapFilePath << "> does not exist\n";
+ free(filenamesOrig);
+ return false;
+ }
+ InputFileStream inpMap(m_mapFilePath);
+ m_lmtb = new lmmacro(m_filePath, inp, inpMap);
+
+
+ } else {
+ // case (standard) LMfile only: create an object of lmtable
+ cerr << "Loading LM file (no MAP)\n";
+ m_lmtb = (lmtable *)new lmtable;
+
+ // Load the (possibly binary) model
+#ifdef WIN32
+ m_lmtb->load(inp); //don't use memory map
+#else
+ if (m_filePath.compare(m_filePath.size()-3,3,".mm")==0)
+ m_lmtb->load(inp,m_filePath.c_str(),NULL,1);
+ else
+ m_lmtb->load(inp,m_filePath.c_str(),NULL,0);
+#endif
+
+ }
+
+ m_lmtb_ng=new ngram(m_lmtb->getDict()); // ngram of words/micro tags
+ m_lmtb_size=m_lmtb->maxlevel();
+
+ // LM can be ok, just outputs warnings
+
+ // Mauro: in the original, the following two instructions are wrongly switched:
+ m_unknownId = m_lmtb->getDict()->oovcode(); // at the level of micro tags
+ CreateFactors(factorCollection);
+
+ VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl);
+
+ //install caches
+ m_lmtb->init_probcache();
+ m_lmtb->init_statecache();
+ m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2);
+
+ if (m_lmtb_dub >0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
+
+ free(filenamesOrig);
+ return true;
+}
+
+void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
+{ // add factors which have srilm id
+ // code copied &amp; pasted from SRI LM class; should be made into a template function
+ std::map<size_t, int> lmIdMap;
+ size_t maxFactorId = 0; // to create lookup vector later on
+
+ dict_entry *entry;
+ dictionary_iter iter(m_lmtb->getDict()); // at the level of micro tags
+ while ( (entry = iter.next()) != NULL)
+ {
+ size_t factorId = factorCollection.AddFactor(Output, m_factorType, entry->word)->GetId();
+ lmIdMap[factorId] = entry->code;
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
+ }
+
+ size_t factorId;
+
+ m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
+ factorId = m_sentenceStart->GetId();
+ m_lmtb_sentenceStart=lmIdMap[factorId] = GetLmID(BOS_);
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
+ m_sentenceStartArray[m_factorType] = m_sentenceStart;
+
+ m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
+ factorId = m_sentenceEnd->GetId();
+ m_lmtb_sentenceEnd=lmIdMap[factorId] = GetLmID(EOS_);
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
+ m_sentenceEndArray[m_factorType] = m_sentenceEnd;
+
+ // add to lookup vector in object
+ m_lmIdLookup.resize(maxFactorId+1);
+
+ fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId);
+
+ map<size_t, int>::iterator iterMap;
+ for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap)
+ {
+ m_lmIdLookup[iterMap->first] = iterMap->second;
+ }
+
+
+}
+
+int LanguageModelIRST::GetLmID( const std::string &str ) const
+{
+ return m_lmtb->getDict()->encode( str.c_str() ); // at the level of micro tags
+}
+
+float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const
+{
+ unsigned int dummy;
+ if (!len) { len = &dummy; }
+ FactorType factorType = GetFactorType();
+
+ // set up context
+ size_t count = contextFactor.size();
+
+ m_lmtb_ng->size=0;
+ if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
+ if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
+
+ for (size_t i = 0 ; i < count ; i++)
+ {
+ //int lmId = GetLmID((*contextFactor[i])[factorType]);
+#ifdef DEBUG
+ cout << "i=" << i << " -> " << (*contextFactor[i])[factorType]->GetString() << "\n";
+#endif
+ int lmId = GetLmID((*contextFactor[i])[factorType]->GetString());
+ // cerr << (*contextFactor[i])[factorType]->GetString() << " = " << lmId;
+ m_lmtb_ng->pushc(lmId);
+ }
+
+ if (finalState){
+ *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
+ // back off stats not currently available
+ *len = 0;
+ }
+
+ float prob = m_lmtb->clprob(*m_lmtb_ng);
+
+
+ return TransformIRSTScore(prob);
+}
+
+
+void LanguageModelIRST::CleanUpAfterSentenceProcessing(){
+ TRACE_ERR( "reset caches\n");
+ m_lmtb->reset_caches();
+
+#ifndef WIN32
+ TRACE_ERR( "reset mmap\n");
+ m_lmtb->reset_mmap();
+#endif
+
+}
+
+void LanguageModelIRST::InitializeBeforeSentenceProcessing(){
+ //nothing to do
+#ifdef TRACE_CACHE
+ m_lmtb->sentence_id++;
+#endif
+}
+
+}
+
diff --git a/moses/src/LanguageModelIRST.h b/moses/src/LanguageModelIRST.h
new file mode 100644
index 000000000..378c8fffc
--- /dev/null
+++ b/moses/src/LanguageModelIRST.h
@@ -0,0 +1,88 @@
+// $Id: LanguageModelIRST.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelIRST_h
+#define moses_LanguageModelIRST_h
+
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "LanguageModelSingleFactor.h"
+
+class lmtable; // irst lm table
+class lmmacro; // irst lm for macro tags
+class ngram;
+
+namespace Moses
+{
+class Phrase;
+
+/** Implementation of single factor LM using IRST's code.
+* This is the default LM for Moses and is available from the same sourceforge repository
+*/
+class LanguageModelIRST : public LanguageModelSingleFactor
+{
+protected:
+ std::vector<int> m_lmIdLookup;
+ lmtable* m_lmtb;
+ ngram* m_lmtb_ng;
+
+ int m_unknownId;
+ int m_lmtb_sentenceStart; //lmtb symbols to initialize ngram with
+ int m_lmtb_sentenceEnd; //lmtb symbol to initialize ngram with
+ int m_lmtb_size; //max ngram stored in the table
+ int m_lmtb_dub; //dictionary upperbound
+
+ std::string m_mapFilePath;
+
+// float GetValue(LmId wordId, ngram *context) const;
+
+ void CreateFactors(FactorCollection &factorCollection);
+ int GetLmID( const std::string &str ) const;
+
+ int GetLmID( const Factor *factor ) const{
+ size_t factorId = factor->GetId();
+ return ( factorId >= m_lmIdLookup.size()) ? m_unknownId : m_lmIdLookup[factorId];
+ };
+
+public:
+ LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub);
+ ~LanguageModelIRST();
+ bool Load(const std::string &filePath
+ , FactorType factorType
+ , float weight
+ , size_t nGramOrder);
+
+ virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
+
+ void CleanUpAfterSentenceProcessing();
+ void InitializeBeforeSentenceProcessing();
+
+ void set_dictionary_upperbound(int dub){ m_lmtb_size=dub ;
+//m_lmtb->set_dictionary_upperbound(dub);
+};
+};
+
+}
+
+#endif
diff --git a/moses/src/LanguageModelInternal.cpp b/moses/src/LanguageModelInternal.cpp
new file mode 100644
index 000000000..13e6dacfa
--- /dev/null
+++ b/moses/src/LanguageModelInternal.cpp
@@ -0,0 +1,272 @@
+
+#include "LanguageModelInternal.h"
+#include "FactorCollection.h"
+#include "NGramNode.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+
+using namespace std;
+
+namespace Moses
+{
+LanguageModelInternal::LanguageModelInternal(bool registerScore, ScoreIndexManager &scoreIndexManager)
+:LanguageModelSingleFactor(registerScore, scoreIndexManager)
+{
+}
+
+bool LanguageModelInternal::Load(const std::string &filePath
+ , FactorType factorType
+ , float weight
+ , size_t nGramOrder)
+{
+ assert(nGramOrder <= 3);
+ if (nGramOrder > 3)
+ {
+ UserMessage::Add("Can only do up to trigram. Aborting");
+ abort();
+ }
+
+ VERBOSE(1, "Loading Internal LM: " << filePath << endl);
+
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ m_filePath = filePath;
+ m_factorType = factorType;
+ m_weight = weight;
+ m_nGramOrder = nGramOrder;
+
+ // make sure start & end tags in factor collection
+ m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
+ m_sentenceStartArray[m_factorType] = m_sentenceStart;
+
+ m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
+ m_sentenceEndArray[m_factorType] = m_sentenceEnd;
+
+ // read in file
+ VERBOSE(1, filePath << endl);
+
+ InputFileStream inFile(filePath);
+
+ // to create lookup vector later on
+ size_t maxFactorId = 0;
+ map<size_t, const NGramNode*> lmIdMap;
+
+ string line;
+ int lineNo = 0;
+
+ while( !getline(inFile, line, '\n').eof())
+ {
+ lineNo++;
+
+ if (line.size() != 0 && line.substr(0,1) != "\\")
+ {
+ vector<string> tokens = Tokenize(line, "\t");
+ if (tokens.size() >= 2)
+ {
+ // split unigram/bigram trigrams
+ vector<string> factorStr = Tokenize(tokens[1], " ");
+
+ // create / traverse down tree
+ NGramCollection *ngramColl = &m_map;
+ NGramNode *nGram;
+ const Factor *factor;
+ for (int currFactor = (int) factorStr.size() - 1 ; currFactor >= 0 ; currFactor--)
+ {
+ factor = factorCollection.AddFactor(Output, m_factorType, factorStr[currFactor]);
+ nGram = ngramColl->GetOrCreateNGram(factor);
+
+ ngramColl = nGram->GetNGramColl();
+
+ }
+
+ NGramNode *rootNGram = m_map.GetNGram(factor);
+ nGram->SetRootNGram(rootNGram);
+
+ // create vector of factors used in this LM
+ size_t factorId = factor->GetId();
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
+ lmIdMap[factorId] = rootNGram;
+ //factorCollection.SetFactorLmId(factor, rootNGram);
+
+ float score = TransformSRIScore(Scan<float>(tokens[0]));
+ nGram->SetScore( score );
+ if (tokens.size() == 3)
+ {
+ float logBackOff = TransformSRIScore(Scan<float>(tokens[2]));
+ nGram->SetLogBackOff( logBackOff );
+ }
+ else
+ {
+ nGram->SetLogBackOff( 0 );
+ }
+ }
+ }
+ }
+
+ // add to lookup vector in object
+ m_lmIdLookup.resize(maxFactorId+1);
+ fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), static_cast<const NGramNode*>(NULL));
+
+ map<size_t, const NGramNode*>::iterator iterMap;
+ for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap)
+ {
+ m_lmIdLookup[iterMap->first] = iterMap->second;
+ }
+
+ return true;
+}
+
+float LanguageModelInternal::GetValue(const std::vector<const Word*> &contextFactor
+ , State* finalState
+ , unsigned int* len) const
+{
+ const size_t ngram = contextFactor.size();
+ switch (ngram)
+ {
+ case 1: return GetValue((*contextFactor[0])[m_factorType], finalState); break;
+ case 2: return GetValue((*contextFactor[0])[m_factorType]
+ , (*contextFactor[1])[m_factorType], finalState); break;
+ case 3: return GetValue((*contextFactor[0])[m_factorType]
+ , (*contextFactor[1])[m_factorType]
+ , (*contextFactor[2])[m_factorType], finalState); break;
+ }
+
+ assert (false);
+ return 0;
+}
+
+float LanguageModelInternal::GetValue(const Factor *factor0, State* finalState) const
+{
+ float prob;
+ const NGramNode *nGram = GetLmID(factor0);
+ if (nGram == NULL)
+ {
+ if (finalState != NULL)
+ *finalState = NULL;
+ prob = -numeric_limits<float>::infinity();
+ }
+ else
+ {
+ if (finalState != NULL)
+ *finalState = static_cast<const void*>(nGram);
+ prob = nGram->GetScore();
+ }
+ return FloorScore(prob);
+}
+float LanguageModelInternal::GetValue(const Factor *factor0, const Factor *factor1, State* finalState) const
+{
+ float score;
+ const NGramNode *nGram[2];
+
+ nGram[1] = GetLmID(factor1);
+ if (nGram[1] == NULL)
+ {
+ if (finalState != NULL)
+ *finalState = NULL;
+ score = -numeric_limits<float>::infinity();
+ }
+ else
+ {
+ nGram[0] = nGram[1]->GetNGram(factor0);
+ if (nGram[0] == NULL)
+ { // something unigram
+ if (finalState != NULL)
+ *finalState = static_cast<const void*>(nGram[1]);
+
+ nGram[0] = GetLmID(factor0);
+ if (nGram[0] == NULL)
+ { // stops at unigram
+ score = nGram[1]->GetScore();
+ }
+ else
+ { // unigram unigram
+ score = nGram[1]->GetScore() + nGram[0]->GetLogBackOff();
+ }
+ }
+ else
+ { // bigram
+ if (finalState != NULL)
+ *finalState = static_cast<const void*>(nGram[0]);
+ score = nGram[0]->GetScore();
+ }
+ }
+
+ return FloorScore(score);
+
+}
+
+float LanguageModelInternal::GetValue(const Factor *factor0, const Factor *factor1, const Factor *factor2, State* finalState) const
+{
+ float score;
+ const NGramNode *nGram[3];
+
+ nGram[2] = GetLmID(factor2);
+ if (nGram[2] == NULL)
+ {
+ if (finalState != NULL)
+ *finalState = NULL;
+ score = -numeric_limits<float>::infinity();
+ }
+ else
+ {
+ nGram[1] = nGram[2]->GetNGram(factor1);
+ if (nGram[1] == NULL)
+ { // something unigram
+ if (finalState != NULL)
+ *finalState = static_cast<const void*>(nGram[2]);
+
+ nGram[1] = GetLmID(factor1);
+ if (nGram[1] == NULL)
+ { // stops at unigram
+ score = nGram[2]->GetScore();
+ }
+ else
+ {
+ nGram[0] = nGram[1]->GetNGram(factor0);
+ if (nGram[0] == NULL)
+ { // unigram unigram
+ score = nGram[2]->GetScore() + nGram[1]->GetLogBackOff();
+ }
+ else
+ { // unigram bigram
+ score = nGram[2]->GetScore() + nGram[1]->GetLogBackOff() + nGram[0]->GetLogBackOff();
+ }
+ }
+ }
+ else
+ { // trigram, or something bigram
+ nGram[0] = nGram[1]->GetNGram(factor0);
+ if (nGram[0] != NULL)
+ { // trigram
+ if (finalState != NULL)
+ *finalState = static_cast<const void*>(nGram[0]);
+ score = nGram[0]->GetScore();
+ }
+ else
+ {
+ if (finalState != NULL)
+ *finalState = static_cast<const void*>(nGram[1]);
+
+ score = nGram[1]->GetScore();
+ nGram[1] = nGram[1]->GetRootNGram();
+ nGram[0] = nGram[1]->GetNGram(factor0);
+ if (nGram[0] == NULL)
+ { // just bigram
+ // do nothing
+ }
+ else
+ {
+ score += nGram[0]->GetLogBackOff();
+ }
+
+ }
+ // else do nothing. just use 1st bigram
+ }
+ }
+
+ return FloorScore(score);
+
+}
+
+}
+
diff --git a/moses/src/LanguageModelInternal.h b/moses/src/LanguageModelInternal.h
new file mode 100644
index 000000000..2113bfe37
--- /dev/null
+++ b/moses/src/LanguageModelInternal.h
@@ -0,0 +1,41 @@
+#ifndef moses_LanguageModelInternal_h
+#define moses_LanguageModelInternal_h
+
+#include "LanguageModelSingleFactor.h"
+#include "NGramCollection.h"
+
+namespace Moses
+{
+
+/** Guaranteed cross-platform LM implementation designed to mimic LM used in regression tests
+*/
+class LanguageModelInternal : public LanguageModelSingleFactor
+{
+protected:
+ std::vector<const NGramNode*> m_lmIdLookup;
+ NGramCollection m_map;
+
+ const NGramNode* GetLmID( const Factor *factor ) const
+ {
+ size_t factorId = factor->GetId();
+ return ( factorId >= m_lmIdLookup.size()) ? NULL : m_lmIdLookup[factorId];
+ };
+
+ float GetValue(const Factor *factor0, State* finalState) const;
+ float GetValue(const Factor *factor0, const Factor *factor1, State* finalState) const;
+ float GetValue(const Factor *factor0, const Factor *factor1, const Factor *factor2, State* finalState) const;
+
+public:
+ LanguageModelInternal(bool registerScore, ScoreIndexManager &scoreIndexManager);
+ bool Load(const std::string &filePath
+ , FactorType factorType
+ , float weight
+ , size_t nGramOrder);
+ float GetValue(const std::vector<const Word*> &contextFactor
+ , State* finalState = 0
+ , unsigned int* len = 0) const;
+};
+
+}
+
+#endif
diff --git a/moses/src/LanguageModelJoint.cpp b/moses/src/LanguageModelJoint.cpp
new file mode 100644
index 000000000..33c6e4fd9
--- /dev/null
+++ b/moses/src/LanguageModelJoint.cpp
@@ -0,0 +1,22 @@
+// $Id: LanguageModelJoint.cpp 886 2006-10-17 11:07:17Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "LanguageModelJoint.h"
diff --git a/moses/src/LanguageModelJoint.h b/moses/src/LanguageModelJoint.h
new file mode 100644
index 000000000..2dd3c14e2
--- /dev/null
+++ b/moses/src/LanguageModelJoint.h
@@ -0,0 +1,133 @@
+// $Id: LanguageModelJoint.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelJoint_h
+#define moses_LanguageModelJoint_h
+
+#include <vector>
+#include <string>
+#include <sstream>
+#include "LanguageModelSingleFactor.h"
+#include "LanguageModelMultiFactor.h"
+#include "Word.h"
+#include "FactorTypeSet.h"
+#include "FactorCollection.h"
+
+namespace Moses
+{
+
+class Phrase;
+class FactorCollection;
+
+/** LM of multiple factors. A simple extension of single factor LM - factors back off together.
+ * Rather slow as this uses string concatenation/split
+*/
+class LanguageModelJoint : public LanguageModelMultiFactor
+{
+protected:
+ LanguageModelSingleFactor *m_lmImpl;
+ std::vector<FactorType> m_factorTypesOrdered;
+
+ size_t m_implFactor;
+public:
+ LanguageModelJoint(LanguageModelSingleFactor *lmImpl, bool registerScore, ScoreIndexManager &scoreIndexManager)
+ :LanguageModelMultiFactor(registerScore, scoreIndexManager)
+ {
+ m_lmImpl = lmImpl;
+ }
+
+ ~LanguageModelJoint()
+ {
+ delete m_lmImpl;
+ }
+
+ bool Load(const std::string &filePath
+ , const std::vector<FactorType> &factorTypes
+ , float weight
+ , size_t nGramOrder)
+ {
+ m_factorTypes = FactorMask(factorTypes);
+ m_weight = weight;
+ m_filePath = filePath;
+ m_nGramOrder = nGramOrder;
+
+ m_factorTypesOrdered= factorTypes;
+ m_implFactor = 0;
+
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ // sentence markers
+ for (size_t index = 0 ; index < factorTypes.size() ; ++index)
+ {
+ FactorType factorType = factorTypes[index];
+ m_sentenceStartArray[factorType] = factorCollection.AddFactor(Output, factorType, BOS_);
+ m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_);
+ }
+
+ return m_lmImpl->Load(filePath, m_implFactor, weight, nGramOrder);
+ }
+
+ float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
+ {
+ if (contextFactor.size() == 0)
+ {
+ return 0;
+ }
+
+ // joint context for internal LM
+ std::vector<const Word*> jointContext;
+
+ for (size_t currPos = 0 ; currPos < m_nGramOrder ; ++currPos )
+ {
+ const Word &word = *contextFactor[currPos];
+
+ // add word to chunked context
+ std::stringstream stream("");
+
+ const Factor *factor = word[ m_factorTypesOrdered[0] ];
+ stream << factor->GetString();
+
+ for (size_t index = 1 ; index < m_factorTypesOrdered.size() ; ++index)
+ {
+ FactorType factorType = m_factorTypesOrdered[index];
+ const Factor *factor = word[factorType];
+ stream << "|" << factor->GetString();
+ }
+
+ factor = FactorCollection::Instance().AddFactor(Output, m_implFactor, stream.str());
+
+ Word* jointWord = new Word;
+ jointWord->SetFactor(m_implFactor, factor);
+ jointContext.push_back(jointWord);
+ }
+
+ // calc score on chunked phrase
+ float ret = m_lmImpl->GetValue(jointContext, finalState, len);
+
+ RemoveAllInColl(jointContext);
+
+ return ret;
+ }
+
+};
+
+}
+#endif
diff --git a/moses/src/LanguageModelMultiFactor.cpp b/moses/src/LanguageModelMultiFactor.cpp
new file mode 100644
index 000000000..d8a8263a3
--- /dev/null
+++ b/moses/src/LanguageModelMultiFactor.cpp
@@ -0,0 +1,56 @@
+// $Id: LanguageModelMultiFactor.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "LanguageModelMultiFactor.h"
+#include "Phrase.h"
+
+namespace Moses
+{
+LanguageModelMultiFactor::LanguageModelMultiFactor(bool registerScore, ScoreIndexManager &scoreIndexManager)
+:LanguageModel(registerScore, scoreIndexManager)
+{}
+
+std::string LanguageModelMultiFactor::GetScoreProducerDescription() const
+{
+ std::ostringstream oss;
+ // what about LMs that are over multiple factors at once, POS + stem, for example?
+ oss << GetNGramOrder() << "-gram LM score, factor-type= ??? " << ", file=" << m_filePath;
+ return oss.str();
+}
+
+bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const
+{
+ if (phrase.GetSize()==0)
+ return false;
+
+ // whether phrase contains all factors in this LM
+ const Word &word = phrase.GetWord(0);
+ for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; ++currFactor)
+ {
+ if (m_factorTypes[currFactor] && word[currFactor] == NULL)
+ return false;
+ }
+ return true;
+
+}
+
+}
+
diff --git a/moses/src/LanguageModelMultiFactor.h b/moses/src/LanguageModelMultiFactor.h
new file mode 100644
index 000000000..1b3df4d38
--- /dev/null
+++ b/moses/src/LanguageModelMultiFactor.h
@@ -0,0 +1,60 @@
+// $Id: LanguageModelMultiFactor.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelMultiFactor_h
+#define moses_LanguageModelMultiFactor_h
+
+#include <vector>
+#include <string>
+#include "LanguageModel.h"
+#include "Word.h"
+#include "FactorTypeSet.h"
+
+namespace Moses
+{
+
+class Phrase;
+
//! Abstract base class for multi-factor LMs (LMs conditioned on more than one factor).
class LanguageModelMultiFactor : public LanguageModel
{
protected:
	// Mask of factor types this LM conditions on; consulted by Useable().
	FactorMask m_factorTypes;

	LanguageModelMultiFactor(bool registerScore, ScoreIndexManager &scoreIndexManager);

public:
	// Load the model file; factorTypes lists every factor the LM reads.
	virtual bool Load(const std::string &filePath
					, const std::vector<FactorType> &factorTypes
					, float weight
					, size_t nGramOrder) = 0;

	LMType GetLMType() const
	{
		return MultiFactor;
	}

	std::string GetScoreProducerDescription() const;
	// True iff the phrase's first word carries all factors in m_factorTypes.
	bool Useable(const Phrase &phrase) const;
};
+
+}
+#endif
diff --git a/moses/src/LanguageModelRandLM.cpp b/moses/src/LanguageModelRandLM.cpp
new file mode 100644
index 000000000..805878e0c
--- /dev/null
+++ b/moses/src/LanguageModelRandLM.cpp
@@ -0,0 +1,114 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <fstream>
+
+#include "LanguageModelRandLM.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+
// Load a RandLM model from filePath and register its vocabulary with the
// global FactorCollection. Always returns true on the normal path; a
// failed load only trips the assert below.
bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType, float weight,
                               size_t nGramOrder) {
  cerr << "Loading LanguageModelRandLM..." << endl;
  FactorCollection &factorCollection = FactorCollection::Instance();
  m_filePath = filePath;
  m_factorType = factorType;
  m_weight = weight;
  m_nGramOrder = nGramOrder;
  int cache_MB = 50; // increase cache size
  m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB);
  // NOTE(review): compiled out under NDEBUG, so a failed load would go
  // unnoticed until the first m_lm dereference.
  assert(m_lm != NULL);
  // get special word ids
  m_oov_id = m_lm->getWordID(m_lm->getOOV());
  CreateFactors(factorCollection);
  return true;
}
+
// Build m_randlm_ids_vec, a factor-id -> RandLM word-id lookup table:
// every RandLM vocab entry is added to the FactorCollection, the mapping
// is collected, and unseen factor ids default to the OOV id.
void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection) { // add factors which have randlm id
  // code copied & paste from SRI LM class. should do template function
  // first get all bf vocab in map
  std::map<size_t, randlm::WordID> randlm_ids_map; // map from factor id -> randlm id
  size_t maxFactorId = 0; // to create lookup vector later on
  for(std::map<randlm::Word, randlm::WordID>::const_iterator vIter = m_lm->vocabStart();
      vIter != m_lm->vocabEnd(); vIter++){
    // get word from randlm vocab and associate with (new) factor id
    size_t factorId=factorCollection.AddFactor(Output,m_factorType,vIter->first)->GetId();
    randlm_ids_map[factorId] = vIter->second;
    maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  }
  // add factors for BOS and EOS and store bf word ids
  size_t factorId;
  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, m_lm->getBOS());
  factorId = m_sentenceStart->GetId();
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceStartArray[m_factorType] = m_sentenceStart;

  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, m_lm->getEOS());
  factorId = m_sentenceEnd->GetId();
  maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
  m_sentenceEndArray[m_factorType] = m_sentenceEnd;

  // add to lookup vector in object; index = factor id
  m_randlm_ids_vec.resize(maxFactorId+1);
  // fill with OOV code so gaps in the factor-id range map to OOV
  fill(m_randlm_ids_vec.begin(), m_randlm_ids_vec.end(), m_oov_id);

  for (map<size_t, randlm::WordID>::const_iterator iter = randlm_ids_map.begin();
       iter != randlm_ids_map.end() ; ++iter)
    m_randlm_ids_vec[iter->first] = iter->second;

}
+
// Map a surface string to its RandLM word id (the LM returns its own
// OOV id for unknown strings).
randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const {
  return m_lm->getWordID(str);
}
+
// Score the n-gram formed by contextFactor (oldest word first) with the
// RandLM backend. finalState receives the backend's state; *len is always
// reported as 0 because RandLM does not expose the matched length.
// NOTE(review): assumes contextFactor.size() <= MAX_NGRAM_SIZE — the
// fixed ngram[] buffer is not bounds-checked; confirm callers guarantee it.
float LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
                                    State* finalState, unsigned int* len) const {
  unsigned int dummy; // fallback target so *len below is always writable
  if (!len) { len = &dummy; }
  FactorType factorType = GetFactorType();
  // set up context
  randlm::WordID ngram[MAX_NGRAM_SIZE];
  int count = contextFactor.size();
  for (int i = 0 ; i < count ; i++) {
    ngram[i] = GetLmID((*contextFactor[i])[factorType]);
    //std::cerr << m_lm->getWord(ngram[i]) << " ";
  }
  int found = 0;
  // TransformSRIScore: log10 -> natural log; FloorScore clamps -inf
  float logprob = FloorScore(TransformSRIScore(m_lm->getProb(&ngram[0], count, &found, finalState)));
  *len = 0; // not available
  //if (finalState)
  //  std::cerr << " = " << logprob << "(" << *finalState << ", " << *len <<")"<< std::endl;
  //else
  //  std::cerr << " = " << logprob << std::endl;
  return logprob;
}
+
+}
+
+
diff --git a/moses/src/LanguageModelRandLM.h b/moses/src/LanguageModelRandLM.h
new file mode 100644
index 000000000..be19e6787
--- /dev/null
+++ b/moses/src/LanguageModelRandLM.h
@@ -0,0 +1,67 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelRandLM_h
+#define moses_LanguageModelRandLM_h
+
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "Util.h"
+#include "LanguageModelSingleFactor.h"
+#include "RandLM.h"
+
// (removed) "class randlm::RandLM;" — forward-declaring a class with a
// qualified name is ill-formed C++, and the declaration is redundant
// because RandLM.h is included above.
+
+namespace Moses
+{
+class Factor;
+class Phrase;
+
// RandLM wrapper (single factor LM)
class LanguageModelRandLM : public LanguageModelSingleFactor {
public:
  LanguageModelRandLM(bool registerScore, ScoreIndexManager &scoreIndexManager)
    : LanguageModelSingleFactor(registerScore, scoreIndexManager), m_lm(0) {}
  bool Load(const std::string &filePath, FactorType factorType, float weight, size_t nGramOrder);
  virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
  // Owns m_lm (allocated in Load).
  ~LanguageModelRandLM() {
    delete m_lm;
  }
  // NOTE(review): dereferences m_lm without a null check — assumes Load()
  // succeeded before any sentence is processed.
  void CleanUpAfterSentenceProcessing() {
    m_lm->clearCaches(); // clear caches
  }
  void InitializeBeforeSentenceProcessing() {} // nothing to do
 protected:
  // factor id -> RandLM word id; gaps hold m_oov_id (built in CreateFactors)
  std::vector<randlm::WordID> m_randlm_ids_vec;
  randlm::RandLM* m_lm;
  randlm::WordID m_oov_id;
  void CreateFactors(FactorCollection &factorCollection);
  randlm::WordID GetLmID( const std::string &str ) const;
  // Fast path: factor ids outside the lookup table map to OOV.
  randlm::WordID GetLmID( const Factor *factor ) const{
    size_t factorId = factor->GetId();
    return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
  };

};
+
+}
+
+#endif
diff --git a/moses/src/LanguageModelRemote.cpp b/moses/src/LanguageModelRemote.cpp
new file mode 100644
index 000000000..78a7bf9c3
--- /dev/null
+++ b/moses/src/LanguageModelRemote.cpp
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include "LanguageModelRemote.h"
+#include "Factor.h"
+
+namespace Moses {
+
// Sentinel keys for sentence boundaries in the cache tree. They are never
// dereferenced — only compared as map keys.
// NOTE(review): forming EOS as "null pointer + 1" is technically undefined
// behaviour, though it works on common platforms; confirm before porting.
const Factor* LanguageModelRemote::BOS = NULL;
const Factor* LanguageModelRemote::EOS = (LanguageModelRemote::BOS + 1);
+
// Pass-through constructor; the socket is only opened later, in Load()/start().
LanguageModelRemote::LanguageModelRemote(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
{
}
+
+bool LanguageModelRemote::Load(const std::string &filePath
+ , FactorType factorType
+ , float weight
+ , size_t nGramOrder)
+{
+ m_factorType = factorType;
+ m_weight = weight;
+ m_nGramOrder = nGramOrder;
+
+ int cutAt = filePath.find(':',0);
+ std::string host = filePath.substr(0,cutAt);
+ //std::cerr << "port string = '" << filePath.substr(cutAt+1,filePath.size()-cutAt) << "'\n";
+ int port = atoi(filePath.substr(cutAt+1,filePath.size()-cutAt).c_str());
+ bool good = start(host,port);
+ if (!good) {
+ std::cerr << "failed to connect to lm server on " << host << " on port " << port << std::endl;
+ }
+ ClearSentenceCache();
+ return good;
+}
+
+
+bool LanguageModelRemote::start(const std::string& host, int port) {
+ //std::cerr << "host = " << host << ", port = " << port << "\n";
+ sock = socket(AF_INET, SOCK_STREAM, 0);
+ hp = gethostbyname(host.c_str());
+ if (hp==NULL) { herror("gethostbyname failed"); exit(1); }
+
+ bzero((char *)&server, sizeof(server));
+ bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
+ server.sin_family = hp->h_addrtype;
+ server.sin_port = htons(port);
+
+ int errors = 0;
+ while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) {
+ //std::cerr << "Error: connect()\n";
+ sleep(1);
+ errors++;
+ if (errors > 5) return false;
+ }
+ return true;
+}
+
// Score the last word of contextFactor against the remote LM server,
// memoising results in a per-sentence trie (m_cache) keyed by Factor*.
// Protocol: sends "prob <word> <history...>\n" (history newest-first) and
// reads a fixed 6-byte reply whose first 4 bytes are a raw float log10 prob.
float LanguageModelRemote::GetValue(const std::vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const {
  size_t count = contextFactor.size();
  if (count == 0) {
    if (finalState) *finalState = NULL;
    return 0;
  }
  //std::cerr << "contextFactor.size() = " << count << "\n";
  size_t max = m_nGramOrder;
  const FactorType factor = GetFactorType();
  if (max > count) max = count;

  // walk/extend the cache trie along the context; NULL factors stand in
  // for sentence boundaries via the BOS/EOS sentinel keys
  Cache* cur = &m_cache;
  int pc = static_cast<int>(count) - 1;
  for (int i = 0; i < pc; ++i) {
    const Factor* f = contextFactor[i]->GetFactor(factor);
    cur = &cur->tree[f ? f : BOS];
  }
  const Factor* event_word = contextFactor[pc]->GetFactor(factor);
  cur = &cur->tree[event_word ? event_word : EOS];
  // cache hit — prob==0 doubles as the "unset" marker, so a true zero
  // probability would be re-queried every time
  if (cur->prob) {
    if (finalState) *finalState = cur->boState;
    if (len) *len = m_nGramOrder;
    return cur->prob;
  }
  // fabricate a unique LM state id for this new context
  cur->boState = *reinterpret_cast<const State*>(&m_curId);
  ++m_curId;

  std::ostringstream os;
  os << "prob ";
  if (event_word == NULL) {
    os << "</s>";
  } else {
    os << event_word->GetString();
  }
  for (size_t i=1; i<max; i++) {
    const Factor* f = contextFactor[count-1-i]->GetFactor(factor);
    if (f == NULL) {
      os << " <s>";
    } else {
      os << ' ' << f->GetString();
    }
  }
  os << std::endl;
  std::string out = os.str();
  // NOTE(review): write()'s return value is ignored — a short/failed write
  // would desynchronise the protocol.
  write(sock, out.c_str(), out.size());
  char res[6];
  int r = read(sock, res, 6);
  int errors = 0;
  int cnt = 0;
  // NOTE(review): `r` is never updated from the reads inside the loop, so
  // the retry/termination logic only reflects the first read's result.
  while (1) {
    if (r < 0) {
      errors++; sleep(1);
      //std::cerr << "Error: read()\n";
      if (errors > 5) exit(1);
    } else if (r==0 || res[cnt] == '\n') { break; }
    else {
      cnt += r;
      if (cnt==6) break;
      read(sock, &res[cnt], 6-cnt);
    }
  }
  // reply's leading bytes are reinterpreted as a binary float (log10)
  cur->prob = FloorScore(TransformSRIScore(*reinterpret_cast<float*>(res)));
  if (finalState) {
    *finalState = cur->boState;
    if (len) *len = m_nGramOrder;
  }
  return cur->prob;
}
+
// Close the server connection; the kernel flushes any pending data.
LanguageModelRemote::~LanguageModelRemote() {
  // Step 8 When finished send all lingering transmissions and close the connection
  close(sock);
}
+
+}
diff --git a/moses/src/LanguageModelRemote.h b/moses/src/LanguageModelRemote.h
new file mode 100644
index 000000000..ccc72d120
--- /dev/null
+++ b/moses/src/LanguageModelRemote.h
@@ -0,0 +1,43 @@
+#ifndef moses_LanguageModelRemote_h
+#define moses_LanguageModelRemote_h
+
+#include "LanguageModelSingleFactor.h"
+#include "TypeDef.h"
+#include "Factor.h"
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+namespace Moses
+{
+
// Single-factor LM backed by a remote server (spec "host:port" in Load).
// Results are memoised per sentence in a Factor*-keyed trie.
class LanguageModelRemote : public LanguageModelSingleFactor {
	private:
	// One trie node per context; prob==0 means "not yet queried".
	struct Cache {
		std::map<const Factor*, Cache> tree;
		float prob;
		State boState; // fabricated LM state id for this context
		Cache() : prob(0) {}
	};

	int sock, port;
	struct hostent *hp;
	struct sockaddr_in server;
	mutable size_t m_curId;   // next fabricated state id (reset per sentence)
	mutable Cache m_cache;    // per-sentence memo trie
	bool start(const std::string& host, int port);
	// Sentinel trie keys for sentence start/end (never dereferenced).
	static const Factor* BOS;
	static const Factor* EOS;
	public:
	LanguageModelRemote(bool registerScore, ScoreIndexManager &scoreIndexManager);
	~LanguageModelRemote();
	void ClearSentenceCache() { m_cache.tree.clear(); m_curId = 1000; }
	virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
	bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder);
};
+
+}
+#endif
diff --git a/moses/src/LanguageModelSRI.cpp b/moses/src/LanguageModelSRI.cpp
new file mode 100644
index 000000000..548ddaee1
--- /dev/null
+++ b/moses/src/LanguageModelSRI.cpp
@@ -0,0 +1,174 @@
+// $Id: LanguageModelSRI.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <fstream>
+#include "Ngram.h"
+#include "Vocab.h"
+
+#include "LanguageModelSRI.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "StaticData.h"
+
+using namespace std;
+
+namespace Moses
+{
// Construct with null SRILM handles; they are allocated in Load().
LanguageModelSRI::LanguageModelSRI(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
, m_srilmVocab(0)
, m_srilmModel(0)
{
}
+
// Owns both SRILM objects (allocated in Load); delete on NULL is a no-op.
LanguageModelSRI::~LanguageModelSRI()
{
	delete m_srilmModel;
	delete m_srilmVocab;
}
+
// Load an SRILM ngram model and mirror its vocabulary into the global
// FactorCollection. Always returns true.
// NOTE(review): the result of m_srilmModel->read(file) is not checked, so
// a corrupt model file is not detected here.
bool LanguageModelSRI::Load(const std::string &filePath
			, FactorType factorType
			, float weight
			, size_t nGramOrder)
{
	m_srilmVocab  = new Vocab();
	m_srilmModel	= new Ngram(*m_srilmVocab, nGramOrder);
	m_factorType 	= factorType;
	m_weight			= weight;
	m_nGramOrder	= nGramOrder;
	m_filePath		= filePath;

	// score OOVs explicitly instead of skipping them
	m_srilmModel->skipOOVs() = false;

	File file( filePath.c_str(), "r" );
	m_srilmModel->read(file);

	// LM can be ok, just outputs warnings
	CreateFactors();
	m_unknownId = m_srilmVocab->unkIndex();

	return true;
}
+
// Build m_lmIdLookup, a factor-id -> SRILM VocabIndex table: every SRILM
// vocab word is registered with the FactorCollection, BOS/EOS are added
// explicitly, and factor ids with no SRILM entry default to m_unknownId.
void LanguageModelSRI::CreateFactors()
{ // add factors which have srilm id
	FactorCollection &factorCollection = FactorCollection::Instance();

	std::map<size_t, VocabIndex> lmIdMap;
	size_t maxFactorId = 0; // to create lookup vector later on

	VocabString str;
	VocabIter iter(*m_srilmVocab);
	while ( (str = iter.next()) != NULL)
	{
		VocabIndex lmId = GetLmID(str);
		size_t factorId = factorCollection.AddFactor(Output, m_factorType, str)->GetId();
		lmIdMap[factorId] = lmId;
		maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
	}

	size_t factorId;

	m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
	factorId = m_sentenceStart->GetId();
	lmIdMap[factorId] = GetLmID(BOS_);
	maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
	m_sentenceStartArray[m_factorType] = m_sentenceStart;

	m_sentenceEnd		= factorCollection.AddFactor(Output, m_factorType, EOS_);
	factorId = m_sentenceEnd->GetId();
	lmIdMap[factorId] = GetLmID(EOS_);
	maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
	m_sentenceEndArray[m_factorType] = m_sentenceEnd;

	// add to lookup vector in object; index = factor id
	m_lmIdLookup.resize(maxFactorId+1);

	// gaps in the factor-id range score as unknown words
	fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId);

	map<size_t, VocabIndex>::iterator iterMap;
	for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap)
	{
		m_lmIdLookup[iterMap->first] = iterMap->second;
	}
}
+
// Map a surface string to its SRILM vocab index (m_unknownId if absent).
VocabIndex LanguageModelSRI::GetLmID( const std::string &str ) const
{
    return m_srilmVocab->getIndex( str.c_str(), m_unknownId );
}
// Fast lookup via m_lmIdLookup; factor ids beyond the table are unknown.
VocabIndex LanguageModelSRI::GetLmID( const Factor *factor ) const
{
	size_t factorId = factor->GetId();
	return ( factorId >= m_lmIdLookup.size()) ? m_unknownId : m_lmIdLookup[factorId];
}
+
// Query SRILM for P(wordId | context) and convert log10 -> natural log,
// clamped by FloorScore.
float LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
{
	float p = m_srilmModel->wordProb( wordId, context );
	return FloorScore(TransformSRIScore(p));  // log10->log
}
+
+float LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int *len) const
+{
+ FactorType factorType = GetFactorType();
+ size_t count = contextFactor.size();
+ if (count <= 0)
+ {
+ finalState = NULL;
+ return 0;
+ }
+
+ // set up context
+ VocabIndex context[MAX_NGRAM_SIZE];
+ for (size_t i = 0 ; i < count - 1 ; i++)
+ {
+ context[i] = GetLmID((*contextFactor[count-2-i])[factorType]);
+ }
+ context[count-1] = Vocab_None;
+
+ assert((*contextFactor[count-1])[factorType] != NULL);
+ // call sri lm fn
+ VocabIndex lmId= GetLmID((*contextFactor[count-1])[factorType]);
+ float ret = GetValue(lmId, context);
+
+ if (finalState) {
+ for (int i = count - 2 ; i >= 0 ; i--)
+ context[i+1] = context[i];
+ context[0] = lmId;
+ unsigned int dummy;
+ if (!len) { len = &dummy; }
+ *finalState = m_srilmModel->contextID(context,*len);
+ (*len)++;
+ }
+ return ret;
+}
+
+}
+
+
+
diff --git a/moses/src/LanguageModelSRI.h b/moses/src/LanguageModelSRI.h
new file mode 100644
index 000000000..21e3744c7
--- /dev/null
+++ b/moses/src/LanguageModelSRI.h
@@ -0,0 +1,65 @@
+// $Id: LanguageModelSRI.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelSRI_h
+#define moses_LanguageModelSRI_h
+
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "TypeDef.h"
+#include "Vocab.h"
+#include "LanguageModelSingleFactor.h"
+
+class Factor;
+class Phrase;
+class Ngram; // SRI forward decl
+
+namespace Moses
+{
+
+class LanguageModelSRI : public LanguageModelSingleFactor
+{
+protected:
+ std::vector<VocabIndex> m_lmIdLookup;
+ Vocab *m_srilmVocab;
+ Ngram *m_srilmModel;
+ VocabIndex m_unknownId;
+
+ float GetValue(VocabIndex wordId, VocabIndex *context) const;
+ void CreateFactors();
+ VocabIndex GetLmID( const std::string &str ) const;
+ VocabIndex GetLmID( const Factor *factor ) const;
+
+public:
+ LanguageModelSRI(bool registerScore, ScoreIndexManager &scoreIndexManager);
+ ~LanguageModelSRI();
+ bool Load(const std::string &filePath
+ , FactorType factorType
+ , float weight
+ , size_t nGramOrder);
+
+ virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
+};
+
+
+}
+#endif
diff --git a/moses/src/LanguageModelSingleFactor.cpp b/moses/src/LanguageModelSingleFactor.cpp
new file mode 100644
index 000000000..e32f5fbdf
--- /dev/null
+++ b/moses/src/LanguageModelSingleFactor.cpp
@@ -0,0 +1,60 @@
+// $Id: LanguageModelSingleFactor.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <sstream>
+
+#include "LanguageModelSingleFactor.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "StaticData.h"
+
+using namespace std;
+
+namespace Moses
+{
// static variable init: shared "unknown" LM state sentinel (null).
LanguageModelSingleFactor::State LanguageModelSingleFactor::UnknownState=0;

// Pass-through constructor; registration happens in the LanguageModel base.
LanguageModelSingleFactor::LanguageModelSingleFactor(bool registerScore, ScoreIndexManager &scoreIndexManager)
:LanguageModel(registerScore, scoreIndexManager)
{
}
LanguageModelSingleFactor::~LanguageModelSingleFactor() {}
+
+
+std::string LanguageModelSingleFactor::GetScoreProducerDescription() const
+{
+ std::ostringstream oss;
+ // what about LMs that are over multiple factors at once, POS + stem, for example?
+ oss << "LM_" << GetNGramOrder() << "gram";
+ return oss.str();
+}
+
+}
+
+
+
+
diff --git a/moses/src/LanguageModelSingleFactor.h b/moses/src/LanguageModelSingleFactor.h
new file mode 100644
index 000000000..31b828c3e
--- /dev/null
+++ b/moses/src/LanguageModelSingleFactor.h
@@ -0,0 +1,87 @@
+// $Id: LanguageModelSingleFactor.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelSingleFactor_h
+#define moses_LanguageModelSingleFactor_h
+
+#include "LanguageModel.h"
+#include "Phrase.h"
+
+namespace Moses
+{
+
+class FactorCollection;
+class Factor;
+
//! Abstract base class for LMs conditioned on a single factor.
class LanguageModelSingleFactor : public LanguageModel
{
protected:
	// Boundary factors registered by the concrete Load() implementation.
	const Factor *m_sentenceStart, *m_sentenceEnd;
	FactorType m_factorType;

	LanguageModelSingleFactor(bool registerScore, ScoreIndexManager &scoreIndexManager);

public:
	// Shared "unknown" LM state sentinel (defined in the .cpp).
	static State UnknownState;

	virtual ~LanguageModelSingleFactor();
	virtual bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder) = 0;

	LMType GetLMType() const
	{
		return SingleFactor;
	}

	// Usable iff the phrase is non-empty and its first word has our factor.
	bool Useable(const Phrase &phrase) const
	{
		return (phrase.GetSize()>0 && phrase.GetFactor(0, m_factorType) != NULL);
	}

	const Factor *GetSentenceStart() const
	{
		return m_sentenceStart;
	}
	const Factor *GetSentenceEnd() const
	{
		return m_sentenceEnd;
	}
	FactorType GetFactorType() const
	{
		return m_factorType;
	}
	float GetWeight() const
	{
		return m_weight;
	}
	void SetWeight(float weight)
	{
		m_weight = weight;
	}
	std::string GetScoreProducerDescription() const;
};
+
+}
+
+#endif
diff --git a/moses/src/LanguageModelSkip.cpp b/moses/src/LanguageModelSkip.cpp
new file mode 100644
index 000000000..a3f0b8da7
--- /dev/null
+++ b/moses/src/LanguageModelSkip.cpp
@@ -0,0 +1,22 @@
+// $Id: LanguageModelSkip.cpp 916 2006-10-24 16:27:13Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "LanguageModelSkip.h"
diff --git a/moses/src/LanguageModelSkip.h b/moses/src/LanguageModelSkip.h
new file mode 100644
index 000000000..466dd9949
--- /dev/null
+++ b/moses/src/LanguageModelSkip.h
@@ -0,0 +1,129 @@
+// $Id: LanguageModelSkip.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LanguageModelSkip_h
+#define moses_LanguageModelSkip_h
+
+#include <vector>
+#include <algorithm>
+#include "LanguageModelMultiFactor.h"
+#include "LanguageModelSingleFactor.h"
+#include "Phrase.h"
+#include "FactorCollection.h"
+
+namespace Moses
+{
+
/* Hacked up LM which skips any factor with string '---'
* order of chunk hardcoded to 3 (m_realNGramOrder)
*/
class LanguageModelSkip : public LanguageModelSingleFactor
{
protected:
	size_t m_realNGramOrder;          // effective order after skipping (fixed at 3)
	LanguageModelSingleFactor *m_lmImpl; // owned delegate that does the scoring

public:
	/** Constructor
	* \param lmImpl SRI or IRST LM which this LM can use to load data;
	*               ownership transfers to this object (deleted in dtor)
	*/
	LanguageModelSkip(LanguageModelSingleFactor *lmImpl
					, bool registerScore
					, ScoreIndexManager &scoreIndexManager)
	: LanguageModelSingleFactor(registerScore, scoreIndexManager)
	{
		m_lmImpl = lmImpl;
	}
	~LanguageModelSkip()
	{
		delete m_lmImpl;
	}
	// Records settings locally, registers BOS/EOS factors, then delegates
	// the actual model loading to m_lmImpl.
	bool Load(const std::string &filePath
					, FactorType factorType
					, float weight
					, size_t nGramOrder)
	{
		m_factorType 				= factorType;
		m_weight 						= weight;
		m_filePath 					= filePath;
		m_nGramOrder 				= nGramOrder;

		m_realNGramOrder 		= 3;

		FactorCollection &factorCollection = FactorCollection::Instance();

		m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_);
		m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_);

		return m_lmImpl->Load(filePath, m_factorType, weight, nGramOrder);
	}

	// Scores a "chunked" context: words whose factor string starts with
	// "---" are dropped, the most recent m_realNGramOrder surviving words
	// are kept, and the delegate LM scores the result. Returns 0 when the
	// event word itself is a skip word.
	float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
	{
		if (contextFactor.size() == 0)
		{
			return 0;
		}

		// only process context where last word is a word we want
		const Factor *factor = (*contextFactor.back())[m_factorType];
		std::string strWord = factor->GetString();
		if (strWord.find("---") == 0)
			return 0;

		// add last word
		std::vector<const Word*> chunkContext;
		Word* chunkWord = new Word;
		chunkWord->SetFactor(m_factorType, factor);
		chunkContext.push_back(chunkWord);

		// create context in reverse 'cos we skip words we don't want
		for (int currPos = (int)contextFactor.size() - 2 ; currPos >= 0 && chunkContext.size() < m_realNGramOrder ; --currPos )
		{
			const Word &word = *contextFactor[currPos];
			factor = word[m_factorType];
			std::string strWord = factor->GetString();
			bool skip = strWord.find("---") == 0;
			if (skip)
				continue;

			// add word to chunked context
			Word* chunkWord = new Word;
			chunkWord->SetFactor(m_factorType, factor);
			chunkContext.push_back(chunkWord);
		}

		// create context factor the right way round
		std::reverse(chunkContext.begin(), chunkContext.end());

		// calc score on chunked phrase
		float ret = m_lmImpl->GetValue(chunkContext, finalState, len);

		// frees the temporary Words allocated above
		RemoveAllInColl(chunkContext);

		return ret;
	}
};
+
+}
+
+#endif
+
diff --git a/moses/src/LexicalReordering.cpp b/moses/src/LexicalReordering.cpp
new file mode 100644
index 000000000..382081aa5
--- /dev/null
+++ b/moses/src/LexicalReordering.cpp
@@ -0,0 +1,269 @@
+#include "LexicalReordering.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+// Constructs the lexical reordering feature function: registers itself with
+// the global ScoreIndexManager, installs its weights, expands the
+// direction/condition options into per-direction and per-condition vectors,
+// selects the factor masks to condition on, and loads the reordering table
+// (binary tree form if present on disk, otherwise a plain in-memory table).
+LexicalReordering::LexicalReordering(const std::string &filePath,
+ const std::vector<float>& weights,
+ Direction direction,
+ Condition condition,
+ std::vector< FactorType >& f_factors,
+ std::vector< FactorType >& e_factors)
+ : m_NumScoreComponents(weights.size()), m_MaxContextLength(0)
+{
+ std::cerr << "Creating lexical reordering...\n";
+ //add ScoreProducer
+ // NOTE(review): const_cast to mutate StaticData singletons during
+ // construction — mirrors how other producers register themselves.
+ const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
+ const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
+ std::cerr << "weights: ";
+ for(size_t w = 0; w < weights.size(); ++w){
+ std::cerr << weights[w] << " ";
+ }
+ std::cerr << "\n";
+ m_Direction = DecodeDirection(direction);
+ m_Condition = DecodeCondition(condition);
+
+ //m_FactorsE = e_factors;
+ //m_FactorsF = f_factors;
+ //Todo:should check that
+ //- if condition contains e or c than e_factors non empty
+ //- if condition contains f f_factors non empty
+ for(size_t i = 0; i < m_Condition.size(); ++i){
+ switch(m_Condition[i]){
+ case E:
+ m_FactorsE = e_factors;
+ if(m_FactorsE.empty()){
+ //problem
+ std::cerr << "Problem e factor mask is unexpectedly empty\n";
+ }
+ break;
+ case F:
+ m_FactorsF = f_factors;
+ if(m_FactorsF.empty()){
+ //problem
+ std::cerr << "Problem f factor mask is unexpectedly empty\n";
+ }
+ break;
+ case C:
+ // NOTE(review): the context mask is copied from e_factors, and context
+ // length is fixed to one word — presumably context is target-side;
+ // confirm this is intended rather than a copy-paste of the E case.
+ m_FactorsC = e_factors;
+ m_MaxContextLength = 1;
+ if(m_FactorsC.empty()){
+ //problem
+ std::cerr << "Problem c factor mask is unexpectedly empty\n";
+ }
+ break;
+ default:
+ //problem
+ std::cerr << "Unknown conditioning option!\n";
+ break;
+ }
+ }
+ // One weight per direction means orientation types share a single weight;
+ // otherwise there is one weight per (direction, orientation) pair.
+ if(weights.size() == m_Direction.size()){
+ m_OneScorePerDirection = true;
+ std::cerr << "Reordering types NOT individualy weighted!\n";
+ } else {
+ m_OneScorePerDirection = false;
+ }
+ m_Table = LexicalReorderingTable::LoadAvailable(filePath, m_FactorsF, m_FactorsE, m_FactorsC);
+}
+
+// Destructor: releases the reordering table owned by this feature.
+// (The null check is redundant — delete on NULL is a no-op — but harmless.)
+LexicalReordering::~LexicalReordering(){
+ if(m_Table){
+ delete m_Table;
+ }
+}
+
+// Computes the reordering score vector for a hypothesis.  For each active
+// direction it pulls the cached per-orientation probabilities (from the
+// previous hypothesis for Forward, from the current one for Backward),
+// determines the orientation actually taken, and writes the matching value
+// into the output — either one slot per direction, or one per
+// (direction, orientation) pair, depending on m_OneScorePerDirection.
+std::vector<float> LexicalReordering::CalcScore(Hypothesis* hypothesis) const {
+ std::vector<float> score(GetNumScoreComponents(), 0);
+ std::vector<float> values;
+
+ //for every direction
+ for(size_t i = 0; i < m_Direction.size(); ++i){
+ //grab data
+ if(Forward == m_Direction[i]){
+ //relates to prev hypothesis as we dont know next phrase for current yet
+ //sanity check: is there a previous hypothesis?
+ if(0 == hypothesis->GetPrevHypo()->GetId()){
+ continue; //no score continue with next direction
+ }
+ //grab probs for prev hypothesis
+ const ScoreComponentCollection &reorderingScoreColl =
+ hypothesis->GetPrevHypo()->GetCachedReorderingScore();
+ values = reorderingScoreColl.GetScoresForProducer(this);
+ /*
+ values = m_Table->GetScore((hypothesis->GetPrevHypo()->GetSourcePhrase()).GetSubString(hypothesis->GetPrevHypo()->GetCurrSourceWordsRange()),
+ hypothesis->GetPrevHypo()->GetCurrTargetPhrase(),
+ auxGetContext(hypothesis->GetPrevHypo()));
+ */
+ }
+ if(Backward == m_Direction[i])
+ {
+ const ScoreComponentCollection &reorderingScoreColl =
+ hypothesis->GetCachedReorderingScore();
+ values = reorderingScoreColl.GetScoresForProducer(this);
+ /*
+ values = m_Table->GetScore(hypothesis->GetSourcePhrase().GetSubString(hypothesis->GetCurrSourceWordsRange()),
+ hypothesis->GetCurrTargetPhrase(),
+ auxGetContext(hypothesis));
+ */
+ }
+
+ //add score
+ //sanity check: do we have any probs?
+ assert(values.size() == (GetNumOrientationTypes() * m_Direction.size()));
+
+ OrientationType orientation = GetOrientationType(hypothesis);
+ // values is laid out direction-major: orientation + i * numOrientations.
+ float value = values[orientation + i * GetNumOrientationTypes()];
+ if(m_OneScorePerDirection){
+ //one score per direction
+ score[i] = value;
+ } else {
+ //one score per direction and orientation
+ score[orientation + i * GetNumOrientationTypes()] = value;
+ }
+ }
+ return score;
+}
+
+// Collects up to m_MaxContextLength target-side words of left context by
+// walking back over previous hypotheses, taking words from the end of each
+// earlier target phrase.  Returns an empty phrase for the initial hypothesis.
+Phrase LexicalReordering::auxGetContext(const Hypothesis* hypothesis) const {
+ // NOTE(review): local 'h' is never used after initialization.
+ const Hypothesis* h = hypothesis;
+ Phrase c(Output);
+ if(0 == hypothesis->GetId()){
+ return c;
+ }
+ while(0 != hypothesis->GetPrevHypo()->GetId() && c.GetSize() < m_MaxContextLength){
+ hypothesis = hypothesis->GetPrevHypo();
+ int needed = m_MaxContextLength - c.GetSize();
+ const Phrase& p = hypothesis->GetCurrTargetPhrase();
+ Phrase tmp(Output);
+ // NOTE(review): signed/unsigned comparison — needed is int, GetSize()
+ // presumably size_t; fine while both are small and non-negative.
+ if(needed > p.GetSize()){
+ //needed -= p.GetSize();
+ tmp = p;
+ } else {
+ // Take only the last 'needed' words of this phrase.
+ WordsRange range(p.GetSize() - needed, p.GetSize()-1);
+ tmp = p.GetSubString(range);
+ }
+ //new code: new append returns void not this...
+ tmp.Append(c); c = tmp;
+ }
+ return c;
+}
+
+// Expands a compound conditioning option into its atomic parts:
+// F/E/C stay singletons, FE -> {F,E}, FEC -> {F,E,C}.
+std::vector<LexicalReordering::Condition> LexicalReordering::DecodeCondition(LexicalReordering::Condition c){
+ std::vector<LexicalReordering::Condition> result;
+ switch(c){
+ case F:
+ case E:
+ case C:
+ result.push_back(c);
+ break;
+ case FE:
+ result.push_back(F);
+ result.push_back(E);
+ break;
+ case FEC:
+ result.push_back(F);
+ result.push_back(E);
+ result.push_back(C);
+ break;
+ }
+ return result;
+}
+
+// Expands the direction option: Bidirectional becomes {Backward, Forward}
+// (in that order — CalcScore relies on this ordering); anything else is a
+// singleton.
+std::vector<LexicalReordering::Direction> LexicalReordering::DecodeDirection(LexicalReordering::Direction d){
+ std::vector<Direction> result;
+ if(Bidirectional == d){
+ result.push_back(Backward);
+ result.push_back(Forward);
+ } else {
+ result.push_back(d);
+ }
+ return result;
+}
+
+// Two-way orientation: Monotone iff the current source span directly follows
+// the previous one (or starts the sentence for the initial hypothesis),
+// otherwise NonMonotone.
+LexicalReordering::OrientationType LexicalMonotonicReordering::GetOrientationType(Hypothesis* currHypothesis) const
+{
+ const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
+ const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
+ //check if there is a previous hypo
+ if(0 == prevHypothesis->GetId()){
+ if(0 == currWordsRange.GetStartPos()){
+ return Monotone;
+ } else {
+ return NonMonotone;
+ }
+ } else {
+ const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
+
+ if(prevWordsRange.GetEndPos() == currWordsRange.GetStartPos()-1){
+ return Monotone;
+ } else {
+ return NonMonotone;
+ }
+ }
+}
+
+// Three-way (msd-style) orientation: Monotone when the current span directly
+// follows the previous one, Swap when it directly precedes it, Discontinuous
+// otherwise.  The initial hypothesis is Monotone only when it starts at
+// source position 0.
+LexicalReordering::OrientationType LexicalOrientationReordering::GetOrientationType(Hypothesis* currHypothesis) const
+{
+ const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
+ const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
+ //check if there is a previous hypo
+ if(0 == prevHypothesis->GetId()){
+ if(0 == currWordsRange.GetStartPos()){
+ return Monotone;
+ } else {
+ return Discontinuous;
+ }
+ } else {
+ const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
+
+ if(prevWordsRange.GetEndPos() == currWordsRange.GetStartPos()-1){
+ return Monotone;
+ } else if(prevWordsRange.GetStartPos() == currWordsRange.GetEndPos()+1) {
+ return Swap;
+ } else {
+ return Discontinuous;
+ }
+ }
+}
+
+
+LexicalReordering::OrientationType LexicalDirectionalReordering::GetOrientationType(Hypothesis* currHypothesis) const{
+ const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
+ const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
+ //check if there is a previous hypo
+ if(0 == prevHypothesis->GetId()){
+ return Right;
+ } else {
+ const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
+
+ if(prevWordsRange.GetEndPos() <= currWordsRange.GetStartPos()){
+ return Right;
+ } else {
+ return Left;
+ }
+ }
+}
+
+// Looks up the raw reordering scores for an (f, e) phrase pair with an empty
+// context phrase.
+Score LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
+{
+ return m_Table->GetScore(f, e, Phrase(Output));
+}
+
+// StatefulFeatureFunction hook: adds this hypothesis' reordering scores to
+// the accumulator.  Returns NULL instead of a real FFState — the existing
+// TODO below acknowledges the state plumbing is not implemented yet.
+FFState* LexicalReordering::Evaluate(
+ const Hypothesis& hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* out) const {
+ out->PlusEquals(this, CalcScore(const_cast<Hypothesis*>(&hypo)));
+
+ //TODO need to return proper state, calc score should not use previous
+ //hypothesis, it should use the state.
+ return NULL;
+}
+
+// No initial state is tracked (see the TODO in Evaluate), so NULL is used.
+const FFState* LexicalReordering::EmptyHypothesisState() const {
+ return NULL;
+}
+
+}
+
diff --git a/moses/src/LexicalReordering.h b/moses/src/LexicalReordering.h
new file mode 100644
index 000000000..d39b0ed57
--- /dev/null
+++ b/moses/src/LexicalReordering.h
@@ -0,0 +1,159 @@
+#ifndef moses_LexicalReordering_h
+#define moses_LexicalReordering_h
+
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "Phrase.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "WordsRange.h"
+#include "ScoreProducer.h"
+#include "FeatureFunction.h"
+
+#include "LexicalReorderingTable.h"
+
+namespace Moses
+{
+
+class Factor;
+class Phrase;
+class Hypothesis;
+class InputType;
+
+using namespace std;
+
+// Abstract base for lexicalized reordering feature functions.  Owns the
+// score table, the decoded direction/condition lists, and the factor masks;
+// subclasses define the orientation taxonomy via GetNumOrientationTypes()
+// and GetOrientationType().
+class LexicalReordering : public StatefulFeatureFunction {
+ public: //types & consts
+ typedef int OrientationType;
+ // Unidirectional is an alias for Backward.
+ enum Direction {Forward, Backward, Bidirectional, Unidirectional = Backward};
+ enum Condition {F,E,C,FE,FEC};
+ public: //con- & destructors
+ LexicalReordering(const std::string &filePath,
+ const std::vector<float>& weights,
+ Direction direction,
+ Condition condition,
+ std::vector< FactorType >& f_factors,
+ std::vector< FactorType >& e_factors);
+ virtual ~LexicalReordering();
+ public: //interface
+ //inherited
+ virtual size_t GetNumScoreComponents() const {
+ return m_NumScoreComponents;
+ };
+
+ virtual FFState* Evaluate(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
+
+ const FFState* EmptyHypothesisState() const;
+
+ virtual std::string GetScoreProducerDescription() const {
+ return "Generic Lexical Reordering Model... overwrite in subclass.";
+ };
+
+ // "d" = distortion weight group in moses.ini.
+ std::string GetScoreProducerWeightShortName() const {
+ return "d";
+ };
+
+ //new
+ virtual int GetNumOrientationTypes() const = 0;
+ virtual OrientationType GetOrientationType(Hypothesis*) const = 0;
+
+ std::vector<float> CalcScore(Hypothesis* hypothesis) const;
+ // Forwards per-sentence setup (e.g. caching) to the underlying table.
+ void InitializeForInput(const InputType& i){
+ m_Table->InitializeForInput(i);
+ }
+
+ Score GetProb(const Phrase& f, const Phrase& e) const;
+ //helpers
+ static std::vector<Condition> DecodeCondition(Condition c);
+ static std::vector<Direction> DecodeDirection(Direction d);
+ private:
+ Phrase auxGetContext(const Hypothesis* hypothesis) const;
+ private:
+ LexicalReorderingTable* m_Table; // owned; deleted in destructor
+ size_t m_NumScoreComponents;
+ std::vector< Direction > m_Direction;
+ std::vector< Condition > m_Condition;
+ bool m_OneScorePerDirection;
+ std::vector< FactorType > m_FactorsE, m_FactorsF, m_FactorsC;
+ int m_MaxContextLength;
+};
+
+
+// Two-orientation model: Monotone vs NonMonotone.
+// (GetOrientationType is declared returning int here; that matches the base
+// class signature because OrientationType is a typedef for int.)
+class LexicalMonotonicReordering : public LexicalReordering {
+ private:
+ enum {Monotone = 0, NonMonotone = 1};
+ public:
+ LexicalMonotonicReordering(const std::string &filePath,
+ const std::vector<float>& w,
+ Direction direction,
+ Condition condition,
+ std::vector< FactorType >& f_factors,
+ std::vector< FactorType >& e_factors)
+ : LexicalReordering(filePath, w, direction, condition, f_factors, e_factors){
+ std::cerr << "Created lexical monotonic reordering\n";
+ }
+ public:
+ virtual int GetNumOrientationTypes() const {
+ return 2;
+ };
+ virtual std::string GetScoreProducerDescription() const {
+ return "MonotonicLexicalReorderingModel";
+ };
+ virtual int GetOrientationType(Hypothesis* currHypothesis) const;
+};
+
+// Three-orientation (msd-style) model: Monotone / Swap / Discontinuous.
+class LexicalOrientationReordering : public LexicalReordering {
+ private:
+ enum {Monotone = 0, Swap = 1, Discontinuous = 2};
+ public:
+ LexicalOrientationReordering(const std::string &filePath,
+ const std::vector<float>& w,
+ Direction direction,
+ Condition condition,
+ std::vector< FactorType >& f_factors,
+ std::vector< FactorType >& e_factors)
+ : LexicalReordering(filePath, w, direction, condition, f_factors, e_factors){
+ std::cerr << "Created lexical orientation reordering\n";
+ }
+ public:
+ virtual int GetNumOrientationTypes() const {
+ return 3;
+ }
+ virtual std::string GetScoreProducerDescription() const {
+ return "OrientationLexicalReorderingModel";
+ };
+ virtual OrientationType GetOrientationType(Hypothesis* currHypothesis) const;
+};
+
+// Two-orientation directional model: Left vs Right movement.
+class LexicalDirectionalReordering : public LexicalReordering {
+ private:
+ enum {Left = 0, Right = 1};
+ public:
+ LexicalDirectionalReordering(const std::string &filePath,
+ const std::vector<float>& w,
+ Direction direction,
+ Condition condition,
+ std::vector< FactorType >& f_factors,
+ std::vector< FactorType >& e_factors)
+ : LexicalReordering(filePath, w, direction, condition, f_factors, e_factors){
+ std::cerr << "Created lexical directional Reordering\n";
+ }
+ public:
+ virtual int GetNumOrientationTypes() const {
+ return 2;
+ };
+ virtual std::string GetScoreProducerDescription() const {
+ return "DirectionalLexicalReorderingModel";
+ };
+ virtual OrientationType GetOrientationType(Hypothesis* currHypothesis) const;
+};
+
+
+}
+
+#endif
+
diff --git a/moses/src/LexicalReorderingTable.cpp b/moses/src/LexicalReorderingTable.cpp
new file mode 100644
index 000000000..9d14d2c0d
--- /dev/null
+++ b/moses/src/LexicalReorderingTable.cpp
@@ -0,0 +1,686 @@
+#include "LexicalReorderingTable.h"
+#include "InputFileStream.h"
+//#include "LVoc.h" //need IPhrase
+
+#include "StaticData.h"
+#include "PhraseDictionary.h"
+#include "GenerationDictionary.h"
+#include "TargetPhrase.h"
+#include "TargetPhraseCollection.h"
+
+namespace Moses
+{
+/*
+ * local helper functions
+ */
+//cleans str of leading and tailing spaces
+//cleans str of leading and tailing spaces
+// Returns the substring between the first and last non-space characters;
+// yields an empty string when the input is all spaces (i crosses j).
+std::string auxClearString(const std::string& str){
+ int i = 0, j = str.size()-1;
+ while(i <= j){
+ if(' ' != str[i]){
+ break;
+ } else {
+ ++i;
+ }
+ }
+ while(j >= i){
+ if(' ' != str[j]){
+ break;
+ } else {
+ --j;
+ }
+ }
+ return str.substr(i,j-i+1);
+}
+
+// Appends all elements of tail to head, reserving capacity up front.
+void auxAppend(IPhrase& head, const IPhrase& tail){
+ head.reserve(head.size()+tail.size());
+ for(size_t i = 0; i < tail.size(); ++i){
+ head.push_back(tail[i]);
+ }
+}
+/*
+ * functions for LexicalReorderingTable
+ */
+
+// Factory: picks the binary prefix-tree table when its index file exists on
+// disk, otherwise falls back to loading the whole text table into memory.
+// Caller takes ownership of the returned table.
+LexicalReorderingTable* LexicalReorderingTable::LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors){
+ //decide use Tree or Memory table
+ if(FileExists(filePath+".binlexr.idx")){
+ //there exists a binary version use that
+ return new LexicalReorderingTableTree(filePath, f_factors, e_factors, c_factors);
+ } else {
+ //use plain memory
+ return new LexicalReorderingTableMemory(filePath, f_factors, e_factors, c_factors);
+ }
+ }
+
+/*
+ * functions for LexicalReorderingTableMemory
+ */
+// Constructs the in-memory table and immediately loads the whole score file.
+LexicalReorderingTableMemory::LexicalReorderingTableMemory(
+ const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+ : LexicalReorderingTable(f_factors, e_factors, c_factors)
+{
+ LoadFromFile(filePath);
+}
+
+// Nothing to release: the std::map member cleans itself up.
+LexicalReorderingTableMemory::~LexicalReorderingTableMemory(){
+}
+
+// Looks up scores for (f, e, c).  With an empty context a single key lookup
+// is done; otherwise progressively shorter context suffixes are tried, from
+// the full context down, returning the first hit.  An empty Score() means
+// "not found".
+std::vector<float> LexicalReorderingTableMemory::GetScore(const Phrase& f,
+ const Phrase& e,
+ const Phrase& c) {
+ //rather complicated because of const can't use []... as [] might enter new things into std::map
+ //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large
+ TableType::const_iterator r;
+ std::string key;
+ if(0 == c.GetSize()){
+ key = MakeKey(f,e,c);
+ r = m_Table.find(key);
+ if(m_Table.end() != r){
+ return r->second;
+ }
+ } else {
+ //right try from large to smaller context
+ // NOTE(review): the final iteration (i == c.GetSize()) builds
+ // WordsRange(GetSize(), GetSize()-1), an inverted range — confirm
+ // GetSubString handles that as "empty context" as presumably intended.
+ for(size_t i = 0; i <= c.GetSize(); ++i){
+ Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
+ key = MakeKey(f,e,sub_c);
+ r = m_Table.find(key);
+ if(m_Table.end() != r){
+ return r->second;
+ }
+ }
+ }
+ return Score();
+}
+
+// Debug helper: dumps every key with its score vector to the given stream.
+void LexicalReorderingTableMemory::DbgDump(std::ostream* out) const{
+ TableType::const_iterator i;
+ for(i = m_Table.begin(); i != m_Table.end(); ++i){
+ *out << " key: '" << i->first << "' score: ";
+ *out << "(num scores: " << (i->second).size() << ")";
+ for(size_t j = 0; j < (i->second).size(); ++j){
+ *out << (i->second)[j] << " ";
+ }
+ *out << "\n";
+ }
+};
+
+// Builds the map key from phrases by rendering each with its factor mask,
+// trimming spaces, and delegating to the string overload below.
+std::string LexicalReorderingTableMemory::MakeKey(const Phrase& f,
+ const Phrase& e,
+ const Phrase& c) const {
+ /*
+ std::string key;
+ if(!m_FactorsF.empty()){
+ key += f.GetStringRep(m_FactorsF);
+ }
+ if(!m_FactorsE.empty()){
+ if(!key.empty()){
+ key += " ||| ";
+ }
+ key += e.GetStringRep(m_FactorsE);
+ }
+ */
+ return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
+ auxClearString(e.GetStringRep(m_FactorsE)),
+ auxClearString(c.GetStringRep(m_FactorsC)));
+}
+
+// Joins the non-empty key parts with "|||".  Note the asymmetry: the f part
+// is gated on the string being non-empty, while e and c are gated on the
+// corresponding factor masks — NOTE(review): confirm this difference is
+// intentional (it matters when a mask is set but the rendered part is empty).
+std::string LexicalReorderingTableMemory::MakeKey(const std::string& f,
+ const std::string& e,
+ const std::string& c) const{
+ std::string key;
+ if(!f.empty()){
+ key += f;
+ }
+ if(!m_FactorsE.empty()){
+ if(!key.empty()){
+ key += "|||";
+ }
+ key += e;
+ }
+ if(!m_FactorsC.empty()){
+ if(!key.empty()){
+ key += "|||";
+ }
+ key += c;
+ }
+ return key;
+}
+
+// Loads a "f ||| e ||| c ||| p1 p2 ..." style score file (transparently
+// falling back to a .gz sibling) into the in-memory map.  Which fields are
+// expected per line is driven by which factor masks are non-empty.  Scores
+// are log-transformed and floored on load.  Exits the process on a line
+// whose probability count disagrees with the first line.
+void LexicalReorderingTableMemory::LoadFromFile(const std::string& filePath){
+ std::string fileName = filePath;
+ if(!FileExists(fileName) && FileExists(fileName+".gz")){
+ fileName += ".gz";
+ }
+ InputFileStream file(fileName);
+ std::string line(""), key("");
+ int numScores = -1;
+ std::cerr << "Loading table into memory...";
+ while(!getline(file, line).eof()){
+ std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
+ int t = 0 ;
+ std::string f(""),e(""),c("");
+
+ if(!m_FactorsF.empty()){
+ //there should be something for f
+ f = auxClearString(tokens.at(t));
+ ++t;
+ }
+ if(!m_FactorsE.empty()){
+ //there should be something for e
+ e = auxClearString(tokens.at(t));
+ ++t;
+ }
+ if(!m_FactorsC.empty()){
+ //there should be something for c
+ c = auxClearString(tokens.at(t));
+ ++t;
+ }
+ //last token are the probs
+ std::vector<float> p = Scan<float>(Tokenize(tokens.at(t)));
+ //sanity check: all lines must have equall number of probs
+ if(-1 == numScores){
+ numScores = (int)p.size(); //set in first line
+ }
+ if((int)p.size() != numScores){
+ TRACE_ERR( "found inconsistent number of probabilities... found " << p.size() << " expected " << numScores << std::endl);
+ exit(0);
+ }
+ std::transform(p.begin(),p.end(),p.begin(),TransformScore);
+ std::transform(p.begin(),p.end(),p.begin(),FloorScore);
+ //save it all into our map
+ m_Table[MakeKey(f,e,c)] = p;
+ }
+ std::cerr << "done.\n";
+}
+
+/*
+ * functions for LexicalReorderingTableTree
+ */
+// Constructs the tree-backed table and reads the binary .binlexr files.
+// Caching starts disabled; it is toggled per input in InitializeForInput.
+LexicalReorderingTableTree::LexicalReorderingTableTree(
+ const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+ : LexicalReorderingTable(f_factors, e_factors, c_factors), m_UseCache(false), m_FilePath(filePath)
+{
+ m_Table.reset(new PrefixTreeMap());
+ m_Table->Read(m_FilePath+".binlexr");
+}
+
+// Nothing explicit to release: m_Table is a smart pointer.
+LexicalReorderingTableTree::~LexicalReorderingTableTree(){
+}
+
+// Looks up scores for (f, e, c) in the prefix-tree table, consulting and
+// populating the candidate cache when enabled.  Returns an empty Score()
+// for malformed keys or when no candidates exist.
+Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, const Phrase& c) {
+ if( (!m_FactorsF.empty() && 0 == f.GetSize())
+ || (!m_FactorsE.empty() && 0 == e.GetSize())){
+ //NOTE: no check for c as c might be empty, e.g. start of sentence
+ //not a proper key
+ // phi: commented out, since e may be empty (drop-unknown)
+ //std::cerr << "Not a proper key!\n";
+ return Score();
+ }
+ // (stray second semicolon below is harmless)
+ CacheType::iterator i;;
+ if(m_UseCache){
+ // insert() doubles as a lookup: if the key already exists, r.second is
+ // false and the cached candidates are used directly.
+ std::pair<CacheType::iterator, bool> r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates()));
+ if(!r.second){
+ return auxFindScoreForContext((r.first)->second, c);
+ }
+ i = r.first;
+ } else if(!m_Cache.empty()) {
+ //although we might not be caching now, cache might be none empty!
+ i = m_Cache.find(MakeCacheKey(f,e));
+ if(i != m_Cache.end()){
+ return auxFindScoreForContext(i->second, c);
+ }
+ }
+ //not in cache go to file...
+ Score score;
+ Candidates cands;
+ m_Table->GetCandidates(MakeTableKey(f,e), &cands);
+ if(cands.empty()){
+ return Score();
+ }
+
+ if(m_FactorsC.empty()){
+ // without context factors there is exactly one candidate per key
+ assert(1 == cands.size());
+ return cands[0].GetScore(0);
+ } else {
+ score = auxFindScoreForContext(cands, c);
+ }
+ //cache for future use
+ if(m_UseCache){
+ i->second = cands;
+ }
+ return score;
+};
+
+// Selects the candidate matching the longest suffix of the given context
+// phrase.  Without context factors there is at most one candidate; otherwise
+// the context is converted to vocabulary ids and progressively shorter
+// suffixes (down to empty) are compared against each candidate's phrase.
+Score LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context){
+ if(m_FactorsC.empty()){
+ assert(cands.size() <= 1);
+ return (1 == cands.size())?(cands[0].GetScore(0)):(Score());
+ } else {
+ std::vector<std::string> cvec;
+ for(size_t i = 0; i < context.GetSize(); ++i){
+ /* old code
+ std::string s = context.GetWord(i).ToString(m_FactorsC);
+ cvec.push_back(s.substr(0,s.size()-1));
+ */
+ cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
+ }
+ IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
+ IPhrase sub_c;
+ IPhrase::iterator start = c.begin();
+ for(size_t j = 0; j <= context.GetSize(); ++j, ++start){
+ sub_c.assign(start, c.end());
+ for(size_t cand = 0; cand < cands.size(); ++cand){
+ // NOTE(review): local 'p' duplicates the GetPhrase(0) call used in
+ // the comparison below and is otherwise unused.
+ IPhrase p = cands[cand].GetPhrase(0);
+ if(cands[cand].GetPhrase(0) == sub_c){
+ return cands[cand].GetScore(0);
+ }
+ }
+ }
+ return Score();
+ }
+}
+
+/*
+void LexicalReorderingTableTree::DbgDump(std::ostream* pout){
+ std::ostream& out = *pout;
+ //TODO!
+}
+*/
+
+// Per-input setup: clears the cache, pre-caches for confusion-net input,
+// disables caching for plain sentences (memory trade-off, see comment), and
+// lazily (re)loads the table — e.g. for a thread-local copy.
+void LexicalReorderingTableTree::InitializeForInput(const InputType& input){
+ ClearCache();
+ if(ConfusionNet const* cn = dynamic_cast<ConfusionNet const*>(&input)){
+ Cache(*cn);
+ } else if(Sentence const* s = dynamic_cast<Sentence const*>(&input)){
+ // Cache(*s); ... this just takes up too much memory, we cache elsewhere
+ // NOTE(review): 's' is unused while the Cache(*s) call is commented out
+ // (expect an unused-variable warning).
+ DisableCache();
+ }
+ if (!m_Table.get()) {
+ //load thread specific table.
+ m_Table.reset(new PrefixTreeMap());
+ m_Table->Read(m_FilePath+".binlexr");
+ }
+};
+
+// Builds the binary table files (.binlexr.srctree/.tgtdata/.idx/.voc0/.voc1)
+// from a sorted text score file.  Lines are grouped by source key into a
+// prefix tree per first word; candidates for each key are serialized to the
+// target-data file with their file offsets stored in the tree.  Returns
+// false if the input is not properly grouped (a key repeats non-adjacently).
+bool LexicalReorderingTableTree::Create(std::istream& inFile,
+ const std::string& outFileName){
+ std::string line;
+ //TRACE_ERR("Entering Create...\n");
+ std::string
+ ofn(outFileName+".binlexr.srctree"),
+ oft(outFileName+".binlexr.tgtdata"),
+ ofi(outFileName+".binlexr.idx"),
+ ofsv(outFileName+".binlexr.voc0"),
+ oftv(outFileName+".binlexr.voc1");
+
+
+ FILE *os = fOpen(ofn.c_str(),"wb");
+ FILE *ot = fOpen(oft.c_str(),"wb");
+
+ //TRACE_ERR("opend files....\n");
+
+ typedef PrefixTreeSA<LabelId,OFF_T> PSA;
+ PSA *psa = new PSA;
+ PSA::setDefault(InvalidOffT);
+ // NOTE(review): voc[] is only assigned inside the first-line branch below;
+ // if the input is empty or the first line has an unexpected token count,
+ // the reads of voc[0]/voc[1] at the end use uninitialized pointers —
+ // confirm inputs are always well-formed, or zero-initialize.
+ WordVoc* voc[3];
+
+ LabelId currFirstWord = InvalidLabelId;
+ IPhrase currKey;
+
+ Candidates cands;
+ std::vector<OFF_T> vo;
+ size_t lnc = 0;
+ size_t numTokens = 0;
+ size_t numKeyTokens = 0;
+ while(getline(inFile, line)){
+ //TRACE_ERR(lnc<<":"<<line<<"\n");
+ ++lnc;
+ if(0 == lnc % 10000){
+ TRACE_ERR(".");
+ }
+ IPhrase key;
+ Score score;
+
+ std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
+ std::string w;
+ if(1 == lnc){
+ //do some init stuff in the first line
+ numTokens = tokens.size();
+ if(tokens.size() == 2){ //f ||| score
+ numKeyTokens = 1;
+ voc[0] = new WordVoc();
+ voc[1] = 0;
+ } else if(3 == tokens.size() || 4 == tokens.size()){ //either f ||| e ||| score or f ||| e ||| c ||| score
+ numKeyTokens = 2;
+ voc[0] = new WordVoc(); //f voc
+ voc[1] = new WordVoc(); //e voc
+ voc[2] = voc[1]; //c & e share voc
+ }
+ } else {
+ //sanity check ALL lines must have same number of tokens
+ assert(numTokens == tokens.size());
+ }
+ // Convert the key tokens (f, and e if present) to vocab ids, with a
+ // MagicWord separator between key parts.
+ int phrase = 0;
+ for(; phrase < numKeyTokens; ++phrase){
+ //conditioned on more than just f... need |||
+ if(phrase >=1){
+ key.push_back(PrefixTreeMap::MagicWord);
+ }
+ std::istringstream is(tokens[phrase]);
+ while(is >> w) {
+ key.push_back(voc[phrase]->add(w));
+ }
+ }
+ //collect all non key phrases, i.e. c
+ std::vector<IPhrase> tgt_phrases;
+ tgt_phrases.resize(numTokens - numKeyTokens - 1);
+ for(int j = 0; j < tgt_phrases.size(); ++j, ++phrase){
+ std::istringstream is(tokens[numKeyTokens + j]);
+ while(is >> w) {
+ tgt_phrases[j].push_back(voc[phrase]->add(w));
+ }
+ }
+ //last token is score
+ std::istringstream is(tokens[numTokens-1]);
+ while(is >> w) {
+ score.push_back(atof(w.c_str()));
+ }
+ //transform score now...
+ std::transform(score.begin(),score.end(),score.begin(),TransformScore);
+ std::transform(score.begin(),score.end(),score.begin(),FloorScore);
+ std::vector<Score> scores;
+ scores.push_back(score);
+
+ if(key.empty()) {
+ TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
+ continue;
+ }
+ //first time inits
+ if(currFirstWord == InvalidLabelId){
+ currFirstWord = key[0];
+ }
+ if(currKey.empty()){
+ currKey = key;
+ //insert key into tree
+ assert(psa);
+ PSA::Data& d = psa->insert(key);
+ if(d == InvalidOffT) {
+ d = fTell(ot);
+ } else {
+ TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
+ return false;
+ }
+ }
+ if(currKey != key){
+ //ok new key
+ currKey = key;
+ //a) write cands for old key
+ cands.writeBin(ot);
+ cands.clear();
+ //b) check if we need to move on to new tree root
+ if(key[0] != currFirstWord){
+ // write key prefix tree to file and clear
+ PTF pf;
+ if(currFirstWord >= vo.size()){
+ vo.resize(currFirstWord+1,InvalidOffT);
+ }
+ vo[currFirstWord] = fTell(os);
+ pf.create(*psa, os);
+ // clear
+ delete psa; psa = new PSA;
+ currFirstWord = key[0];
+ }
+ //c) insert key into tree
+ assert(psa);
+ PSA::Data& d = psa->insert(key);
+ if(d == InvalidOffT) {
+ d = fTell(ot);
+ } else {
+ TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
+ return false;
+ }
+ }
+ cands.push_back(GenericCandidate(tgt_phrases, scores));
+ }
+ //flush remainders
+ cands.writeBin(ot);
+ cands.clear();
+ //process last currFirstWord
+ PTF pf;
+ if(currFirstWord >= vo.size()) {
+ vo.resize(currFirstWord+1,InvalidOffT);
+ }
+ vo[currFirstWord] = fTell(os);
+ pf.create(*psa,os);
+ delete psa;
+ psa=0;
+
+ fClose(os);
+ fClose(ot);
+ /*
+ std::vector<size_t> inv;
+ for(size_t i = 0; i < vo.size(); ++i){
+ if(vo[i] == InvalidOffT){
+ inv.push_back(i);
+ }
+ }
+ if(inv.size()) {
+ TRACE_ERR("WARNING: there are src voc entries with no phrase "
+ "translation: count "<<inv.size()<<"\n"
+ "There exists phrase translations for "<<vo.size()-inv.size()
+ <<" entries\n");
+ }
+ */
+ FILE *oi = fOpen(ofi.c_str(),"wb");
+ fWriteVector(oi,vo);
+ fClose(oi);
+
+ if(voc[0]){
+ voc[0]->Write(ofsv);
+ delete voc[0];
+ }
+ if(voc[1]){
+ voc[1]->Write(oftv);
+ delete voc[1];
+ }
+ return true;
+}
+
+// Builds the cache key "f|||e" from the factor-masked, space-trimmed string
+// representations; parts whose factor mask is empty are omitted.
+std::string LexicalReorderingTableTree::MakeCacheKey(const Phrase& f,
+ const Phrase& e) const {
+ std::string key;
+ if(!m_FactorsF.empty()){
+ key += auxClearString(f.GetStringRep(m_FactorsF));
+ }
+ if(!m_FactorsE.empty()){
+ if(!key.empty()){
+ key += "|||";
+ }
+ key += auxClearString(e.GetStringRep(m_FactorsE));
+ }
+ return key;
+}
+;
+
+// Builds the prefix-tree key for (f, e): f words mapped through the source
+// vocabulary, then (if e factors are active) a MagicWord separator followed
+// by e words mapped through the target vocabulary.
+IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
+ const Phrase& e) const {
+ IPhrase key;
+ std::vector<std::string> keyPart;
+ if(!m_FactorsF.empty()){
+ for(int i = 0; i < f.GetSize(); ++i){
+ /* old code
+ std::string s = f.GetWord(i).ToString(m_FactorsF);
+ keyPart.push_back(s.substr(0,s.size()-1));
+ */
+ keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
+ }
+ auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
+ keyPart.clear();
+ }
+ if(!m_FactorsE.empty()){
+ if(!key.empty()){
+ key.push_back(PrefixTreeMap::MagicWord);
+ }
+ for(int i = 0; i < e.GetSize(); ++i){
+ /* old code
+ std::string s = e.GetWord(i).ToString(m_FactorsE);
+ keyPart.push_back(s.substr(0,s.size()-1));
+ */
+ keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
+ }
+ auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
+ //keyPart.clear();
+ }
+ return key;
+};
+
+
+// Stack frame for the depth-first traversal in auxCacheForSrcPhrase: a tree
+// position plus the target-word path that led to it.
+struct State {
+ State(PPimp* t, const std::string& p) : pos(t), path(p){
+ }
+ PPimp* pos;
+ std::string path;
+};
+
+// Pre-populates the candidate cache for one source phrase.  Without e
+// factors the phrase itself is the full key; otherwise the prefix tree is
+// descended to the f-subtree and every e-continuation below it is explored
+// depth-first, caching candidates under "f||| e-path" keys.
+void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
+ if(m_FactorsE.empty()){
+ //f is all of key...
+ Candidates cands;
+ m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
+ m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
+ } else {
+ ObjectPool<PPimp> pool;
+ PPimp* pPos = m_Table->GetRoot();
+ //1) goto subtree for f
+ for(int i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i){
+ /* old code
+ pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
+ */
+ pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
+ }
+ if(0 != pPos && pPos->isValid()){
+ pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
+ }
+ if(0 == pPos || !pPos->isValid()){
+ return;
+ }
+ //2) explore whole subtree depth first & cache
+ std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
+
+ std::vector<State> stack;
+ stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
+ Candidates cands;
+ while(!stack.empty()){
+ if(stack.back().pos->isValid()){
+ LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
+ std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
+ //cache this
+ m_Table->GetCandidates(*stack.back().pos,&cands);
+ if(!cands.empty()){
+ m_Cache[cache_key + auxClearString(next_path)] = cands;
+ }
+ cands.clear();
+ PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
+ ++stack.back().pos->idx;
+ stack.push_back(State(next_pos,next_path));
+ } else {
+ stack.pop_back();
+ }
+ }
+ }
+}
+
+// Pre-fetching for confusion networks is intentionally a no-op (see the
+// commented-out implementation further below).
+void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
+ return;
+}
+
+// Pre-caches candidates for every source substring of the sentence.
+// 'len' here is the range span (end - start), so len == 0 is a one-word
+// phrase.  NOTE(review): with start+len == input.GetSize() the WordsRange
+// end position equals GetSize(), one past the last valid index — confirm
+// GetSubString tolerates this, or the bound should be start+len < GetSize().
+// Also note the signed int / size_t mixing in the loop conditions.
+void LexicalReorderingTableTree::Cache(const Sentence& input){
+ //only works with sentences...
+ int prev_cache_size = m_Cache.size();
+ int max_phrase_length = input.GetSize();
+ for(size_t len = 0; len <= max_phrase_length; ++len){
+ for(size_t start = 0; start+len <= input.GetSize(); ++start){
+ Phrase f = input.GetSubString(WordsRange(start, start+len));
+ auxCacheForSrcPhrase(f);
+ }
+ }
+ std::cerr << "Cached " << m_Cache.size() - prev_cache_size << " new primary reordering table keys\n";
+}
+/*
+Pre fetching implementation using Phrase and Generation Dictionaries
+*//*
+void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
+ typedef TargetPhraseCollection::iterator Iter;
+ typedef TargetPhraseCollection::const_iterator ConstIter;
+ //not implemented for confusion networks...
+ Sentence const* s = dynamic_cast<Sentence const*>(&input);
+ if(!s){
+ return;
+ }
+ int max_phrase_length = input.GetSize();
+
+ std::vector<PhraseDictionaryBase*> PhraseTables = StaticData::Instance()->GetPhraseDictionaries();
+ //new code:
+ //std::vector<PhraseDictionary*> PhraseTables = StaticData::Instance()->GetPhraseDictionaries();
+ std::vector<GenerationDictionary*> GenTables = StaticData::Instance()->GetGenerationDictionaries();
+ for(size_t len = 1; len <= max_phrase_length; ++len){
+ for(size_t start = 0; start+len <= input.GetSize(); ++start){
+ Phrase f = s->GetSubString(WordsRange(start, start+len));
+ //find all translations of f
+ TargetPhraseCollection list;
+
+ for(size_t t = 0; t < PhraseTables.size(); ++t){
+ //if(doIntersect(PhraseTables[t]->GetOutputFactorMask(),FactorMask(m_FactorsE))){
+ //this table gives us something we need
+
+ const TargetPhraseCollection* new_list = PhraseTables[t]->GetTargetPhraseCollection(f);
+ TargetPhraseCollection curr_list;
+ for(ConstIter i = new_list->begin(); i != new_list->end(); ++i){
+ for(Iter j = list.begin(); j != list.end(); ++j){
+ curr_list.Add((*j)->MergeNext(*(*i)));
+ }
+ }
+ if(list.IsEmpty()){
+ list = *new_list;
+ } else {
+ list = curr_list;
+ }
+ //}
+ }
+ for(size_t g = 0; g < GenTables.size(); ++g){
+ //if(doIntersect(GenTables[g]->GetOutputFactorMask(),FactorMask(m_FactorsE))){
+ TargetPhraseCollection curr_list;
+ for(Iter j = list.begin(); j != list.end(); ++j){
+ for(size_t w = 0; w < (*j)->GetSize(); ++w){
+ const OutputWordCollection* words = GenTables[g]->FindWord((*j)->GetWord(w));
+ for(OutputWordCollection::const_iterator i = words->begin(); i != words->end(); ++i){
+ TargetPhrase* p = new TargetPhrase(*(*j));
+ Word& pw = p->GetWord(w);
+ pw.Merge(i->first);
+ curr_list.Add(p);
+ }
+ }
+ }
+ list = curr_list;
+ //}
+ }
+ //cache for each translation
+ for(Iter e = list.begin(); e < list.end(); ++e){
+ Candidates cands;
+ m_Table.GetCandidates(MakeTableKey(f,*(*e)), &cands);
+ m_Cache.insert(std::make_pair(MakeCacheKey(f,*(*e)),cands));
+ }
+ }
+ }
+};
+*/
+
+}
+
diff --git a/moses/src/LexicalReorderingTable.h b/moses/src/LexicalReorderingTable.h
new file mode 100644
index 000000000..97b6df4a0
--- /dev/null
+++ b/moses/src/LexicalReorderingTable.h
@@ -0,0 +1,158 @@
+#ifndef moses_LexicalReorderingTable_h
+#define moses_LexicalReorderingTable_h
+
+//stdlib dependencies:
+#include <vector>
+#include <map>
+#include <memory>
+#include <string>
+#include <iostream>
+
+#ifdef WITH_THREADS
+#include <boost/thread/tss.hpp>
+#endif
+
+//moses dependencies:
+#include "TypeDef.h"
+#include "Phrase.h"
+#include "InputType.h"
+#include "ConfusionNet.h"
+#include "Sentence.h"
+#include "PrefixTreeMap.h"
+
+namespace Moses
+{
+
+class Phrase;
+class InputType;
+class ConfusionNet;
+
+//additional types
+
+typedef std::vector<float> Score;
+typedef std::vector<FactorType> FactorList;
+
+class LexicalReorderingTable {
+ public:
+ LexicalReorderingTable(const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors)
+ : m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors) {
+ }
+ virtual ~LexicalReorderingTable(){
+ }
+ public:
+ static LexicalReorderingTable* LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors);
+ public:
+ virtual Score GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
+ virtual void InitializeForInput(const InputType&){
+ /* override for on-demand loading */
+ };
+ virtual void InitializeForInputPhrase(const Phrase&){
+ };
+ /*
+ int GetNumScoreComponents() const {
+ return m_NumScores;
+ }
+ */
+ const FactorList& GetFFactorMask() const {
+ return m_FactorsF;
+ }
+ const FactorList& GetEFactorMask() const {
+ return m_FactorsE;
+ }
+ const FactorList& GetCFactorMask() const {
+ return m_FactorsC;
+ }
+ virtual void DbgDump(std::ostream* out) const{
+ *out << "Overwrite in subclass...\n";
+ };
+ protected:
+ FactorList m_FactorsF;
+ FactorList m_FactorsE;
+ FactorList m_FactorsC;
+};
+
+class LexicalReorderingTableMemory : public LexicalReorderingTable {
+ //implements LexicalReorderingTable saving all scores in one large std::map<> thingy
+ //to be used for non binary tables... uses a LOT of memory
+ public:
+ LexicalReorderingTableMemory( const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
+ virtual ~LexicalReorderingTableMemory();
+ public:
+ virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+ void DbgDump(std::ostream* out) const;
+ private:
+ std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
+ std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
+
+ void LoadFromFile(const std::string& filePath);
+ private:
+ typedef std::map< std::string, std::vector<float> > TableType;
+ TableType m_Table;
+};
+
+class LexicalReorderingTableTree : public LexicalReorderingTable {
+ //implements LexicalReorderingTable using the crafty PDT code...
+ public:
+ LexicalReorderingTableTree(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
+ ~LexicalReorderingTableTree();
+ public:
+ bool IsCacheEnabled() const {
+ return m_UseCache;
+ };
+ void EnableCache() {
+ m_UseCache = true;
+ };
+ void DisableCache() {
+ m_UseCache = false;
+ };
+ void ClearCache(){
+ if (m_UseCache) {
+ m_Cache.clear();
+ }
+ };
+
+ virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+
+ virtual void InitializeForInput(const InputType& input);
+ virtual void InitializeForInputPhrase(const Phrase& f){
+ ClearCache();
+ auxCacheForSrcPhrase(f);
+ }
+ public:
+ static bool Create(std::istream& inFile, const std::string& outFileName);
+ private:
+ std::string MakeCacheKey(const Phrase& f, const Phrase& e) const;
+ IPhrase MakeTableKey(const Phrase& f, const Phrase& e) const;
+
+ void Cache(const ConfusionNet& input);
+ void Cache(const Sentence& input);
+
+ void auxCacheForSrcPhrase(const Phrase& f);
+ Score auxFindScoreForContext(const Candidates& cands, const Phrase& contex);
+ private:
+ //typedef LexicalReorderingCand CandType;
+ typedef std::map< std::string, Candidates > CacheType;
+ #ifdef WITH_THREADS
+ typedef boost::thread_specific_ptr<PrefixTreeMap> TableType;
+ #else
+ typedef std::auto_ptr<PrefixTreeMap> TableType;
+ #endif
+
+ static const int SourceVocId = 0;
+ static const int TargetVocId = 1;
+
+ bool m_UseCache;
+ std::string m_FilePath;
+ CacheType m_Cache;
+ TableType m_Table;
+};
+
+}
+
+#endif
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
new file mode 100644
index 000000000..60fd9bc17
--- /dev/null
+++ b/moses/src/Makefile.am
@@ -0,0 +1,232 @@
+lib_LTLIBRARIES = libmoses.la
+AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
+
+libmoses_ladir = ${includedir}
+
+libmoses_la_HEADERS = \
+ TypeDef.h \
+ PrefixTree.h \
+ File.h \
+ FilePtr.h \
+ ObjectPool.h \
+ BitmapContainer.h \
+ ConfusionNet.h \
+ DecodeGraph.h \
+ DecodeStep.h \
+ DecodeStepGeneration.h \
+ DecodeStepTranslation.h \
+ Dictionary.h \
+ DPR_reordering.h \
+ DummyScoreProducers.h \
+ Factor.h \
+ FactorCollection.h \
+ FactorTypeSet.h \
+ FeatureFunction.h \
+ FFState.h \
+ FloydWarshall.h \
+ GenerationDictionary.h \
+ GlobalLexicalModel.h \
+ hash.h \
+ Hypothesis.h \
+ HypothesisStack.h \
+ HypothesisStackCubePruning.h \
+ HypothesisStackNormal.h \
+ InputType.h \
+ InputFileStream.h \
+ LMList.h \
+ LVoc.h \
+ LanguageModel.h \
+ LanguageModelFactory.h \
+ LanguageModelInternal.h \
+ LanguageModelMultiFactor.h \
+ LanguageModelRemote.h \
+ LanguageModelSingleFactor.h \
+ LanguageModelSkip.h \
+ TrellisPath.h \
+ TrellisPathList.h \
+ TrellisPathCollection.h \
+ LexicalReordering.h \
+ LexicalReorderingTable.h \
+ Manager.h \
+ NGramCollection.h \
+ NGramNode.h \
+ PCNTools.h \
+ Parameter.h \
+ PartialTranslOptColl.h \
+ Phrase.h \
+ PhraseDictionary.h \
+ PhraseDictionaryMemory.h \
+ PhraseDictionaryNode.h \
+ PhraseDictionaryTree.h \
+ PhraseDictionaryTreeAdaptor.h \
+ PrefixTreeMap.h \
+ ReorderingConstraint.h \
+ ScoreComponentCollection.h \
+ ScoreIndexManager.h \
+ ScoreProducer.h \
+ Search.h \
+ SearchCubePruning.h \
+ SearchNormal.h \
+ Sentence.h \
+ SentenceStats.h \
+ SquareMatrix.h \
+ StaticData.h \
+ TargetPhrase.h \
+ TargetPhraseCollection.h \
+ Timer.h \
+ TranslationOption.h \
+ TranslationOptionCollection.h \
+ TranslationOptionCollectionText.h \
+ TranslationOptionCollectionConfusionNet.h \
+ TranslationOptionList.h \
+ UserMessage.h \
+ Util.h \
+ Word.h \
+ WordsBitmap.h \
+ WordLattice.h \
+ WordsRange.h \
+ XmlOption.h
+
+if PROTOBUF
+libmoses_la_HEADERS += rule.pb.h hypergraph.pb.h
+endif
+
+if SRI_LM
+libmoses_la_HEADERS += LanguageModelSRI.h
+endif
+
+if IRST_LM
+libmoses_la_HEADERS += LanguageModelIRST.h
+endif
+
+if RAND_LM
+libmoses_la_HEADERS += LanguageModelRandLM.h
+endif
+
+if INTERNAL_LM
+libmoses_la_HEADERS += LanguageModelInternal.h \
+ NGramCollection.h \
+ NGramNode.h
+endif
+
+libmoses_la_SOURCES = \
+ BitmapContainer.cpp \
+ ConfusionNet.cpp \
+ DecodeGraph.cpp \
+ DecodeStep.cpp \
+ DecodeStepGeneration.cpp \
+ DecodeStepTranslation.cpp \
+ Dictionary.cpp \
+ DummyScoreProducers.cpp \
+ DPR_reordering.cpp \
+ Factor.cpp \
+ FactorCollection.cpp \
+ FactorTypeSet.cpp \
+ FeatureFunction.cpp \
+ FFState.cpp \
+ FloydWarshall.cpp \
+ GenerationDictionary.cpp \
+ GlobalLexicalModel.cpp \
+ hash.cpp \
+ Hypothesis.cpp \
+ HypothesisStack.cpp \
+ HypothesisStackCubePruning.cpp \
+ HypothesisStackNormal.cpp \
+ InputType.cpp \
+ InputFileStream.cpp \
+ LMList.cpp \
+ LVoc.cpp \
+ LanguageModel.cpp \
+ LanguageModelFactory.cpp \
+ LanguageModelInternal.cpp \
+ LanguageModelMultiFactor.cpp \
+ LanguageModelRemote.cpp \
+ LanguageModelSingleFactor.cpp \
+ LanguageModelSkip.cpp \
+ TrellisPath.cpp \
+ TrellisPathCollection.cpp \
+ LexicalReordering.cpp \
+ LexicalReorderingTable.cpp \
+ Manager.cpp \
+ NGramCollection.cpp \
+ NGramNode.cpp \
+ PCNTools.cpp \
+ Parameter.cpp \
+ PartialTranslOptColl.cpp \
+ Phrase.cpp \
+ PhraseDictionary.cpp \
+ PhraseDictionaryMemory.cpp \
+ PhraseDictionaryNode.cpp \
+ PhraseDictionaryTree.cpp \
+ PhraseDictionaryTreeAdaptor.cpp \
+ PrefixTreeMap.cpp \
+ ReorderingConstraint.cpp \
+ ScoreComponentCollection.cpp \
+ ScoreIndexManager.cpp \
+ ScoreProducer.cpp \
+ Search.cpp \
+ SearchCubePruning.cpp \
+ SearchNormal.cpp \
+ Sentence.cpp \
+ SentenceStats.cpp \
+ SquareMatrix.cpp \
+ StaticData.cpp \
+ TargetPhrase.cpp \
+ TargetPhraseCollection.cpp \
+ Timer.cpp \
+ TranslationOption.cpp \
+ TranslationOptionCollection.cpp \
+ TranslationOptionCollectionText.cpp \
+ TranslationOptionCollectionConfusionNet.cpp \
+ TranslationOptionList.cpp \
+ UserMessage.cpp \
+ Util.cpp \
+ Word.cpp \
+ WordsBitmap.cpp \
+ WordLattice.cpp \
+ WordsRange.cpp \
+ XmlOption.cpp
+
+if PROTOBUF
+BUILT_SOURCES = \
+ rule.pb.h \
+ rule.pb.cc \
+ hypergraph.pb.h \
+ hypergraph.pb.cc
+
+CLEANFILES = $(BUILT_SOURCES)
+SUFFIXES = .proto
+
+rule.pb.cc: rule.proto
+ @PROTOC@ --cpp_out=. $<
+rule.pb.h: rule.proto
+ @PROTOC@ --cpp_out=. $<
+
+hypergraph.pb.cc: hypergraph.proto
+ @PROTOC@ --cpp_out=. $<
+hypergraph.pb.h: hypergraph.proto
+ @PROTOC@ --cpp_out=. $<
+
+libmoses_la_SOURCES += rule.pb.cc hypergraph.pb.cc
+
+endif
+
+if SRI_LM
+libmoses_la_SOURCES += LanguageModelSRI.cpp
+endif
+
+if IRST_LM
+libmoses_la_SOURCES += LanguageModelIRST.cpp
+endif
+
+if RAND_LM
+libmoses_la_SOURCES += LanguageModelRandLM.cpp
+endif
+
+if INTERNAL_LM
+libmoses_la_SOURCES += LanguageModelInternal.cpp \
+ NGramCollection.cpp \
+ NGramNode.cpp
+endif
+
+libmoses_la_LIBADD = $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
new file mode 100644
index 000000000..22c1486fe
--- /dev/null
+++ b/moses/src/Manager.cpp
@@ -0,0 +1,859 @@
+// $Id: Manager.cpp 2958 2010-03-08 16:30:31Z abarun $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#ifdef WIN32
+#include <hash_set>
+#else
+#include <ext/hash_set>
+#endif
+
+#include <limits>
+#include <cmath>
+#include "Manager.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "TargetPhrase.h"
+#include "TrellisPath.h"
+#include "TrellisPathCollection.h"
+#include "TranslationOption.h"
+#include "LMList.h"
+#include "TranslationOptionCollection.h"
+#include "DummyScoreProducers.h"
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#include "rule.pb.h"
+#endif
+
+using namespace std;
+
+namespace Moses
+{
+Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
+:m_source(source)
+,m_transOptColl(source.CreateTranslationOptionCollection())
+,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
+,m_start(clock())
+,interrupted_flag(0)
+{
+ const StaticData &staticData = StaticData::Instance();
+ staticData.InitializeBeforeSentenceProcessing(source);
+}
+
+Manager::~Manager()
+{
+ delete m_transOptColl;
+ delete m_search;
+
+ StaticData::Instance().CleanUpAfterSentenceProcessing();
+
+ clock_t end = clock();
+ float et = (end - m_start);
+ et /= (float)CLOCKS_PER_SEC;
+ VERBOSE(1, "Translation took " << et << " seconds" << endl);
+ VERBOSE(1, "Finished translating" << endl);
+}
+
+/**
+ * Main decoder loop that translates a sentence by expanding
+ * hypotheses stack by stack, until the end of the sentence.
+ */
+void Manager::ProcessSentence()
+{
+ // reset statistics
+ const StaticData &staticData = StaticData::Instance();
+ ResetSentenceStats(m_source);
+
+ // collect translation options for this sentence
+ vector <DecodeGraph*>
+ decodeStepVL = staticData.GetDecodeStepVL(m_source);
+ m_transOptColl->CreateTranslationOptions(decodeStepVL);
+
+ // some reporting on how long this took
+ clock_t gotOptions = clock();
+ float et = (gotOptions - m_start);
+ IFVERBOSE(2) { GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start ); }
+ et /= (float)CLOCKS_PER_SEC;
+ VERBOSE(1, "Collecting options took " << et << " seconds" << endl);
+
+ // search for best translation with the specified algorithm
+ m_search->ProcessSentence();
+ VERBOSE(1, "Search took " << ((clock()-m_start)/(float)CLOCKS_PER_SEC) << " seconds" << endl);
+ RemoveAllInColl(decodeStepVL);
+}
+
+/**
+ * Print all derivations in search graph. Note: The number of derivations is exponential in the sentence length
+ *
+ */
+
+void Manager::PrintAllDerivations(long translationId ) const
+{
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+
+ vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
+
+ if (sortedPureHypo.size() == 0)
+ return;
+
+ float remainingScore = 0;
+ vector<const TargetPhrase*> remainingPhrases;
+
+ // add all pure paths
+ vector<const Hypothesis*>::const_iterator iterBestHypo;
+ for (iterBestHypo = sortedPureHypo.begin()
+ ; iterBestHypo != sortedPureHypo.end()
+ ; ++iterBestHypo)
+ {
+ printThisHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore);
+ printDivergentHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore);
+ }
+}
+
+
+void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const
+{
+ //Backtrack from the predecessor
+ if (hypo->GetId() > 0) {
+ vector <const TargetPhrase*> followingPhrases;
+ followingPhrases.push_back(& (hypo->GetCurrTargetPhrase()));
+ ///((Phrase) hypo->GetPrevHypo()->GetTargetPhrase());
+ followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
+ printDivergentHypothesis(translationId, hypo->GetPrevHypo(), followingPhrases , remainingScore + hypo->GetScore() - hypo->GetPrevHypo()->GetScore());
+ }
+
+ //Process the arcs
+ const ArcList *pAL = hypo->GetArcList();
+ if (pAL) {
+ const ArcList &arcList = *pAL;
+ // every possible Arc to replace this edge
+ ArcList::const_iterator iterArc;
+ for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
+ {
+ const Hypothesis *loserHypo = *iterArc;
+ const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
+ float arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
+ vector <const TargetPhrase* > followingPhrases;
+ followingPhrases.push_back(&(loserHypo->GetCurrTargetPhrase()));
+ followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
+ printThisHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore);
+ printDivergentHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore);
+ }
+ }
+}
+
+
+void Manager::printThisHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const
+{
+
+ cerr << translationId << " ||| ";
+
+ //Yield of this hypothesis
+ hypo->ToStream(cerr);
+ for (size_t p = 0; p < remainingPhrases.size(); ++p) {
+ const TargetPhrase * phrase = remainingPhrases[p];
+ size_t size = phrase->GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++)
+ {
+ const Factor *factor = phrase->GetFactor(pos, 0);
+ cerr << *factor;
+ cerr << " ";
+ }
+ }
+
+ cerr << "||| " << hypo->GetScore() + remainingScore;
+ cerr << endl;
+}
+
+
+
+
+/**
+ * After decoding, the hypotheses in the stacks and additional arcs
+ * form a search graph that can be mined for n-best lists.
+ * The heavy lifting is done in the TrellisPath and TrellisPathCollection
+ * this function controls this for one sentence.
+ *
+ * \param count the number of n-best translations to produce
+ * \param ret holds the n-best list that was calculated
+ */
+void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) const
+{
+ if (count <= 0)
+ return;
+
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+
+ vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
+
+ if (sortedPureHypo.size() == 0)
+ return;
+
+ TrellisPathCollection contenders;
+
+ set<Phrase> distinctHyps;
+
+ // add all pure paths
+ vector<const Hypothesis*>::const_iterator iterBestHypo;
+ for (iterBestHypo = sortedPureHypo.begin()
+ ; iterBestHypo != sortedPureHypo.end()
+ ; ++iterBestHypo)
+ {
+ contenders.Add(new TrellisPath(*iterBestHypo));
+ }
+
+ // factor defines stopping point for distinct n-best list if too many candidates identical
+ size_t nBestFactor = StaticData::Instance().GetNBestFactor();
+ if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited
+
+ // MAIN loop
+ for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * nBestFactor) ; iteration++)
+ {
+ // get next best from list of contenders
+ TrellisPath *path = contenders.pop();
+ assert(path);
+ if(onlyDistinct)
+ {
+ Phrase tgtPhrase = path->GetSurfacePhrase();
+ if (distinctHyps.insert(tgtPhrase).second)
+ ret.Add(path);
+ }
+ else
+ {
+ ret.Add(path);
+ }
+
+ // create deviations from current best
+ path->CreateDeviantPaths(contenders);
+
+ if(onlyDistinct)
+ {
+ const size_t nBestFactor = StaticData::Instance().GetNBestFactor();
+ if (nBestFactor > 0)
+ contenders.Prune(count * nBestFactor);
+ }
+ else
+ {
+ contenders.Prune(count);
+ }
+ }
+}
+
+
+
+
+void Manager::CalcDecoderStatistics() const
+{
+ const Hypothesis *hypo = GetBestHypothesis();
+ if (hypo != NULL)
+ {
+ GetSentenceStats().CalcFinalStats(*hypo);
+ IFVERBOSE(2) {
+ if (hypo != NULL) {
+ string buff;
+ string buff2;
+ TRACE_ERR( "Source and Target Units:"
+ << hypo->GetInput());
+ buff2.insert(0,"] ");
+ buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
+ buff2.insert(0,":");
+ buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
+ buff2.insert(0,"[");
+
+ hypo = hypo->GetPrevHypo();
+ while (hypo != NULL) {
+ //dont print out the empty final hypo
+ buff.insert(0,buff2);
+ buff2.clear();
+ buff2.insert(0,"] ");
+ buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
+ buff2.insert(0,":");
+ buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
+ buff2.insert(0,"[");
+ hypo = hypo->GetPrevHypo();
+ }
+ TRACE_ERR( buff << endl);
+ }
+ }
+ }
+}
+
+void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+
+
+ outputWordGraphStream << "J=" << linkId++
+ << "\tS=" << prevHypo->GetId()
+ << "\tE=" << hypo->GetId()
+ << "\ta=";
+
+ // phrase table scores
+ const std::vector<PhraseDictionaryFeature*> &phraseTables = staticData.GetPhraseDictionaries();
+ std::vector<PhraseDictionaryFeature*>::const_iterator iterPhraseTable;
+ for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
+ {
+ const PhraseDictionaryFeature *phraseTable = *iterPhraseTable;
+ vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
+
+ outputWordGraphStream << scores[0];
+ vector<float>::const_iterator iterScore;
+ for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
+ {
+ outputWordGraphStream << ", " << *iterScore;
+ }
+ }
+
+ // language model scores
+ outputWordGraphStream << "\tl=";
+ const LMList &lmList = staticData.GetAllLM();
+ LMList::const_iterator iterLM;
+ for (iterLM = lmList.begin() ; iterLM != lmList.end() ; ++iterLM)
+ {
+ LanguageModel *lm = *iterLM;
+ vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lm);
+
+ outputWordGraphStream << scores[0];
+ vector<float>::const_iterator iterScore;
+ for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
+ {
+ outputWordGraphStream << ", " << *iterScore;
+ }
+ }
+
+ // re-ordering
+ outputWordGraphStream << "\tr=";
+
+ outputWordGraphStream << hypo->GetScoreBreakdown().GetScoreForProducer(staticData.GetDistortionScoreProducer());
+
+ // lexicalised re-ordering
+ const std::vector<LexicalReordering*> &lexOrderings = staticData.GetReorderModels();
+ std::vector<LexicalReordering*>::const_iterator iterLexOrdering;
+ for (iterLexOrdering = lexOrderings.begin() ; iterLexOrdering != lexOrderings.end() ; ++iterLexOrdering)
+ {
+ LexicalReordering *lexicalReordering = *iterLexOrdering;
+ vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lexicalReordering);
+
+ outputWordGraphStream << scores[0];
+ vector<float>::const_iterator iterScore;
+ for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore)
+ {
+ outputWordGraphStream << ", " << *iterScore;
+ }
+ }
+
+ // words !!
+ outputWordGraphStream << "\tw=" << hypo->GetCurrTargetPhrase();
+
+ outputWordGraphStream << endl;
+}
+
+void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const
+{
+ const StaticData &staticData = StaticData::Instance();
+ string fileName = staticData.GetParam("output-word-graph")[0];
+ bool outputNBest = Scan<bool>(staticData.GetParam("output-word-graph")[1]);
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+
+ outputWordGraphStream << "VERSION=1.0" << endl
+ << "UTTERANCE=" << translationId << endl;
+
+ size_t linkId = 0;
+ size_t stackNo = 1;
+ std::vector < HypothesisStack* >::const_iterator iterStack;
+ for (iterStack = ++hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack)
+ {
+ cerr << endl << stackNo++ << endl;
+ const HypothesisStack &stack = **iterStack;
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ OutputWordGraph(outputWordGraphStream, hypo, linkId);
+
+ if (outputNBest)
+ {
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ OutputWordGraph(outputWordGraphStream, loserHypo, linkId);
+ }
+ }
+ } //if (outputNBest)
+ } //for (iterHypo
+ } // for (iterStack
+}
+
+void OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const Hypothesis *hypo, const Hypothesis *recombinationHypo, int forward, double fscore)
+{
+ const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+ bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended();
+ outputSearchGraphStream << translationId;
+
+ // special case: initial hypothesis
+ if ( hypo->GetId() == 0 )
+ {
+ outputSearchGraphStream << " hyp=0 stack=0";
+ if (!extendedFormat)
+ {
+ outputSearchGraphStream << " forward=" << forward << " fscore=" << fscore;
+ }
+ outputSearchGraphStream << endl;
+ return;
+ }
+
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+
+ // output in traditional format
+ if (!extendedFormat)
+ {
+ outputSearchGraphStream << " hyp=" << hypo->GetId()
+ << " stack=" << hypo->GetWordsBitmap().GetNumWordsCovered()
+ << " back=" << prevHypo->GetId()
+ << " score=" << hypo->GetScore()
+ << " transition=" << (hypo->GetScore() - prevHypo->GetScore());
+
+ if (recombinationHypo != NULL)
+ outputSearchGraphStream << " recombined=" << recombinationHypo->GetId();
+
+ outputSearchGraphStream << " forward=" << forward << " fscore=" << fscore
+ << " covered=" << hypo->GetCurrSourceWordsRange().GetStartPos()
+ << "-" << hypo->GetCurrSourceWordsRange().GetEndPos()
+ << " out=" << hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder)
+ << endl;
+ return;
+ }
+
+ // output in extended format
+ if (recombinationHypo != NULL)
+ outputSearchGraphStream << " hyp=" << recombinationHypo->GetId();
+ else
+ outputSearchGraphStream << " hyp=" << hypo->GetId();
+
+ outputSearchGraphStream << " back=" << prevHypo->GetId();
+
+ ScoreComponentCollection scoreBreakdown = hypo->GetScoreBreakdown();
+ scoreBreakdown.MinusEquals( prevHypo->GetScoreBreakdown() );
+ outputSearchGraphStream << " [ ";
+ StaticData::Instance().GetScoreIndexManager().PrintLabeledScores( outputSearchGraphStream, scoreBreakdown );
+ outputSearchGraphStream << " ]";
+
+ outputSearchGraphStream << " out=" << hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << endl;
+}
+
+void Manager::GetConnectedGraph(
+ std::map< int, bool >* pConnected,
+ std::vector< const Hypothesis* >* pConnectedList) const {
+ std::map < int, bool >& connected = *pConnected;
+ std::vector< const Hypothesis *>& connectedList = *pConnectedList;
+
+ // start with the ones in the final stack
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+ const HypothesisStack &finalStack = *hypoStackColl.back();
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ connected[ hypo->GetId() ] = true;
+ connectedList.push_back( hypo );
+ }
+
+ // move back from known connected hypotheses
+ for(size_t i=0; i<connectedList.size(); i++) {
+ const Hypothesis *hypo = connectedList[i];
+
+ // add back pointer
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+ if (prevHypo->GetId() > 0 // don't add empty hypothesis
+ && connected.find( prevHypo->GetId() ) == connected.end()) // don't add already added
+ {
+ connected[ prevHypo->GetId() ] = true;
+ connectedList.push_back( prevHypo );
+ }
+
+ // add arcs
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ if (connected.find( loserHypo->GetId() ) == connected.end()) // don't add already added
+ {
+ connected[ loserHypo->GetId() ] = true;
+ connectedList.push_back( loserHypo );
+ }
+ }
+ }
+ }
+}
+
+void Manager::GetWinnerConnectedGraph(
+ std::map< int, bool >* pConnected,
+ std::vector< const Hypothesis* >* pConnectedList) const {
+ std::map < int, bool >& connected = *pConnected;
+ std::vector< const Hypothesis *>& connectedList = *pConnectedList;
+
+ // start with the ones in the final stack
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+ const HypothesisStack &finalStack = *hypoStackColl.back();
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ connected[ hypo->GetId() ] = true;
+ connectedList.push_back( hypo );
+ }
+
+ // move back from known connected hypotheses
+ for(size_t i=0; i<connectedList.size(); i++) {
+ const Hypothesis *hypo = connectedList[i];
+
+ // add back pointer
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+ if (prevHypo->GetId() > 0 // don't add empty hypothesis
+ && connected.find( prevHypo->GetId() ) == connected.end()) // don't add already added
+ {
+ connected[ prevHypo->GetId() ] = true;
+ connectedList.push_back( prevHypo );
+ }
+
+ // add arcs
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ if (connected.find( loserHypo->GetPrevHypo()->GetId() ) == connected.end() && loserHypo->GetPrevHypo()->GetId() > 0) // don't add already added & don't add hyp 0
+ {
+ connected[ loserHypo->GetPrevHypo()->GetId() ] = true;
+ connectedList.push_back( loserHypo->GetPrevHypo() );
+ }
+ }
+ }
+ }
+}
+
+
+#ifdef HAVE_PROTOBUF
+
+void SerializeEdgeInfo(const Hypothesis* hypo, hgmert::Hypergraph_Edge* edge) {
+ hgmert::Rule* rule = edge->mutable_rule();
+ hypo->GetCurrTargetPhrase().WriteToRulePB(rule);
+ const Hypothesis* prev = hypo->GetPrevHypo();
+ // if the feature values are empty, they default to 0
+ if (!prev) return;
+ // score breakdown is an aggregate (forward) quantity, but the exported
+ // graph object just wants the feature values on the edges
+ const ScoreComponentCollection& scores = hypo->GetScoreBreakdown();
+ const ScoreComponentCollection& pscores = prev->GetScoreBreakdown();
+ for (unsigned int i = 0; i < scores.size(); ++i)
+ edge->add_feature_values((scores[i] - pscores[i]) * -1.0);
+}
+
+hgmert::Hypergraph_Node* GetHGNode(
+ const Hypothesis* hypo,
+ std::map< int, int>* i2hgnode,
+ hgmert::Hypergraph* hg,
+ int* hgNodeIdx) {
+ hgmert::Hypergraph_Node* hgnode;
+ std::map < int, int >::iterator idxi = i2hgnode->find(hypo->GetId());
+ if (idxi == i2hgnode->end()) {
+ *hgNodeIdx = ((*i2hgnode)[hypo->GetId()] = hg->nodes_size());
+ hgnode = hg->add_nodes();
+ } else {
+ *hgNodeIdx = idxi->second;
+ hgnode = hg->mutable_nodes(*hgNodeIdx);
+ }
+ return hgnode;
+}
+
+void Manager::SerializeSearchGraphPB(
+ long translationId,
+ std::ostream& outputStream) const {
+ using namespace hgmert;
+ std::map < int, bool > connected;
+ std::map < int, int > i2hgnode;
+ std::vector< const Hypothesis *> connectedList;
+ GetConnectedGraph(&connected, &connectedList);
+ connected[ 0 ] = true;
+ Hypergraph hg;
+ hg.set_is_sorted(false);
+ int num_feats = (*m_search->GetHypothesisStacks().back()->begin())->GetScoreBreakdown().size();
+ hg.set_num_features(num_feats);
+ StaticData::Instance().GetScoreIndexManager().SerializeFeatureNamesToPB(&hg);
+ Hypergraph_Node* goal = hg.add_nodes(); // idx=0 goal node must have idx 0
+ Hypergraph_Node* source = hg.add_nodes(); // idx=1
+ i2hgnode[-1] = 1; // source node
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+ const HypothesisStack &finalStack = *hypoStackColl.back();
+ for (std::vector < HypothesisStack* >::const_iterator iterStack = hypoStackColl.begin();
+ iterStack != hypoStackColl.end() ; ++iterStack)
+ {
+ const HypothesisStack &stack = **iterStack;
+ HypothesisStack::const_iterator iterHypo;
+
+ for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ bool is_goal = hypo->GetWordsBitmap().IsComplete();
+ if (connected.find( hypo->GetId() ) != connected.end())
+ {
+ int headNodeIdx;
+ Hypergraph_Node* headNode = GetHGNode(hypo, &i2hgnode, &hg, &headNodeIdx);
+ if (is_goal) {
+ Hypergraph_Edge* ge = hg.add_edges();
+ ge->set_head_node(0); // goal
+ ge->add_tail_nodes(headNodeIdx);
+ ge->mutable_rule()->add_trg_words("[X,1]");
+ }
+ Hypergraph_Edge* edge = hg.add_edges();
+ SerializeEdgeInfo(hypo, edge);
+ edge->set_head_node(headNodeIdx);
+ const Hypothesis* prev = hypo->GetPrevHypo();
+ int tailNodeIdx = 1; // source
+ if (prev)
+ tailNodeIdx = i2hgnode.find(prev->GetId())->second;
+ edge->add_tail_nodes(tailNodeIdx);
+
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ assert(connected[loserHypo->GetId()]);
+ Hypergraph_Edge* edge = hg.add_edges();
+ SerializeEdgeInfo(loserHypo, edge);
+ edge->set_head_node(headNodeIdx);
+ tailNodeIdx = i2hgnode.find(loserHypo->GetPrevHypo()->GetId())->second;
+ edge->add_tail_nodes(tailNodeIdx);
+ }
+ } // end if arcList empty
+ } // end if connected
+ } // end for iterHypo
+ } // end for iterStack
+ hg.SerializeToOstream(&outputStream);
+}
+#endif
+
+void Manager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
+{
+ std::map < int, bool > connected;
+ std::map < int, int > forward;
+ std::map < int, double > forwardScore;
+
+ // *** find connected hypotheses ***
+ std::vector< const Hypothesis *> connectedList;
+ GetConnectedGraph(&connected, &connectedList);
+
+ // ** compute best forward path for each hypothesis *** //
+
+ // forward cost of hypotheses on final stack is 0
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+ const HypothesisStack &finalStack = *hypoStackColl.back();
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ forwardScore[ hypo->GetId() ] = 0.0f;
+ forward[ hypo->GetId() ] = -1;
+ }
+
+ // compete for best forward score of previous hypothesis
+ std::vector < HypothesisStack* >::const_iterator iterStack;
+ for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack)
+ {
+ const HypothesisStack &stack = **iterStack;
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ if (connected.find( hypo->GetId() ) != connected.end())
+ {
+ // make a play for previous hypothesis
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+ double fscore = forwardScore[ hypo->GetId() ] +
+ hypo->GetScore() - prevHypo->GetScore();
+ if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
+ || forwardScore.find( prevHypo->GetId() )->second < fscore)
+ {
+ forwardScore[ prevHypo->GetId() ] = fscore;
+ forward[ prevHypo->GetId() ] = hypo->GetId();
+ }
+ // all arcs also make a play
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ // make a play
+ const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
+ double fscore = forwardScore[ hypo->GetId() ] +
+ loserHypo->GetScore() - loserPrevHypo->GetScore();
+ if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
+ || forwardScore.find( loserPrevHypo->GetId() )->second < fscore)
+ {
+ forwardScore[ loserPrevHypo->GetId() ] = fscore;
+ forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
+ }
+ } // end for arc list
+ } // end if arc list empty
+ } // end if hypo connected
+ } // end for hypo
+ } // end for stack
+
+ // *** output all connected hypotheses *** //
+
+ connected[ 0 ] = true;
+ for (iterStack = hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack)
+ {
+ const HypothesisStack &stack = **iterStack;
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ if (connected.find( hypo->GetId() ) != connected.end())
+ {
+ OutputSearchGraph(translationId, outputSearchGraphStream, hypo, NULL, forward[ hypo->GetId() ], forwardScore[ hypo->GetId() ]);
+
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ OutputSearchGraph(translationId, outputSearchGraphStream, loserHypo, hypo, forward[ hypo->GetId() ], forwardScore[ hypo->GetId() ]);
+ }
+ } // end if arcList empty
+ } // end if connected
+ } // end for iterHypo
+ } // end for iterStack
+}
+
+ void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
+ std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const
+ {
+ std::map < int, bool > &connected = *pConnected;
+ std::vector< const Hypothesis *>& connectedList = *pConnectedList;
+ std::map < int, int > forward;
+ std::map < int, double > forwardScore;
+
+ std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps = *pOutgoingHyps;
+ vector< float> & estimatedScores = *pFwdBwdScores;
+
+ // *** find connected hypotheses ***
+ GetWinnerConnectedGraph(&connected, &connectedList);
+
+ // ** compute best forward path for each hypothesis *** //
+
+ // forward cost of hypotheses on final stack is 0
+ const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
+ const HypothesisStack &finalStack = *hypoStackColl.back();
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ forwardScore[ hypo->GetId() ] = 0.0f;
+ forward[ hypo->GetId() ] = -1;
+ }
+
+ // compete for best forward score of previous hypothesis
+ std::vector < HypothesisStack* >::const_iterator iterStack;
+ for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack)
+ {
+ const HypothesisStack &stack = **iterStack;
+ HypothesisStack::const_iterator iterHypo;
+ for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
+ {
+ const Hypothesis *hypo = *iterHypo;
+ if (connected.find( hypo->GetId() ) != connected.end())
+ {
+ // make a play for previous hypothesis
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+ double fscore = forwardScore[ hypo->GetId() ] +
+ hypo->GetScore() - prevHypo->GetScore();
+ if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
+ || forwardScore.find( prevHypo->GetId() )->second < fscore)
+ {
+ forwardScore[ prevHypo->GetId() ] = fscore;
+ forward[ prevHypo->GetId() ] = hypo->GetId();
+ }
+ //store outgoing info
+ outgoingHyps[prevHypo].insert(hypo);
+
+ // all arcs also make a play
+ const ArcList *arcList = hypo->GetArcList();
+ if (arcList != NULL)
+ {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
+ {
+ const Hypothesis *loserHypo = *iterArcList;
+ // make a play
+ const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
+ double fscore = forwardScore[ hypo->GetId() ] +
+ loserHypo->GetScore() - loserPrevHypo->GetScore();
+ if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
+ || forwardScore.find( loserPrevHypo->GetId() )->second < fscore)
+ {
+ forwardScore[ loserPrevHypo->GetId() ] = fscore;
+ forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
+ }
+ //store outgoing info
+ outgoingHyps[loserPrevHypo].insert(hypo);
+
+
+ } // end for arc list
+ } // end if arc list empty
+ } // end if hypo connected
+ } // end for hypo
+ } // end for stack
+
+ for (std::vector< const Hypothesis *>::iterator it = connectedList.begin(); it != connectedList.end(); ++it) {
+ float estimatedScore = (*it)->GetScore() + forwardScore[(*it)->GetId()];
+ estimatedScores.push_back(estimatedScore);
+ }
+}
+
+
+const Hypothesis *Manager::GetBestHypothesis() const
+{
+ return m_search->GetBestHypothesis();
+}
+
+}
+
diff --git a/moses/src/Manager.h b/moses/src/Manager.h
new file mode 100644
index 000000000..96bbbcc51
--- /dev/null
+++ b/moses/src/Manager.h
@@ -0,0 +1,141 @@
+// $Id: Manager.h 2957 2010-03-08 15:28:40Z abarun $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Manager_h
+#define moses_Manager_h
+
+#include <vector>
+#include <list>
+#include <ctime>
+#include "InputType.h"
+#include "Hypothesis.h"
+#include "StaticData.h"
+#include "TranslationOption.h"
+#include "TranslationOptionCollection.h"
+#include "TrellisPathList.h"
+#include "SquareMatrix.h"
+#include "WordsBitmap.h"
+#include "Search.h"
+#include "SearchCubePruning.h"
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+namespace Moses
+{
+
+class TrellisPath;
+class TranslationOptionCollection;
+
+/** The Manager class implements a stack decoding algorithm.
+ * Hypotheses are organized in stacks. One stack contains all hypotheses that have
+ * the same number of foreign words translated. The data structure for hypothesis
+ * stacks is the class HypothesisStack. The data structure for a hypothesis
+ * is the class Hypothesis.
+ *
+ * The main decoder loop in the function ProcessSentence() consists of the steps:
+ * - Create the list of possible translation options. In phrase-based decoding
+ * (and also the first mapping step in the factored model) is a phrase translation
+ * from the source to the target. Given a specific input sentence, only a limited
+ * number of phrase translation can be applied. For efficient lookup of the
+ * translation options later, these options are first collected in the function
+ * CreateTranslationOption (for more information check the class
+ * TranslationOptionCollection)
+ * - Create initial hypothesis: Hypothesis stack 0 contains only one empty hypothesis.
+ * - Going through stacks 0 ... (sentence_length-1):
+ * - The stack is pruned to the maximum size
+ * - Going through all hypotheses in the stack
+ * - Each hypothesis is expanded by ProcessOneHypothesis()
+ * - Expansion means applying a translation option to the hypothesis to create
+ * new hypotheses
+ * - What translation options may be applied depends on reordering limits and
+ * overlap with already translated words
+ *      - With an applicable translation option and a hypothesis at hand, a new
+ * hypothesis can be created in ExpandHypothesis()
+ *      - New hypotheses are either discarded (because they are too bad), added to
+ * the appropriate stack, or re-combined with existing hypotheses
+ **/
+
+class Manager
+{
+ Manager();
+ Manager(Manager const&);
+ void operator=(Manager const&);
+protected:
+ // data
+ InputType const& m_source; /**< source sentence to be translated */
+ TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
+ Search *m_search;
+
+        HypothesisStack* actual_hypoStack; /**< actual (fully expanded) stack of hypotheses */
+ clock_t m_start; /**< starting time, used for logging */
+ size_t interrupted_flag;
+ void GetConnectedGraph(
+ std::map< int, bool >* pConnected,
+ std::vector< const Hypothesis* >* pConnectedList) const;
+ void GetWinnerConnectedGraph(
+ std::map< int, bool >* pConnected,
+ std::vector< const Hypothesis* >* pConnectedList) const;
+
+
+public:
+ Manager(InputType const& source, SearchAlgorithm searchAlgorithm);
+ ~Manager();
+
+ void ProcessSentence();
+ const Hypothesis *GetBestHypothesis() const;
+ const Hypothesis *GetActualBestHypothesis() const;
+ void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
+ void PrintAllDerivations(long translationId) const;
+ void printDivergentHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase*> & remainingPhrases, float remainingScore ) const;
+ void printThisHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase* > & remainingPhrases, float remainingScore ) const;
+ void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
+#ifdef HAVE_PROTOBUF
+ void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
+#endif
+
+ void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
+ const InputType& GetSource() const {return m_source;}
+
+ /***
+ * to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
+ */
+ void CalcDecoderStatistics() const;
+ void ResetSentenceStats(const InputType& source)
+ {
+ m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
+ }
+ SentenceStats& GetSentenceStats() const
+ {
+ return *m_sentenceStats;
+ }
+
+ /***
+ *For Lattice MBR
+ */
+ void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
+ std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set < const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const;
+
+ std::auto_ptr<SentenceStats> m_sentenceStats;
+};
+
+}
+#endif
diff --git a/moses/src/NGramCollection.cpp b/moses/src/NGramCollection.cpp
new file mode 100644
index 000000000..b73afbf5c
--- /dev/null
+++ b/moses/src/NGramCollection.cpp
@@ -0,0 +1,67 @@
+// $Id: NGramCollection.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "NGramCollection.h"
+#include "NGramNode.h"
+
+namespace Moses
+{
+NGramCollection::~NGramCollection()
+{
+ Collection::iterator iter;
+ for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter)
+ {
+ delete (iter->second);
+ }
+}
+
+void NGramCollection::Add(const Factor *factor, const NGramNode &ngramNode)
+{
+}
+
+NGramNode *NGramCollection::GetOrCreateNGram(const Factor *factor)
+{
+ Collection::iterator iter = m_collection.find(factor);
+ if (iter == m_collection.end())
+ {
+ return (m_collection[factor] = new NGramNode());
+ }
+ else
+ {
+ return (iter->second);
+ }
+}
+
+NGramNode *NGramCollection::GetNGram(const Factor *factor)
+{
+ Collection::iterator iter = m_collection.find(factor);
+ return (iter == m_collection.end()) ? NULL : (iter->second) ;
+}
+
+const NGramNode *NGramCollection::GetNGram(const Factor *factor) const
+{
+ Collection::const_iterator iter = m_collection.find(factor);
+ return (iter == m_collection.end()) ? NULL : (iter->second) ;
+}
+
+}
+
+
diff --git a/moses/src/NGramCollection.h b/moses/src/NGramCollection.h
new file mode 100644
index 000000000..b1f90a4a2
--- /dev/null
+++ b/moses/src/NGramCollection.h
@@ -0,0 +1,57 @@
+// $Id: NGramCollection.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_NGramCollection_h
+#define moses_NGramCollection_h
+
+#include <map>
+#include <vector>
+#include "NGramNode.h"
+
+namespace Moses
+{
+
+class Factor;
+
+typedef std::vector<const Factor*> FactorVector;
+
+class NGramCollection
+{
+protected:
+ typedef std::map<const Factor*, NGramNode*> Collection;
+ Collection m_collection;
+
+ void Add(const Factor *factor, const NGramNode &ngramNode);
+public:
+ NGramCollection()
+ {
+ }
+ ~NGramCollection();
+
+ NGramNode *GetOrCreateNGram(const Factor *factor);
+ NGramNode *GetNGram(const Factor *factor);
+ const NGramNode *GetNGram(const Factor *factor) const;
+
+};
+
+}
+
+#endif
diff --git a/moses/src/NGramNode.cpp b/moses/src/NGramNode.cpp
new file mode 100644
index 000000000..7cf2d7c5c
--- /dev/null
+++ b/moses/src/NGramNode.cpp
@@ -0,0 +1,26 @@
+
+#include "NGramNode.h"
+#include "NGramCollection.h"
+
+namespace Moses
+{
+
+NGramNode::NGramNode()
+{
+ m_map = new NGramCollection();
+}
+NGramNode::~NGramNode()
+{
+ delete m_map;
+}
+
+const NGramNode *NGramNode::GetNGram(const Factor *factor) const
+{
+ return m_map->GetNGram(factor);
+}
+NGramNode *NGramNode::GetNGram(const Factor *factor)
+{
+ return m_map->GetNGram(factor);
+}
+
+}
diff --git a/moses/src/NGramNode.h b/moses/src/NGramNode.h
new file mode 100644
index 000000000..89e2d4bf2
--- /dev/null
+++ b/moses/src/NGramNode.h
@@ -0,0 +1,79 @@
+// $Id: NGramNode.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_NGramNode_h
+#define moses_NGramNode_h
+
+#include "Factor.h"
+
+namespace Moses
+{
+
+class NGramCollection;
+
+class NGramNode
+{
+protected:
+ float m_score, m_logBackOff;
+ NGramCollection *m_map;
+ NGramNode *m_rootNGram;
+public:
+ NGramNode();
+ ~NGramNode();
+ NGramCollection *GetNGramColl()
+ {
+ return m_map;
+ }
+
+ const NGramNode *GetNGram(const Factor *factor) const;
+ NGramNode *GetNGram(const Factor *factor);
+
+ const NGramNode *GetRootNGram() const
+ {
+ return m_rootNGram;
+ }
+ void SetRootNGram(NGramNode *rootNGram)
+ {
+ m_rootNGram = rootNGram;
+ }
+
+ float GetScore() const
+ {
+ return m_score;
+ }
+ float GetLogBackOff() const
+ {
+ return m_logBackOff;
+ }
+ void SetScore(float score)
+ {
+ m_score = score;
+ }
+ void SetLogBackOff(float logBackOff)
+ {
+ m_logBackOff = logBackOff;
+ }
+
+};
+
+}
+
+#endif
diff --git a/moses/src/ObjectPool.h b/moses/src/ObjectPool.h
new file mode 100644
index 000000000..9fd64af1c
--- /dev/null
+++ b/moses/src/ObjectPool.h
@@ -0,0 +1,127 @@
+// $Id: ObjectPool.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/* ---------------------------------------------------------------- */
+/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
+/* Richard Zens */
+/* ---------------------------------------------------------------- */
+
+#ifndef moses_ObjectPool_h
+#define moses_ObjectPool_h
+
+#include <vector>
+#include <deque>
+#include <string>
+#include <iostream>
+#include <iterator>
+#include "Util.h"
+
+/***
+ * template class for pool of objects
+ * - useful if many small objects are frequently created and destroyed
+ * - allocates memory for N objects at a time
+ * - separates memory allocation from constructor/destructor calls
+ * - prevents memory leaks
+ */
+template<typename T> class ObjectPool {
+ public:
+ typedef T Object;
+ private:
+ std::string name;
+ size_t idx,dIdx,N;
+ std::vector<Object*> data;
+ std::vector<size_t> dataSize;
+ std::deque<Object*> freeObj;
+ int mode;
+ public:
+ static const int cleanUpOnDestruction=1;
+ static const int hasTrivialDestructor=2;
+
+ // constructor arguments:
+ // N: initial number of objects to allocate memory at a time
+ // m & cleanUpOnDestruction = clean up objects in destructor
+ // m & hasTrivialDestructor = the object type has a trivial destructor,
+ // i.e. no sub-object uses dynamically allocated memory
+ // note: not equivalent to empty destructor
+ // -> more efficient (destructor calls can be omitted),
+ // note: looks like memory leak, but is not
+ ObjectPool(std::string name_="T",size_t N_=100000,int m=cleanUpOnDestruction)
+ : name(name_),idx(0),dIdx(0),N(N_),mode(m) {allocate();}
+
+  // main access functions:
+ // get pointer to object via default or copy constructor
+ Object* get() {return new (getPtr()) Object;}
+ Object* get(const Object& x) {return new (getPtr()) Object(x);}
+
+ // get pointer to uninitialized memory,
+ // WARNING: use only if you know what you are doing !
+ // useful for non-default constructors, you have to use placement new
+ Object* getPtr() {
+ if(freeObj.size()) {
+ Object* rv=freeObj.back();freeObj.pop_back();rv->~Object();return rv;}
+ if(idx==dataSize[dIdx]) {idx=0; if(++dIdx==data.size()) allocate();}
+ return data[dIdx]+idx++;
+ }
+
+ // return object(s) to pool for reuse
+ // note: objects are not destroyed here, but in 'getPtr'/'destroyObjects',
+ // otherwise 'destroyObjects' would have to check the freeObj-stack
+ // before each destructor call
+ void freeObject(Object* x) {freeObj.push_back(x);}
+ template<class fwiter> void freeObjects(fwiter b,fwiter e) {
+ for(;b!=e;++b) this->free(*b);}
+
+ // destroy all objects, but do not free memory
+ void reset() {destroyObjects();idx=0;dIdx=0;freeObj.clear();}
+ // destroy all objects and free memory
+ void cleanUp() {
+ reset(); for(size_t i=0;i<data.size();++i) free(data[i]);
+ data.clear();dataSize.clear();
+ }
+
+ ~ObjectPool() {if(mode & cleanUpOnDestruction) cleanUp();}
+
+ void printInfo(std::ostream& out) const {
+ out<<"OPOOL ("<<name<<") info: "<<data.size()<<" "<<dataSize.size()<<" "
+ <<freeObj.size()<<"\n"<<idx<<" "<<dIdx<<" "<<N<<"\n";
+ std::copy(dataSize.begin(),dataSize.end(),
+ std::ostream_iterator<size_t>(out," "));
+ out<<"\n\n";
+ }
+
+
+ private:
+ void destroyObjects() {
+ if(mode & hasTrivialDestructor) return;
+ for(size_t i=0;i<=dIdx;++i) {
+ size_t lastJ= (i<dIdx ? dataSize[i] : idx);
+ for(size_t j=0;j<lastJ;++j) (data[i]+j)->~Object();}
+ }
+  // allocate memory for N objects; for follow-up allocations,
+ // the block size is doubled every time
+ // if allocation fails, block size is reduced by 1/4
+ void allocate() {
+ try {
+ if(dataSize.empty()) dataSize.push_back(N);
+ else dataSize.push_back(dataSize.back()*2);
+ void *m=malloc(sizeof(Object)*dataSize.back());
+ while(!m) {
+ dataSize.back()=static_cast<size_t>(dataSize.back()*0.75);
+ m=malloc(sizeof(Object)*dataSize.back());
+ }
+ data.push_back(static_cast<Object*>(m));
+ }
+ catch (const std::exception& e) {
+ TRACE_ERR("caught std::exception: "<<e.what()
+ <<" in ObjectPool::allocate(), name: "<<name<<", last size: "
+ <<dataSize.back()<<"\n");
+ TRACE_ERR("OPOOL info: "<<data.size()<<" "<<dataSize.size()<<" "
+ <<freeObj.size()<<"\n"<<idx<<" "<<dIdx<<" "<<N<<"\n");
+ std::copy(dataSize.begin(),dataSize.end(),
+ std::ostream_iterator<size_t>(std::cerr," "));
+ TRACE_ERR("\n");
+ throw;
+ }
+ }
+};
+
+#endif
diff --git a/moses/src/PCNTools.cpp b/moses/src/PCNTools.cpp
new file mode 100644
index 000000000..35cd56a80
--- /dev/null
+++ b/moses/src/PCNTools.cpp
@@ -0,0 +1,138 @@
+#include "PCNTools.h"
+
+#include <iostream>
+#include <cstdlib>
+
+namespace PCN
+{
+
+const std::string chars = "'\\";
+const char& quote = chars[0];
+const char& slash = chars[1];
+
+// safe get
+inline char get(const std::string& in, int c) {
+ if (c < 0 || c >= (int)in.size()) return 0;
+ else return in[(size_t)c];
+}
+
+// consume whitespace
+inline void eatws(const std::string& in, int& c) {
+ while (get(in,c) == ' ') { c++; }
+}
+
+// from 'foo' return foo
+std::string getEscapedString(const std::string& in, int &c)
+{
+ eatws(in,c);
+ if (get(in,c++) != quote) return "ERROR";
+ std::string res;
+ char cur = 0;
+ do {
+ cur = get(in,c++);
+ if (cur == slash) { res += get(in,c++); }
+ else if (cur != quote) { res += cur; }
+ } while (get(in,c) != quote && (c < (int)in.size()));
+ c++;
+ eatws(in,c);
+ return res;
+}
+
+// basically atof
+float getFloat(const std::string& in, int &c)
+{
+ std::string tmp;
+ eatws(in,c);
+ while (c < (int)in.size() && get(in,c) != ' ' && get(in,c) != ')' && get(in,c) != ',') {
+ tmp += get(in,c++);
+ }
+ eatws(in,c);
+ return atof(tmp.c_str());
+}
+
+// basically atoi
+int getInt(const std::string& in, int &c)
+{
+ std::string tmp;
+ eatws(in,c);
+ while (c < (int)in.size() && get(in,c) != ' ' && get(in,c) != ')' && get(in,c) != ',') {
+ tmp += get(in,c++);
+ }
+ eatws(in,c);
+ return atoi(tmp.c_str());
+}
+
+// parse ('foo', 0.23)
+CNAlt getCNAlt(const std::string& in, int &c)
+{
+ if (get(in,c++) != '(') { std::cerr << "PCN/PLF parse error: expected ( at start of cn alt block\n"; return CNAlt(); } // throw "expected (";
+ std::string word = getEscapedString(in,c);
+ if (get(in,c++) != ',') { std::cerr << "PCN/PLF parse error: expected , after string\n"; return CNAlt(); } // throw "expected , after string";
+ size_t cnNext = 1;
+ std::vector<float> probs;
+ probs.push_back(getFloat(in,c));
+ while (get(in,c) == ',') {
+ c++;
+ float val = getFloat(in,c);
+ probs.push_back(val);
+ }
+ //if we read more than one prob, this was a lattice, last item was column increment
+ if (probs.size()>1) {
+ cnNext = static_cast<size_t>(probs.back());
+ probs.pop_back();
+ if (cnNext < 1) { ; std::cerr << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; return CNAlt(); } //throw "bad link length"
+ }
+ if (get(in,c++) != ')') { std::cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; return CNAlt(); } // throw "expected )";
+ eatws(in,c);
+ return CNAlt(std::pair<std::string, std::vector<float> >(word,probs), cnNext);
+}
+
+// parse (('foo', 0.23), ('bar', 0.77))
+CNCol getCNCol(const std::string& in, int &c) {
+ CNCol res;
+ if (get(in,c++) != '(') return res; // error
+ eatws(in,c);
+ while (1) {
+ if (c > (int)in.size()) { break; }
+ if (get(in,c) == ')') {
+ c++;
+ eatws(in,c);
+ break;
+ }
+ if (get(in,c) == ',' && get(in,c+1) == ')') {
+ c+=2;
+ eatws(in,c);
+ break;
+ }
+ if (get(in,c) == ',') { c++; eatws(in,c); }
+ res.push_back(getCNAlt(in, c));
+ }
+ return res;
+}
+
+// parse ((('foo', 0.23), ('bar', 0.77)), (('a', 0.3), ('c', 0.7)))
+CN parsePCN(const std::string& in)
+{
+ CN res;
+ int c = 0;
+ if (in[c++] != '(') return res; // error
+ while (1) {
+ if (c > (int)in.size()) { break; }
+ if (get(in,c) == ')') {
+ c++;
+ eatws(in,c);
+ break;
+ }
+ if (get(in,c) == ',' && get(in,c+1) == ')') {
+ c+=2;
+ eatws(in,c);
+ break;
+ }
+ if (get(in,c) == ',') { c++; eatws(in,c); }
+ res.push_back(getCNCol(in, c));
+ }
+ return res;
+}
+
+}
+
diff --git a/moses/src/PCNTools.h b/moses/src/PCNTools.h
new file mode 100644
index 000000000..bd9a838d4
--- /dev/null
+++ b/moses/src/PCNTools.h
@@ -0,0 +1,46 @@
+// $Id: StaticData.h 992 2006-11-21 23:06:30Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PCNTools
+#define moses_PCNTools
+
+#include <vector>
+#include <string>
+#include <utility>
+#include <cstdlib>
+
+/** A couple of utilities to read .pcn files, a Python-compatible format
+ * for encoding confusion networks.
+ */
+namespace PCN {
+
+ typedef std::pair<std::pair<std::string, std::vector<float> >, size_t> CNAlt;
+ typedef std::vector<CNAlt> CNCol;
+ typedef std::vector<CNCol> CN;
+
+ /** Given a string ((('foo',0.1),('bar',0.9)),...) representation of a
+ * confusion net in PCN format, return a CN object
+ */
+ CN parsePCN(const std::string& in);
+
+};
+
+#endif
diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h
new file mode 100644
index 000000000..26b992e6e
--- /dev/null
+++ b/moses/src/PDTAimp.h
@@ -0,0 +1,546 @@
+// $Id: PDTAimp.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+
+#ifndef moses_PDTAimp_h
+#define moses_PDTAimp_h
+
+#include "StaticData.h" // needed for factor splitter
+
+namespace Moses
+{
+
+inline bool existsFile(const char* filePath) {
+ struct stat mystat;
+ return (stat(filePath,&mystat)==0);
+}
+
+double addLogScale(double x,double y)
+{
+ if(x>y) return addLogScale(y,x); else return x+log(1.0+exp(y-x));
+}
+
+double Exp(double x)
+{
+ return exp(x);
+}
+
+class PDTAimp
+{
+ // only these classes are allowed to instantiate this class
+ friend class PhraseDictionaryTreeAdaptor;
+
+protected:
+ PDTAimp(PhraseDictionaryTreeAdaptor *p,unsigned nis)
+ : m_languageModels(0),m_weightWP(0.0),m_dict(0),
+ m_obj(p),useCache(1),m_numInputScores(nis),totalE(0),distinctE(0) {}
+
+public:
+ std::vector<float> m_weights;
+ LMList const* m_languageModels;
+ float m_weightWP;
+ std::vector<FactorType> m_input,m_output;
+ PhraseDictionaryTree *m_dict;
+ typedef std::vector<TargetPhraseCollection const*> vTPC;
+ mutable vTPC m_tgtColls;
+
+ typedef std::map<Phrase,TargetPhraseCollection const*> MapSrc2Tgt;
+ mutable MapSrc2Tgt m_cache;
+ PhraseDictionaryTreeAdaptor *m_obj;
+ int useCache;
+
+ std::vector<vTPC> m_rangeCache;
+ unsigned m_numInputScores;
+
+ UniqueObjectManager<Phrase> uniqSrcPhr;
+
+ size_t totalE,distinctE;
+ std::vector<size_t> path1Best,pathExplored;
+ std::vector<double> pathCN;
+
+ ~PDTAimp()
+ {
+ CleanUp();
+ delete m_dict;
+
+ if (StaticData::Instance().GetVerboseLevel() >= 2)
+ {
+
+ TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
+ <<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates="
+ <<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
+ <<")\n");
+
+ TRACE_ERR("\npath statistics\n");
+
+ if(path1Best.size())
+ {
+ TRACE_ERR("1-best: ");
+ std::copy(path1Best.begin()+1,path1Best.end(),
+ std::ostream_iterator<size_t>(std::cerr," \t"));
+ TRACE_ERR("\n");
+ }
+ if(pathCN.size())
+ {
+ TRACE_ERR("CN (full): ");
+ std::transform(pathCN.begin()+1
+ ,pathCN.end()
+ ,std::ostream_iterator<double>(std::cerr," \t")
+ ,Exp);
+ TRACE_ERR("\n");
+ }
+ if(pathExplored.size())
+ {
+ TRACE_ERR("CN (explored): ");
+ std::copy(pathExplored.begin()+1,pathExplored.end(),
+ std::ostream_iterator<size_t>(std::cerr," \t"));
+ TRACE_ERR("\n");
+ }
+ }
+
+ }
+
+ void Factors2String(Word const& w,std::string& s) const
+ {
+ s=w.GetString(m_input,false);
+ }
+
+ void CleanUp()
+ {
+ assert(m_dict);
+ m_dict->FreeMemory();
+ for(size_t i=0;i<m_tgtColls.size();++i) delete m_tgtColls[i];
+ m_tgtColls.clear();
+ m_cache.clear();
+ m_rangeCache.clear();
+ uniqSrcPhr.clear();
+ }
+
+ void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
+ {
+ cerr << "AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)" << endl;
+ assert(GetTargetPhraseCollection(source)==0);
+
+ VERBOSE(2, "adding unk source phrase "<<source<<"\n");
+ std::pair<MapSrc2Tgt::iterator,bool> p
+ =m_cache.insert(std::make_pair(source,static_cast<TargetPhraseCollection const*>(0)));
+ if(p.second || p.first->second==0)
+ {
+ TargetPhraseCollection *ptr=new TargetPhraseCollection;
+ ptr->Add(new TargetPhrase(targetPhrase));
+ p.first->second=ptr;
+ m_tgtColls.push_back(ptr);
+ }
+ else VERBOSE(2, "WARNING: you added an already existing phrase!\n");
+ }
+
+ TargetPhraseCollection const*
+ GetTargetPhraseCollection(Phrase const &src) const
+ {
+
+ assert(m_dict);
+ if(src.GetSize()==0) return 0;
+
+ std::pair<MapSrc2Tgt::iterator,bool> piter;
+ if(useCache)
+ {
+ piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollection const*>(0)));
+ if(!piter.second) return piter.first->second;
+ }
+ else if (m_cache.size())
+ {
+ MapSrc2Tgt::const_iterator i=m_cache.find(src);
+ return (i!=m_cache.end() ? i->second : 0);
+ }
+
+ std::vector<std::string> srcString(src.GetSize());
+ // convert source Phrase into vector of strings
+ for(size_t i=0;i<srcString.size();++i)
+ {
+ Factors2String(src.GetWord(i),srcString[i]);
+ }
+
+ // get target phrases in string representation
+ std::vector<StringTgtCand> cands;
+ std::vector<StringWordAlignmentCand> swacands;
+ std::vector<StringWordAlignmentCand> twacands;
+// m_dict->GetTargetCandidates(srcString,cands);
+ m_dict->GetTargetCandidates(srcString,cands,swacands,twacands);
+ if(cands.empty())
+ {
+ return 0;
+ }
+
+ std::vector<TargetPhrase> tCands;tCands.reserve(cands.size());
+ std::vector<std::pair<float,size_t> > costs;costs.reserve(cands.size());
+
+ // convert into TargetPhrases
+ for(size_t i=0;i<cands.size();++i)
+ {
+ TargetPhrase targetPhrase(Output);
+
+ StringTgtCand::first_type const& factorStrings=cands[i].first;
+ StringTgtCand::second_type const& probVector=cands[i].second;
+ //StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
+ //StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;
+
+ std::vector<float> scoreVector(probVector.size());
+ std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
+ TransformScore);
+ std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
+ FloorScore);
+ CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
+ //CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);
+ costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
+ tCands.push_back(targetPhrase);
+ }
+
+ TargetPhraseCollection *rv;
+ rv=PruneTargetCandidates(tCands,costs);
+ if(rv->IsEmpty())
+ {
+ delete rv;
+ return 0;
+ }
+ else
+ {
+ if(useCache) piter.first->second=rv;
+ m_tgtColls.push_back(rv);
+ return rv;
+ }
+
+ }
+
+
+
+ void Create(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , const LMList &languageModels
+ , float weightWP
+ )
+ {
+
+ // set my members
+ m_dict=new PhraseDictionaryTree(weight.size()-m_numInputScores);
+ m_input=input;
+ m_output=output;
+ m_languageModels=&languageModels;
+ m_weightWP=weightWP;
+ m_weights=weight;
+
+
+
+ std::string binFname=filePath+".binphr.idx";
+ if(!existsFile(binFname.c_str())) {
+ TRACE_ERR( "bin ttable does not exist -> create it\n");
+ InputFileStream in(filePath);
+ m_dict->Create(in,filePath);
+ }
+ TRACE_ERR( "reading bin ttable\n");
+// m_dict->Read(filePath);
+ bool res=m_dict->Read(filePath);
+ if (!res) {
+ stringstream strme;
+ strme << "bin ttable was read in a wrong way\n";
+ UserMessage::Add(strme.str());
+ exit(1);
+ }
+ }
+
+ typedef PhraseDictionaryTree::PrefixPtr PPtr;
+ typedef unsigned short Position;
+ typedef std::pair<Position,Position> Range;
+ struct State {
+ PPtr ptr;
+ Range range;
+ std::vector<float> scores;
+ Phrase src;
+
+ State() : range(0,0),scores(0),src(Input) {}
+ State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
+ : ptr(v),range(b,e),scores(sv),src(Input) {}
+ State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
+ : ptr(v),range(r),scores(sv),src(Input) {}
+
+ Position begin() const {return range.first;}
+ Position end() const {return range.second;}
+ std::vector<float> GetScores() const {return scores;}
+
+ friend std::ostream& operator<<(std::ostream& out,State const& s) {
+ out<<" R=("<<s.begin()<<","<<s.end()<<"),";
+ for(std::vector<float>::const_iterator scoreIterator = s.GetScores().begin();scoreIterator<s.GetScores().end();scoreIterator++) {
+ out<<", "<<*scoreIterator;
+ }
+ out<<")";
+ return out;
+ }
+
+ };
+
+
+
+ void CreateTargetPhrase(TargetPhrase& targetPhrase,
+ StringTgtCand::first_type const& factorStrings,
+ StringTgtCand::second_type const& scoreVector,
+ Phrase const* srcPtr=0) const
+ {
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ for(size_t k=0;k<factorStrings.size();++k)
+ {
+ std::vector<std::string> factors=TokenizeMultiCharSeparator(*factorStrings[k],StaticData::Instance().GetFactorDelimiter());
+ Word& w=targetPhrase.AddWord();
+ for(size_t l=0;l<m_output.size();++l)
+ w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
+ }
+ targetPhrase.SetScore(m_obj->GetFeature(), scoreVector, m_weights, m_weightWP, *m_languageModels);
+ targetPhrase.SetSourcePhrase(srcPtr);
+
+// targetPhrase.CreateAlignmentInfo("???", "???", 44);
+ }
+
+
+
+
+ TargetPhraseCollection* PruneTargetCandidates(std::vector<TargetPhrase> const & tCands,
+ std::vector<std::pair<float,size_t> >& costs) const
+ {
+ // convert into TargetPhraseCollection
+ TargetPhraseCollection *rv=new TargetPhraseCollection;
+
+ // set limit to tableLimit or actual size, whatever is smaller
+ std::vector<std::pair<float,size_t> >::iterator nth =
+ costs.begin() + ((m_obj->m_tableLimit>0 && // 0 indicates no limit
+ m_obj->m_tableLimit < costs.size()) ?
+ m_obj->m_tableLimit : costs.size());
+
+ // find the nth phrase according to future cost
+ std::nth_element(costs.begin(),nth ,costs.end());
+
+ // add n top phrases to the return list
+ for(std::vector<std::pair<float,size_t> >::iterator
+ it = costs.begin(); it != nth; ++it)
+ rv->Add(new TargetPhrase(tCands[it->second]));
+
+ return rv;
+ }
+
+ // POD for target phrase scores
+ struct TScores {
+ float total;
+ StringTgtCand::second_type trans;
+ Phrase const* src;
+
+ TScores() : total(0.0),src(0) {}
+ };
+
+ void CacheSource(ConfusionNet const& src)
+ {
+ assert(m_dict);
+ const size_t srcSize=src.GetSize();
+
+ std::vector<size_t> exploredPaths(srcSize+1,0);
+ std::vector<double> exPathsD(srcSize+1,-1.0);
+
+ // collect some statistics
+ std::vector<size_t> cnDepths(srcSize,0);
+ for(size_t i=0;i<srcSize;++i) cnDepths[i]=src[i].size();
+
+ for(size_t len=1;len<=srcSize;++len)
+ for(size_t i=0;i<=srcSize-len;++i)
+ {
+ double pd=0.0; for(size_t k=i;k<i+len;++k) pd+=log(1.0*cnDepths[k]);
+ exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
+ }
+
+ // update global statistics
+ if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
+ for(size_t len=1;len<=srcSize;++len)
+ pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];
+
+ if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
+ for(size_t len=1;len<=srcSize;++len) path1Best[len]+=srcSize-len+1;
+
+
+ if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size())
+ {
+ TRACE_ERR("path stats for current CN: \nCN (full): ");
+ std::transform(exPathsD.begin()+1
+ ,exPathsD.end()
+ ,std::ostream_iterator<double>(std::cerr," ")
+ ,Exp);
+ TRACE_ERR("\n");
+ }
+
+ typedef StringTgtCand::first_type sPhrase;
+ typedef std::map<StringTgtCand::first_type,TScores> E2Costs;
+
+ std::map<Range,E2Costs> cov2cand;
+ std::vector<State> stack;
+ for(Position i=0 ; i < srcSize ; ++i)
+ stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));
+
+ while(!stack.empty())
+ {
+ State curr(stack.back());
+ stack.pop_back();
+
+ assert(curr.end()<srcSize);
+ const ConfusionNet::Column &currCol=src[curr.end()];
+ // in a given column, loop over all possibilities
+ for(size_t colidx=0;colidx<currCol.size();++colidx)
+ {
+ const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
+ std::string s;
+ Factors2String(w,s);
+ bool isEpsilon=(s=="" || s==EPSILON);
+
+ //assert that we have the right number of link params in this CN option
+ assert(currCol[colidx].second.size() >= m_numInputScores);
+
+ // do not start with epsilon (except at first position)
+ if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
+
+ // At a given node in the prefix tree, look to see if w defines an edge to
+ // another node (Extend). Stay at the same node if w==EPSILON
+ PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));
+
+ if(nextP) // w is a word that should be considered
+ {
+ Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));
+
+ //add together the link scores from the current state and the new arc
+ float inputScoreSum = 0;
+ std::vector<float> newInputScores(m_numInputScores,0.0);
+ if (m_numInputScores) {
+ std::transform(currCol[colidx].second.begin(), currCol[colidx].second.end(),
+ curr.GetScores().begin(),
+ newInputScores.begin(),
+ std::plus<float>());
+
+
+ //we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
+ //if the sum is too low, then we won't expand this.
+ //TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
+ inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
+ }
+
+ Phrase newSrc(curr.src);
+ if(!isEpsilon) newSrc.AddWord(w);
+ if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE)
+ {
+ // if there is more room to grow, add a new state onto the queue
+ // to be explored that represents [begin, curEnd+)
+ stack.push_back(State(newRange,nextP,newInputScores));
+ stack.back().src=newSrc;
+ }
+
+ std::vector<StringTgtCand> tcands;
+ // now, look up the target candidates (aprx. TargetPhraseCollection) for
+ // the current path through the CN
+ m_dict->GetTargetCandidates(nextP,tcands);
+
+ if(newRange.second>=exploredPaths.size()+newRange.first)
+ exploredPaths.resize(newRange.second-newRange.first+1,0);
+ ++exploredPaths[newRange.second-newRange.first];
+
+ totalE+=tcands.size();
+
+ if(tcands.size())
+ {
+ E2Costs& e2costs=cov2cand[newRange];
+ Phrase const* srcPtr=uniqSrcPhr(newSrc);
+ for(size_t i=0;i<tcands.size();++i)
+ {
+ //put input scores in first - already logged, just drop in directly
+ std::vector<float> nscores(newInputScores);
+
+ //resize to include phrase table scores
+ nscores.resize(m_numInputScores+tcands[i].second.size(),0.0f);
+
+ //put in phrase table scores, logging as we insert
+ std::transform(tcands[i].second.begin(),tcands[i].second.end(),nscores.begin() + m_numInputScores,TransformScore);
+
+ assert(nscores.size()==m_weights.size());
+
+ //tally up
+ float score=std::inner_product(nscores.begin(), nscores.end(), m_weights.begin(), 0.0f);
+
+ //count word penalty
+ score-=tcands[i].first.size() * m_weightWP;
+
+ std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].first,TScores()));
+
+ if(p.second) ++distinctE;
+
+ TScores & scores=p.first->second;
+ if(p.second || scores.total<score)
+ {
+ scores.total=score;
+ scores.trans=nscores;
+ scores.src=srcPtr;
+ }
+ }
+ }
+ }
+ }
+ } // end while(!stack.empty())
+
+
+ if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size())
+ {
+ TRACE_ERR("CN (explored): ");
+ std::copy(exploredPaths.begin()+1,exploredPaths.end(),
+ std::ostream_iterator<size_t>(std::cerr," "));
+ TRACE_ERR("\n");
+ }
+
+ if(pathExplored.size()<exploredPaths.size())
+ pathExplored.resize(exploredPaths.size(),0);
+ for(size_t len=1;len<=srcSize;++len)
+ pathExplored[len]+=exploredPaths[len];
+
+
+ m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
+
+ for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin();i!=cov2cand.end();++i)
+ {
+ assert(i->first.first<m_rangeCache.size());
+ assert(i->first.second>0);
+ assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
+ assert(m_rangeCache[i->first.first][i->first.second-1]==0);
+
+ std::vector<TargetPhrase> tCands;tCands.reserve(i->second.size());
+ std::vector<std::pair<float,size_t> > costs;costs.reserve(i->second.size());
+
+ for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
+ {
+ TScores const & scores=j->second;
+ TargetPhrase targetPhrase(Output);
+ CreateTargetPhrase(targetPhrase,j->first,scores.trans,scores.src);
+ costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
+ tCands.push_back(targetPhrase);
+ //std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
+ }
+
+ TargetPhraseCollection *rv=PruneTargetCandidates(tCands,costs);
+
+ if(rv->IsEmpty())
+ delete rv;
+ else
+ {
+ m_rangeCache[i->first.first][i->first.second-1]=rv;
+ m_tgtColls.push_back(rv);
+ }
+ }
+ // free memory
+ m_dict->FreeMemory();
+ }
+
+
+ size_t GetNumInputScores() const {return m_numInputScores;}
+};
+
+}
+#endif
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
new file mode 100644
index 000000000..0ee5e05f4
--- /dev/null
+++ b/moses/src/Parameter.cpp
@@ -0,0 +1,592 @@
+// $Id: Parameter.cpp 2977 2010-03-15 13:08:58Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <iostream>
+#include <iterator>
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+#include "Parameter.h"
+#include "Util.h"
+#include "InputFileStream.h"
+#include "UserMessage.h"
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+using namespace std;
+
+namespace Moses
+{
+/** define allowed parameters */
+Parameter::Parameter()
+{
+ AddParam("beam-threshold", "b", "threshold for threshold pruning");
+ AddParam("config", "f", "location of the configuration file");
+ AddParam("drop-unknown", "du", "drop unknown words instead of copying them");
+ AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
+ AddParam("generation-file", "location and properties of the generation table");
+ AddParam("global-lexical-file", "gl", "discriminatively trained global lexical translation model file");
+ AddParam("input-factors", "list of factors in the input");
+ AddParam("input-file", "i", "location of the input file to be translated");
+ AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
+ AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
+ AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
+ AddParam("lmodel-file", "location and properties of the language models");
+ AddParam("lmodel-dub", "dictionary upper bounds of language models");
+ AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");
+ AddParam("mapping", "description of decoding steps");
+ AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
+ AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
+ AddParam("max-phrase-length", "maximum phrase length (default 20)");
+ AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
+ AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
+ AddParam("output-factors", "list of factors in the output");
+ AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
+ AddParam("report-all-factors", "report all factors in output, not just first");
+ AddParam("report-segmentation", "t", "report phrase segmentation in the output");
+ AddParam("stack", "s", "maximum stack size for histogram pruning");
+ AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
+ AddParam("translation-details", "T", "for each best translation hypothesis, print out details about what sourcce spans were used, dropped");
+ AddParam("ttable-file", "location and properties of the translation tables");
+ AddParam("ttable-limit", "ttl", "maximum number of translation table entries per input phrase");
+ AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
+ AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
+ AddParam("verbose", "v", "verbosity level of the logging");
+ AddParam("weight-d", "d", "weight(s) for distortion (reordering components)");
+ AddParam("weight-generation", "g", "weight(s) for generation components");
+ AddParam("weight-i", "I", "weight(s) for word insertion - used for parameters from confusion network and lattice input links");
+ AddParam("weight-l", "lm", "weight(s) for language models");
+ AddParam("weight-lex", "lex", "weight for global lexical model");
+ AddParam("weight-t", "tm", "weights for translation model components");
+ AddParam("weight-w", "w", "weight for word penalty");
+ AddParam("weight-u", "u", "weight for unknown word penalty");
+ AddParam("weight-e", "e", "weight for word deletion");
+ AddParam("weight-file", "wf", "file containing labeled weights");
+ AddParam("output-factors", "list if factors in the output");
+ AddParam("cache-path", "?");
+ AddParam("distortion-limit", "dl", "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
+ AddParam("monotone-at-punctuation", "mp", "do not reorder over punctuation");
+ AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
+ AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
+ AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
+ AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
+ AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
+ AddParam("mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
+ AddParam("mbr-scale", "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
+ AddParam("lmbr-thetas", "theta(s) for lattice mbr calculation");
+ AddParam("lmbr-pruning-factor", "average number of nodes/word wanted in pruned lattice");
+ AddParam("lmbr-p", "unigram precision value for lattice mbr");
+ AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
+ AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
+ AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
+ AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
+ AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
+ AddParam("output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
+ AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
+ AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
+ AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
+#ifdef HAVE_PROTOBUF
+ AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
+#endif
+ AddParam("cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
+ AddParam("cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
+ AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
+ AddParam("constraint", "Location of the file with target sentences to produce constraining the search");
+ AddParam("use-alignment-info", "Use word-to-word alignment: actually it is only used to output the word-to-word alignment. Word-to-word alignments are taken from the phrase table if any. Default is false.");
+ AddParam("print-alignment-info", "Output word-to-word alignment into the log file. Word-to-word alignments are takne from the phrase table if any. Default is false");
+ AddParam("print-alignment-info-in-n-best", "Include word-to-word alignment in the n-best list. Word-to-word alignments are takne from the phrase table if any. Default is false");
+ AddParam("link-param-count", "Number of parameters on word links when using confusion networks or lattices (default = 1)");
+ AddParam("description", "Source language, target language, description");
+ /*******************************Loading DPR model**********************************************/
+ AddParam("DPR-file","DPR-file","Model file for the DPR model");
+ AddParam("wDPR","wDPR","weight for the DPR model");
+ AddParam("class-DPR","class-DPR","the number of orientations for the DPR model");
+ /*******************************Loading DPR model**********************************************/
+
+}
+
+Parameter::~Parameter()
+{
+}
+
+/** initialize a parameter, sub of constructor */
+void Parameter::AddParam(const string &paramName, const string &description)
+{
+ m_valid[paramName] = true;
+ m_description[paramName] = description;
+}
+
+/** initialize a parameter (including abbreviation), sub of constructor */
+void Parameter::AddParam(const string &paramName, const string &abbrevName, const string &description)
+{
+ m_valid[paramName] = true;
+ m_valid[abbrevName] = true;
+ m_abbreviation[paramName] = abbrevName;
+ m_description[paramName] = description;
+}
+
+/** print descriptions of all parameters */
+void Parameter::Explain() {
+ cerr << "Usage:" << endl;
+ for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
+ {
+ const string paramName = iterParam->first;
+ const string paramDescription = iterParam->second;
+ cerr << "\t-" << paramName;
+ PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );
+ if ( iterAbbr != m_abbreviation.end() )
+ cerr << " (" << iterAbbr->second << ")";
+ cerr << ": " << paramDescription << endl;
+ }
+}
+
+/** check whether an item on the command line is a switch or a value
+ * \param token token on the command line to checked **/
+
+bool Parameter::isOption(const char* token) {
+ if (! token) return false;
+ std::string tokenString(token);
+ size_t length = tokenString.size();
+ if (length > 0 && tokenString.substr(0,1) != "-") return false;
+ if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
+ return false;
+}
+
+/** load all parameters from the configuration file and the command line switches */
+bool Parameter::LoadParam(const string &filePath)
+{
+ const char *argv[] = {"executable", "-f", filePath.c_str() };
+ return LoadParam(3, (char**) argv);
+}
+
+/** load all parameters from the configuration file and the command line switches */
+bool Parameter::LoadParam(int argc, char* argv[])
+{
+ // config file (-f) arg mandatory
+ string configPath;
+ if ( (configPath = FindParam("-f", argc, argv)) == ""
+ && (configPath = FindParam("-config", argc, argv)) == "")
+ {
+ PrintCredit();
+
+ UserMessage::Add("No configuration file was specified. Use -config or -f");
+ return false;
+ }
+ else
+ {
+ if (!ReadConfigFile(configPath))
+ {
+ UserMessage::Add("Could not read "+configPath);
+ return false;
+ }
+ }
+
+ // overwrite parameters with values from switches
+ for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
+ {
+ const string paramName = iterParam->first;
+ OverwriteParam("-" + paramName, paramName, argc, argv);
+ }
+
+ // ... also shortcuts
+ for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++)
+ {
+ const string paramName = iterParam->first;
+ const string paramShortName = iterParam->second;
+ OverwriteParam("-" + paramShortName, paramName, argc, argv);
+ }
+
+ // logging of parameters that were set in either config or switch
+ int verbose = 1;
+ if (m_setting.find("verbose") != m_setting.end() &&
+ m_setting["verbose"].size() > 0)
+ verbose = Scan<int>(m_setting["verbose"][0]);
+ if (verbose >= 1) { // only if verbose
+ TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
+ for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
+ TRACE_ERR( "\t" << iterParam->first << ": ");
+ for ( size_t i = 0; i < iterParam->second.size(); i++ )
+ TRACE_ERR( iterParam->second[i] << " ");
+ TRACE_ERR( endl);
+ }
+ }
+
+ // check for illegal parameters
+ bool noErrorFlag = true;
+ for (int i = 0 ; i < argc ; i++)
+ {
+ if (isOption(argv[i]))
+ {
+ string paramSwitch = (string) argv[i];
+ string paramName = paramSwitch.substr(1);
+ if (m_valid.find(paramName) == m_valid.end())
+ {
+ UserMessage::Add("illegal switch: " + paramSwitch);
+ noErrorFlag = false;
+ }
+ }
+ }
+
+ // check if parameters make sense
+ return Validate() && noErrorFlag;
+}
+
+/** check that parameter settings make sense */
+bool Parameter::Validate()
+{
+ bool noErrorFlag = true;
+
+ // required parameters
+ if (m_setting["ttable-file"].size() == 0)
+ {
+ UserMessage::Add("No phrase translation table (ttable-file)");
+ noErrorFlag = false;
+ }
+
+ if (m_setting["lmodel-dub"].size() > 0)
+ {
+ if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size())
+ {
+ stringstream errorMsg("");
+ errorMsg << "Config and parameters specify "
+ << static_cast<int>(m_setting["lmodel-file"].size())
+ << " language model files (lmodel-file), but "
+ << static_cast<int>(m_setting["lmodel-dub"].size())
+ << " LM upperbounds (lmodel-dub)"
+ << endl;
+ UserMessage::Add(errorMsg.str());
+ noErrorFlag = false;
+ }
+ }
+
+ if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size())
+ {
+ stringstream errorMsg("");
+ errorMsg << "Config and parameters specify "
+ << static_cast<int>(m_setting["lmodel-file"].size())
+ << " language model files (lmodel-file), but "
+ << static_cast<int>(m_setting["weight-l"].size())
+ << " weights (weight-l)";
+ errorMsg << endl << "You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \"TYPE FACTOR ORDER FILENAME\"'";
+ UserMessage::Add(errorMsg.str());
+ noErrorFlag = false;
+ }
+
+ // do files exist?
+ // phrase tables
+ if (noErrorFlag)
+ {
+ std::vector<std::string> ext;
+ // standard phrase table extension (i.e. full name has to be specified)
+ // raw tables in either un compressed or compressed form
+ ext.push_back("");
+ ext.push_back(".gz");
+ // alternative file extension for binary phrase table format:
+ ext.push_back(".binphr.idx");
+ noErrorFlag = FilesExist("ttable-file", 3,ext);
+ }
+ // language model
+// if (noErrorFlag)
+// noErrorFlag = FilesExist("lmodel-file", 3);
+ // input file
+ if (noErrorFlag && m_setting["input-file"].size() == 1)
+ {
+ noErrorFlag = FileExists(m_setting["input-file"][0]);
+ }
+ // generation tables
+ if (noErrorFlag)
+ {
+ std::vector<std::string> ext;
+ //raw tables in either un compressed or compressed form
+ ext.push_back("");
+ ext.push_back(".gz");
+ noErrorFlag = FilesExist("generation-file", 3, ext);
+ }
+ // distortion
+ if (noErrorFlag)
+ {
+ std::vector<std::string> ext;
+ //raw tables in either un compressed or compressed form
+ ext.push_back("");
+ ext.push_back(".gz");
+ //prefix tree format
+ ext.push_back(".binlexr.idx");
+ noErrorFlag = FilesExist("distortion-file", 3, ext);
+ }
+ return noErrorFlag;
+}
+
+/** check whether a file exists */
+bool Parameter::FilesExist(const string &paramName, size_t tokenizeIndex,std::vector<std::string> const& extensions)
+{
+ typedef std::vector<std::string> StringVec;
+ StringVec::const_iterator iter;
+
+ PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
+ if (iterParam == m_setting.end())
+ { // no param. therefore nothing to check
+ return true;
+ }
+ const StringVec &pathVec = (*iterParam).second;
+ for (iter = pathVec.begin() ; iter != pathVec.end() ; ++iter)
+ {
+ StringVec vec = Tokenize(*iter);
+ if (tokenizeIndex >= vec.size())
+ {
+ stringstream errorMsg("");
+ errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per emtry in '"
+ << paramName << "', but only found "
+ << vec.size();
+ UserMessage::Add(errorMsg.str());
+ return false;
+ }
+ const string &pathStr = vec[tokenizeIndex];
+
+ bool fileFound=0;
+ for(size_t i=0;i<extensions.size() && !fileFound;++i)
+ {
+ fileFound|=FileExists(pathStr + extensions[i]);
+ }
+ if(!fileFound)
+ {
+ stringstream errorMsg("");
+ errorMsg << "File " << pathStr << " does not exist";
+ UserMessage::Add(errorMsg.str());
+ return false;
+ }
+ }
+ return true;
+}
+
+/** look for a switch in arg, update parameter */
+// TODO arg parsing like this does not belong in the library, it belongs
+// in moses-cmd
+string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])
+{
+ for (int i = 0 ; i < argc ; i++)
+ {
+ if (string(argv[i]) == paramSwitch)
+ {
+ if (i+1 < argc)
+ {
+ return argv[i+1];
+ } else {
+ stringstream errorMsg("");
+ errorMsg << "Option " << paramSwitch << " requires a parameter!";
+ UserMessage::Add(errorMsg.str());
+ // TODO return some sort of error, not the empty string
+ }
+ }
+ }
+ return "";
+}
+
+/** update parameter settings with command line switches
+ * \param paramSwitch (potentially short) name of switch
+ * \param paramName full name of parameter
+ * \param argc number of arguments on command line
+ * \param argv values of paramters on command line */
+void Parameter::OverwriteParam(const string &paramSwitch, const string &paramName, int argc, char* argv[])
+{
+ int startPos = -1;
+ for (int i = 0 ; i < argc ; i++)
+ {
+ if (string(argv[i]) == paramSwitch)
+ {
+ startPos = i+1;
+ break;
+ }
+ }
+ if (startPos < 0)
+ return;
+
+ int index = 0;
+ m_setting[paramName]; // defines the parameter, important for boolean switches
+ while (startPos < argc && (!isOption(argv[startPos])))
+ {
+ if (m_setting[paramName].size() > (size_t)index)
+ m_setting[paramName][index] = argv[startPos];
+ else
+ m_setting[paramName].push_back(argv[startPos]);
+ index++;
+ startPos++;
+ }
+}
+
+
+/** read parameters from a configuration file */
+bool Parameter::ReadConfigFile( string filePath )
+{
+ InputFileStream inFile(filePath);
+ string line, paramName;
+ while(getline(inFile, line))
+ {
+ // comments
+ size_t comPos = line.find_first_of("#");
+ if (comPos != string::npos)
+ line = line.substr(0, comPos);
+ // trim leading and trailing spaces/tabs
+ line = Trim(line);
+
+ if (line[0]=='[')
+ { // new parameter
+ for (size_t currPos = 0 ; currPos < line.size() ; currPos++)
+ {
+ if (line[currPos] == ']')
+ {
+ paramName = line.substr(1, currPos - 1);
+ break;
+ }
+ }
+ }
+ else if (line != "")
+ { // add value to parameter
+ m_setting[paramName].push_back(line);
+ }
+ }
+ return true;
+}
+
+struct Credit
+{
+ string name, contact, currentPursuits, areaResponsibility;
+
+ Credit(string name, string contact, string currentPursuits, string areaResponsibility)
+ {
+ this->name = name ;
+ this->contact = contact ;
+ this->currentPursuits = currentPursuits ;
+ this->areaResponsibility = areaResponsibility;
+ }
+
+ bool operator<(const Credit &other) const
+ {
+ if (areaResponsibility.size() != 0 && other.areaResponsibility.size() ==0)
+ return true;
+ if (areaResponsibility.size() == 0 && other.areaResponsibility.size() !=0)
+ return false;
+
+ return name < other.name;
+ }
+
+};
+
+std::ostream& operator<<(std::ostream &os, const Credit &credit)
+{
+ os << credit.name;
+ if (credit.contact != "")
+ os << "\n contact: " << credit.contact;
+ if (credit.currentPursuits != "")
+ os << "\n " << credit.currentPursuits;
+ if (credit.areaResponsibility != "")
+ os << "\n I'll answer question on: " << credit.areaResponsibility;
+ os << endl;
+ return os;
+}
+
+void Parameter::PrintCredit()
+{
+ vector<Credit> everyone;
+
+ everyone.push_back(Credit("Nicola Bertoldi"
+ , "911"
+ , ""
+ , "scripts & other stuff"));
+ everyone.push_back(Credit("Ondrej Bojar"
+ , ""
+ , "czech this out!"
+ , ""));
+ everyone.push_back(Credit("Chris Callison-Burch"
+ , "anytime, anywhere"
+ , "international playboy"
+ , ""));
+ everyone.push_back(Credit("Alexandra Constantin"
+ , ""
+ , "eu sunt varza"
+ , ""));
+ everyone.push_back(Credit("Brooke Cowan"
+ , "brooke@csail.mit.edu"
+ , "if you're going to san francisco, be sure to wear a flower in your hair"
+ , ""));
+ everyone.push_back(Credit("Chris Dyer"
+ , "can't. i'll be out driving my mustang"
+ , "driving my mustang"
+ , ""));
+ everyone.push_back(Credit("Marcello Federico"
+ , "federico at itc at it"
+ , "Researcher at ITC-irst, Trento, Italy"
+ , "IRST language model"));
+ everyone.push_back(Credit("Evan Herbst"
+ , "Small college in upstate New York"
+ , ""
+ , ""));
+ everyone.push_back(Credit("Philipp Koehn"
+ , "only between 2 and 4am"
+ , ""
+ , "Nothing fazes this dude"));
+ everyone.push_back(Credit("Christine Moran"
+ , "weird building at MIT"
+ , ""
+ , ""));
+ everyone.push_back(Credit("Wade Shen"
+ , "via morse code"
+ , "buying another laptop"
+ , ""));
+ everyone.push_back(Credit("Richard Zens"
+ , "richard at aachen dot de"
+ , ""
+ , "ambiguous source input, confusion networks, confusing source code"));
+ everyone.push_back(Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/"
+ , "phd student at Edinburgh Uni. Original Moses developer"
+ , "general queries/ flames on Moses. Doing stuff on async factored translation, so anything on that as well"));
+
+ sort(everyone.begin(), everyone.end());
+
+
+ cerr << "Moses - A beam search decoder for phrase-based statistical machine translation models" << endl
+ << "Copyright (C) 2006 University of Edinburgh" << endl << endl
+
+ << "This library is free software; you can redistribute it and/or" << endl
+ << "modify it under the terms of the GNU Lesser General Public" << endl
+ << "License as published by the Free Software Foundation; either" << endl
+ << "version 2.1 of the License, or (at your option) any later version." << endl << endl
+
+ << "This library is distributed in the hope that it will be useful," << endl
+ << "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
+ << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
+ << "Lesser General Public License for more details." << endl << endl
+
+ << "You should have received a copy of the GNU Lesser General Public" << endl
+ << "License along with this library; if not, write to the Free Software" << endl
+ << "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
+ << "***********************************************************************" << endl << endl
+ << "Built on " << __DATE__ << endl << endl
+ << "CREDITS" << endl << endl;
+
+ ostream_iterator<Credit> out(cerr, "\n");
+ copy(everyone.begin(), everyone.end(), out);
+ cerr << endl << endl;
+}
+
+}
+
+
diff --git a/moses/src/Parameter.h b/moses/src/Parameter.h
new file mode 100644
index 000000000..9d8ade5aa
--- /dev/null
+++ b/moses/src/Parameter.h
@@ -0,0 +1,81 @@
+// $Id: Parameter.h 2977 2010-03-15 13:08:58Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+#include "TypeDef.h"
+
+namespace Moses
+{
+
+typedef std::vector<std::string> PARAM_VEC;
+typedef std::map<std::string, PARAM_VEC > PARAM_MAP;
+typedef std::map<std::string, bool> PARAM_BOOL;
+typedef std::map<std::string, std::string > PARAM_STRING;
+
+/** Handles parameter values set in config file or on command line.
+ * Process raw parameter data (names and values as strings) for StaticData
+ * to parse; to get useful values, see StaticData. */
+class Parameter
+{
+protected:
+ PARAM_MAP m_setting;
+ PARAM_BOOL m_valid;
+ PARAM_STRING m_abbreviation;
+ PARAM_STRING m_description;
+
+ std::string FindParam(const std::string &paramSwitch, int argc, char* argv[]);
+ void OverwriteParam(const std::string &paramSwitch, const std::string &paramName, int argc, char* argv[]);
+ bool ReadConfigFile( std::string filePath );
+ bool FilesExist(const std::string &paramName, size_t tokenizeIndex,std::vector<std::string> const& fileExtension=std::vector<std::string>(1,""));
+ bool isOption(const char* token);
+ bool Validate();
+
+ void AddParam(const std::string &paramName, const std::string &description);
+ void AddParam(const std::string &paramName, const std::string &abbrevName, const std::string &description);
+
+ void PrintCredit();
+
+public:
+ Parameter();
+ ~Parameter();
+ bool LoadParam(int argc, char* argv[]);
+ bool LoadParam(const std::string &filePath);
+ void Explain();
+
+ /** return a vector of strings holding the whitespace-delimited values on the ini-file line corresponding to the given parameter name */
+ const PARAM_VEC &GetParam(const std::string &paramName)
+ {
+ return m_setting[paramName];
+ }
+ /** check if parameter is defined (either in moses.ini or as switch) */
+ bool isParamSpecified(const std::string &paramName)
+ {
+ return m_setting.find( paramName ) != m_setting.end();
+ }
+
+};
+
+}
+
diff --git a/moses/src/PartialTranslOptColl.cpp b/moses/src/PartialTranslOptColl.cpp
new file mode 100644
index 000000000..2f6fefbdf
--- /dev/null
+++ b/moses/src/PartialTranslOptColl.cpp
@@ -0,0 +1,104 @@
+// $Id: PartialTranslOptColl.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "PartialTranslOptColl.h"
+#include <algorithm>
+
+namespace Moses
+{
+/** constructor, intializes counters and thresholds */
+PartialTranslOptColl::PartialTranslOptColl()
+{
+ m_bestScore = -std::numeric_limits<float>::infinity();
+ m_worstScore = -std::numeric_limits<float>::infinity();
+ m_maxSize = StaticData::Instance().GetMaxNoPartTransOpt();
+ m_totalPruned = 0;
+}
+
+
+/** add a partial translation option to the collection (without pruning) */
+void PartialTranslOptColl::AddNoPrune(TranslationOption *partialTranslOpt)
+{
+ partialTranslOpt->CalcScore();
+ if (partialTranslOpt->GetFutureScore() >= m_worstScore)
+ {
+ m_list.push_back(partialTranslOpt);
+ if (partialTranslOpt->GetFutureScore() > m_bestScore)
+ m_bestScore = partialTranslOpt->GetFutureScore();
+ }
+ else
+ {
+ m_totalPruned++;
+ delete partialTranslOpt;
+ }
+}
+
+/** add a partial translation option to the collection, prune if necessary.
+ * This is done similar to the Prune() in TranslationOptionCollection */
+
+void PartialTranslOptColl::Add(TranslationOption *partialTranslOpt)
+{
+ // add
+ AddNoPrune( partialTranslOpt );
+
+ // done if not too large (lazy pruning, only if twice as large as max)
+ if ( m_list.size() > 2 * m_maxSize ) {
+ Prune();
+ }
+}
+
+
+/** helper, used by pruning */
+bool ComparePartialTranslationOption(const TranslationOption *a, const TranslationOption *b)
+{
+ return a->GetFutureScore() > b->GetFutureScore();
+}
+
+/** pruning, remove partial translation options, if list too big */
+void PartialTranslOptColl::Prune()
+{
+ // done if not too big
+ if ( m_list.size() <= m_maxSize ) {
+ return;
+ }
+
+ // TRACE_ERR( "pruning partial translation options from size " << m_list.size() << std::endl);
+
+ // find nth element
+ nth_element(m_list.begin(),
+ m_list.begin() + m_maxSize,
+ m_list.end(),
+ ComparePartialTranslationOption);
+
+ m_worstScore = m_list[ m_maxSize-1 ]->GetFutureScore();
+ // delete the rest
+ for (size_t i = m_maxSize ; i < m_list.size() ; ++i)
+ {
+ delete m_list[i];
+ m_totalPruned++;
+ }
+ m_list.resize(m_maxSize);
+ // TRACE_ERR( "pruned to size " << m_list.size() << ", total pruned: " << m_totalPruned << std::endl);
+}
+
+}
+
+
diff --git a/moses/src/PartialTranslOptColl.h b/moses/src/PartialTranslOptColl.h
new file mode 100644
index 000000000..8d3d7a749
--- /dev/null
+++ b/moses/src/PartialTranslOptColl.h
@@ -0,0 +1,85 @@
+// $Id: PartialTranslOptColl.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PartialTranslOptColl_h
+#define moses_PartialTranslOptColl_h
+
+#include <list>
+#include <iostream>
+#include "TranslationOption.h"
+#include "Util.h"
+#include "StaticData.h"
+#include "FactorTypeSet.h"
+
+namespace Moses
+{
+
+/** Contains partial translation options, while these are constructed in the class TranslationOption.
+ * The factored translation model allows for multiple translation and
+ * generation steps during a single Hypothesis expansion. For efficiency,
+ * all these expansions are precomputed and stored as TranslationOption.
+ * The expansion process itself may be still explode, so efficient handling
+ * of partial translation options during expansion is required.
+ * This class assists in this tasks by implementing pruning.
+ * This implementation is similar to the one in HypothesisStack. */
+
+class PartialTranslOptColl
+{
+ protected:
+ std::vector<TranslationOption*> m_list;
+ float m_bestScore; /**< score of the best translation option */
+ float m_worstScore; /**< score of the worse translation option */
+ size_t m_maxSize; /**< maximum number of translation options allowed */
+ size_t m_totalPruned; /**< number of options pruned */
+
+public:
+ PartialTranslOptColl();
+
+ /** destructor, cleans out list */
+ ~PartialTranslOptColl()
+ {
+ RemoveAllInColl( m_list );
+ }
+
+ void AddNoPrune(TranslationOption *partialTranslOpt);
+ void Add(TranslationOption *partialTranslOpt);
+ void Prune();
+
+ /** returns list of translation options */
+ const std::vector<TranslationOption*>& GetList() const {
+ return m_list;
+ }
+
+ /** clear out the list */
+ void DetachAll()
+ {
+ m_list.clear();
+ }
+
+ /** return number of pruned partial hypotheses */
+ size_t GetPrunedCount() {
+ return m_totalPruned;
+ }
+
+};
+
+}
+#endif
diff --git a/moses/src/Phrase.cpp b/moses/src/Phrase.cpp
new file mode 100644
index 000000000..eca5a1f34
--- /dev/null
+++ b/moses/src/Phrase.cpp
@@ -0,0 +1,387 @@
+ // $Id: Phrase.cpp 2841 2010-02-03 10:23:32Z abarun $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include "memory.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "StaticData.h" // GetMaxNumFactors
+
+using namespace std;
+
+namespace Moses
+{
+Phrase::Phrase(const Phrase &copy)
+:m_direction(copy.m_direction)
+,m_phraseSize(copy.m_phraseSize)
+,m_arraySize(copy.m_arraySize)
+,m_words(copy.m_words)
+{
+}
+
+Phrase& Phrase::operator=(const Phrase& x)
+{
+ if(this!=&x)
+ {
+
+ m_direction=x.m_direction;
+ m_phraseSize=x.m_phraseSize;
+ m_arraySize=x.m_arraySize;
+
+ m_words = x.m_words;
+ }
+ return *this;
+}
+
+
+Phrase::Phrase(FactorDirection direction)
+ : m_direction(direction)
+ , m_phraseSize(0)
+ , m_arraySize(ARRAY_SIZE_INCR)
+ , m_words(ARRAY_SIZE_INCR)
+{
+}
+
+Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
+:m_direction(direction)
+,m_phraseSize(0)
+,m_arraySize(ARRAY_SIZE_INCR)
+,m_words(ARRAY_SIZE_INCR)
+{
+ for (size_t currPos = 0 ; currPos < mergeWords.size() ; currPos++)
+ {
+ AddWord(*mergeWords[currPos]);
+ }
+}
+
+Phrase::~Phrase()
+{
+}
+
+void Phrase::MergeFactors(const Phrase &copy)
+{
+ assert(GetSize() == copy.GetSize());
+ size_t size = GetSize();
+ const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
+ for (size_t currPos = 0 ; currPos < size ; currPos++)
+ {
+ for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
+ {
+ FactorType factorType = static_cast<FactorType>(currFactor);
+ const Factor *factor = copy.GetFactor(currPos, factorType);
+ if (factor != NULL)
+ SetFactor(currPos, factorType, factor);
+ }
+ }
+}
+
+void Phrase::MergeFactors(const Phrase &copy, FactorType factorType)
+{
+ assert(GetSize() == copy.GetSize());
+ for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)
+ SetFactor(currPos, factorType, copy.GetFactor(currPos, factorType));
+}
+
+void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec)
+{
+ assert(GetSize() == copy.GetSize());
+ for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)
+ for (std::vector<FactorType>::const_iterator i = factorVec.begin();
+ i != factorVec.end(); ++i)
+ {
+ SetFactor(currPos, *i, copy.GetFactor(currPos, *i));
+ }
+}
+
+
+Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
+{
+ Phrase retPhrase(m_direction);
+
+ for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++)
+ {
+ Word &word = retPhrase.AddWord();
+ word = GetWord(currPos);
+ }
+
+ return retPhrase;
+}
+
+std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
+{
+ Phrase retPhrase(m_direction);
+ stringstream strme;
+ for (size_t pos = 0 ; pos < GetSize() ; pos++)
+ {
+ strme << GetWord(pos).GetString(factorsToPrint, true);
+ }
+
+ return strme.str();
+}
+
+Word &Phrase::AddWord()
+{
+ if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0)
+ { // need to expand array
+ m_arraySize += ARRAY_SIZE_INCR;
+ m_words.resize(m_arraySize);
+ }
+
+ return m_words[m_phraseSize++];
+}
+
+void Phrase::Append(const Phrase &endPhrase){
+
+ for (size_t i = 0; i < endPhrase.GetSize();i++){
+ AddWord(endPhrase.GetWord(i));
+ }
+}
+
+vector< vector<string> > Phrase::Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter)
+{
+ bool isMultiCharDelimiter = factorDelimiter.size() > 1;
+ // parse
+ vector< vector<string> > phraseVector;
+ vector<string> annotatedWordVector = Tokenize(phraseString);
+ // KOMMA|none ART|Def.Z NN|Neut.NotGen.Sg VVFIN|none
+ // to
+ // "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"
+
+ for (size_t phrasePos = 0 ; phrasePos < annotatedWordVector.size() ; phrasePos++)
+ {
+ string &annotatedWord = annotatedWordVector[phrasePos];
+ vector<string> factorStrVector;
+ if (isMultiCharDelimiter) {
+ factorStrVector = TokenizeMultiCharSeparator(annotatedWord, factorDelimiter);
+ } else {
+ factorStrVector = Tokenize(annotatedWord, factorDelimiter);
+ }
+ // KOMMA|none
+ // to
+ // "KOMMA" "none"
+ if (factorStrVector.size() != factorOrder.size()) {
+ TRACE_ERR( "[ERROR] Malformed input at " << /*StaticData::Instance().GetCurrentInputPosition() <<*/ std::endl
+ << " Expected input to have words composed of " << factorOrder.size() << " factor(s) (form FAC1|FAC2|...)" << std::endl
+ << " but instead received input with " << factorStrVector.size() << " factor(s).\n");
+ abort();
+ }
+ phraseVector.push_back(factorStrVector);
+ }
+ return phraseVector;
+}
+
+void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
+ , const vector< vector<string> > &phraseVector)
+{
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)
+ {
+ // add word this phrase
+ Word &word = AddWord();
+ for (size_t currFactorIndex= 0 ; currFactorIndex < factorOrder.size() ; currFactorIndex++)
+ {
+ FactorType factorType = factorOrder[currFactorIndex];
+ const string &factorStr = phraseVector[phrasePos][currFactorIndex];
+ const Factor *factor = factorCollection.AddFactor(m_direction, factorType, factorStr);
+ word[factorType] = factor;
+ }
+ }
+}
+
+void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
+ , const string &phraseString
+ , const string &factorDelimiter)
+{
+ vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);
+ CreateFromString(factorOrder, phraseVector);
+}
+
+bool Phrase::operator < (const Phrase &compare) const
+{
+#ifdef min
+#undef min
+#endif
+ size_t thisSize = GetSize()
+ ,compareSize = compare.GetSize();
+
+ // decide by using length. quick decision
+ if (thisSize != compareSize)
+ {
+ return thisSize < compareSize;
+ }
+ else
+ {
+ size_t minSize = std::min( thisSize , compareSize );
+
+ const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
+ // taken from word.Compare()
+ for (size_t i = 0 ; i < maxNumFactors ; i++)
+ {
+ FactorType factorType = static_cast<FactorType>(i);
+
+ for (size_t currPos = 0 ; currPos < minSize ; currPos++)
+ {
+ const Factor *thisFactor = GetFactor(currPos, factorType)
+ ,*compareFactor = compare.GetFactor(currPos, factorType);
+
+ if (thisFactor != NULL && compareFactor != NULL)
+ {
+ const int result = thisFactor->Compare(*compareFactor);
+ if (result == 0)
+ {
+ continue;
+ }
+ else
+ {
+ return (result < 0);
+ }
+ }
+ }
+ }
+
+ // identical
+ return false;
+ }
+}
+
+bool Phrase::operator== (const Phrase &compare) const
+{
+ return !(*this < compare) && !(compare < *this);
+}
+
+bool Phrase::Contains(const vector< vector<string> > &subPhraseVector
+ , const vector<FactorType> &inputFactor) const
+{
+ const size_t subSize = subPhraseVector.size()
+ ,thisSize= GetSize();
+ if (subSize > thisSize)
+ return false;
+
+ // try to match word-for-word
+ for (size_t currStartPos = 0 ; currStartPos < (thisSize - subSize + 1) ; currStartPos++)
+ {
+ bool match = true;
+
+ for (size_t currFactorIndex = 0 ; currFactorIndex < inputFactor.size() ; currFactorIndex++)
+ {
+ FactorType factorType = inputFactor[currFactorIndex];
+ for (size_t currSubPos = 0 ; currSubPos < subSize ; currSubPos++)
+ {
+ size_t currThisPos = currSubPos + currStartPos;
+ const string &subStr = subPhraseVector[currSubPos][currFactorIndex]
+ ,&thisStr = GetFactor(currThisPos, factorType)->GetString();
+ if (subStr != thisStr)
+ {
+ match = false;
+ break;
+ }
+ }
+ if (!match)
+ break;
+ }
+
+ if (match)
+ return true;
+ }
+ return false;
+}
+
+bool Phrase::IsCompatible(const Phrase &inputPhrase) const
+{
+ if (inputPhrase.GetSize() != GetSize())
+ {
+ return false;
+ }
+
+ const size_t size = GetSize();
+
+ const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
+ for (size_t currPos = 0 ; currPos < size ; currPos++)
+ {
+ for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
+ {
+ FactorType factorType = static_cast<FactorType>(currFactor);
+ const Factor *thisFactor = GetFactor(currPos, factorType)
+ ,*inputFactor = inputPhrase.GetFactor(currPos, factorType);
+ if (thisFactor != NULL && inputFactor != NULL && thisFactor != inputFactor)
+ return false;
+ }
+ }
+ return true;
+
+}
+
+bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const
+{
+ if (inputPhrase.GetSize() != GetSize()) { return false; }
+ for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)
+ {
+ if (GetFactor(currPos, factorType) != inputPhrase.GetFactor(currPos, factorType))
+ return false;
+ }
+ return true;
+}
+
+bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const
+{
+ if (inputPhrase.GetSize() != GetSize()) { return false; }
+ for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)
+ {
+ for (std::vector<FactorType>::const_iterator i = factorVec.begin();
+ i != factorVec.end(); ++i)
+ {
+ if (GetFactor(currPos, *i) != inputPhrase.GetFactor(currPos, *i))
+ return false;
+ }
+ }
+ return true;
+}
+
+void Phrase::InitializeMemPool()
+{
+}
+
+void Phrase::FinalizeMemPool()
+{
+}
+
+TO_STRING_BODY(Phrase);
+
+// friend
+ostream& operator<<(ostream& out, const Phrase& phrase)
+{
+// out << "(size " << phrase.GetSize() << ") ";
+ for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++)
+ {
+ const Word &word = phrase.GetWord(pos);
+ out << word;
+ }
+ return out;
+}
+
+}
+
+
diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h
new file mode 100644
index 000000000..885d7bdbc
--- /dev/null
+++ b/moses/src/Phrase.h
@@ -0,0 +1,178 @@
+// $Id: Phrase.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Phrase_h
+#define moses_Phrase_h
+
+#include <iostream>
+#include <vector>
+#include <list>
+#include <string>
+#include "Word.h"
+#include "WordsBitmap.h"
+#include "TypeDef.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+class Phrase
+{
+ friend std::ostream& operator<<(std::ostream&, const Phrase&);
+ private:
+
+ FactorDirection m_direction; /** Reusing Direction enum to really mean which language
+ Input = Source, Output = Target.
+ Not really used, but nice to know for debugging purposes
+ */
+ size_t m_phraseSize; //number of words
+ size_t m_arraySize; /** current size of vector m_words. This number is equal or bigger
+ than m_phraseSize. Used for faster allocation of m_word */
+ std::vector<Word> m_words;
+
+public:
+ /** No longer does anything as not using mem pool for Phrase class anymore */
+ static void InitializeMemPool();
+ static void FinalizeMemPool();
+
+ /** copy constructor */
+ Phrase(const Phrase &copy);
+ Phrase& operator=(const Phrase&);
+
+ /** create empty phrase
+ * \param direction = language (Input = Source, Output = Target)
+ */
+ Phrase(FactorDirection direction);
+ /** create phrase from vectors of words */
+ Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);
+
+ /** destructor */
+ virtual ~Phrase();
+
+ /** parse a string from phrase table or sentence input and create a 2D vector of strings
+ * \param phraseString string to parse
+ * \param factorOrder factors in the parse string. This argument is not fully used, only as a check to make ensure
+ * number of factors is what was promised
+ * \param factorDelimiter what char to use to separate factor strings from each other. Usually use '|'. Can be multi-char
+ */
+ static std::vector< std::vector<std::string> > Parse(
+ const std::string &phraseString
+ , const std::vector<FactorType> &factorOrder
+ , const std::string& factorDelimiter);
+ /** Fills phrase with words from 2D string vector
+ * \param factorOrder factor types of each element in 2D string vector
+ * \param phraseVector 2D string vector
+ */
+ void CreateFromString(const std::vector<FactorType> &factorOrder
+ , const std::vector< std::vector<std::string> > &phraseVector);
+ /** Fills phrase with words from format string, typically from phrase table or sentence input
+ * \param factorOrder factor types of each element in 2D string vector
+ * \param phraseString formatted input string to parse
+ * \param factorDelimiter delimiter, as used by Parse()
+ */
+ void CreateFromString(const std::vector<FactorType> &factorOrder
+ , const std::string &phraseString
+ , const std::string &factorDelimiter);
+
+ /** copy factors from the other phrase to this phrase.
+ IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten
+ */
+ void MergeFactors(const Phrase &copy);
+ //! copy a single factor (specified by factorType)
+ void MergeFactors(const Phrase &copy, FactorType factorType);
+ //! copy all factors specified in factorVec and none others
+ void MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec);
+
+ /** compare 2 phrases to ensure no factors are lost if the phrases are merged
+ * must run IsCompatible() to ensure incompatible factors aren't being overwritten
+ */
+ bool IsCompatible(const Phrase &inputPhrase) const;
+ bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
+ bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
+
+ //! really means what language. Input = Source, Output = Target
+ inline FactorDirection GetDirection() const
+ {
+ return m_direction;
+ }
+
+ //! number of words
+ inline size_t GetSize() const
+ {
+ return m_phraseSize;
+ }
+
+ //! word at a particular position
+ inline const Word &GetWord(size_t pos) const
+ {
+ return m_words[pos];
+ }
+ inline Word &GetWord(size_t pos)
+ {
+ return m_words[pos];
+ }
+ //! particular factor at a particular position
+ inline const Factor *GetFactor(size_t pos, FactorType factorType) const
+ {
+ const Word &ptr = m_words[pos];
+ return ptr[factorType];
+ }
+ inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor)
+ {
+ Word &ptr = m_words[pos];
+ ptr[factorType] = factor;
+ }
+
+ //! whether the 2D vector is a substring of this phrase
+ bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
+ , const std::vector<FactorType> &inputFactor) const;
+
+ //! create an empty word at the end of the phrase
+ Word &AddWord();
+ //! create copy of input word at the end of the phrase
+ void AddWord(const Word &newWord)
+ {
+ AddWord() = newWord;
+ }
+ //! create new phrase class that is a substring of this phrase
+ Phrase GetSubString(const WordsRange &wordsRange) const;
+
+ //! return a string rep of the phrase. Each factor is separated by the factor delimiter as specified in StaticData class
+ std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
+
+ TO_STRING();
+
+ /** transitive comparison between 2 phrases
+ * used to insert & find phrase in dictionary
+ */
+ bool operator< (const Phrase &compare) const;
+
+ bool operator== (const Phrase &compare) const;
+
+
+ /** appends a phrase at the end of current phrase **/
+ void Append(const Phrase &endPhrase);
+};
+
+
+}
+#endif
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
new file mode 100644
index 000000000..a31431597
--- /dev/null
+++ b/moses/src/PhraseDictionary.cpp
@@ -0,0 +1,145 @@
+// $Id: PhraseDictionary.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "PhraseDictionary.h"
+#include "PhraseDictionaryTreeAdaptor.h"
+#include "StaticData.h"
+#include "InputType.h"
+#include "TranslationOption.h"
+
+namespace Moses {
+
+const TargetPhraseCollection *PhraseDictionary::
+GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
+{
+ return GetTargetPhraseCollection(src.GetSubString(range));
+}
+
+PhraseDictionaryFeature::PhraseDictionaryFeature
+ ( size_t numScoreComponent
+ , unsigned numInputScores
+ , const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , size_t tableLimit):
+ m_numScoreComponent(numScoreComponent),
+ m_numInputScores(numInputScores),
+ m_input(input),
+ m_output(output),
+ m_filePath(filePath),
+ m_weight(weight),
+ m_tableLimit(tableLimit)
+ {
+ const StaticData& staticData = StaticData::Instance();
+ const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);
+
+
+ //if we're using an in-memory phrase table, then load it now, otherwise wait
+ if (!FileExists(filePath+".binphr.idx"))
+ { // memory phrase table
+ VERBOSE(2,"using standard phrase tables" << endl);
+ if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
+ m_filePath += ".gz";
+ VERBOSE(2,"Using gzipped file" << endl);
+ }
+ if (staticData.GetInputType() != SentenceInput)
+ {
+ UserMessage::Add("Must use binary phrase table for this input type");
+ assert(false);
+ }
+
+ PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent,this);
+ assert(pdm->Load(m_input
+ , m_output
+ , m_filePath
+ , m_weight
+ , m_tableLimit
+ , staticData.GetAllLM()
+ , staticData.GetWeightWordPenalty()));
+ m_memoryDictionary.reset(pdm);
+ }
+ else
+ {
+ //don't initialise the tree dictionary until it's required
+ }
+
+
+ }
+
+ PhraseDictionary* PhraseDictionaryFeature::GetDictionary
+ (const InputType& source) {
+ PhraseDictionary* dict = NULL;
+ if (m_memoryDictionary.get()) {
+ dict = m_memoryDictionary.get();
+ } else {
+ if (!m_treeDictionary.get()) {
+ //load the tree dictionary for this thread
+ const StaticData& staticData = StaticData::Instance();
+ PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
+ assert(pdta->Load(
+ m_input
+ , m_output
+ , m_filePath
+ , m_weight
+ , m_tableLimit
+ , staticData.GetAllLM()
+ , staticData.GetWeightWordPenalty()));
+ m_treeDictionary.reset(pdta);
+ }
+ dict = m_treeDictionary.get();
+ }
+ dict->InitializeForInput(source);
+ return dict;
+ }
+
+
+
+PhraseDictionaryFeature::~PhraseDictionaryFeature() {}
+
+
+
+std::string PhraseDictionaryFeature::GetScoreProducerDescription() const
+{
+ return "PhraseModel";
+}
+
+size_t PhraseDictionaryFeature::GetNumScoreComponents() const
+{
+ return m_numScoreComponent;
+}
+
+size_t PhraseDictionaryFeature::GetNumInputScores() const
+{
+ return m_numInputScores;
+}
+
+bool PhraseDictionaryFeature::ComputeValueInTranslationOption() const {
+ return true;
+}
+
+ const PhraseDictionaryFeature* PhraseDictionary::GetFeature() const {
+ return m_feature;
+ }
+
+}
+
diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h
new file mode 100644
index 000000000..8b74d5920
--- /dev/null
+++ b/moses/src/PhraseDictionary.h
@@ -0,0 +1,133 @@
+// $Id: PhraseDictionary.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PhraseDictionary_h
+#define moses_PhraseDictionary_h
+
+#include <iostream>
+#include <map>
+#include <memory>
+#include <list>
+#include <vector>
+#include <string>
+
+#ifdef WITH_THREADS
+#include <boost/thread/tss.hpp>
+#endif
+
+#include "Phrase.h"
+#include "TargetPhrase.h"
+#include "Dictionary.h"
+#include "TargetPhraseCollection.h"
+#include "FeatureFunction.h"
+
+namespace Moses
+{
+
+class StaticData;
+class InputType;
+class WordsRange;
+
+class PhraseDictionaryFeature;
+/**
+ * Abstract base class for phrase dictionaries (tables).
+ **/
+class PhraseDictionary: public Dictionary {
+ public:
+ PhraseDictionary(size_t numScoreComponent, const PhraseDictionaryFeature* feature):
+ Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature) {}
+ //! table limit number.
+ size_t GetTableLimit() const { return m_tableLimit; }
+ DecodeType GetDecodeType() const { return Translate; }
+ const PhraseDictionaryFeature* GetFeature() const;
+ /** set/change translation weights and recalc weighted score for each translation.
+ * TODO This may be redundant now we use ScoreCollection
+ */
+ virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
+
+ //! find list of translations that can translate src. Only for phrase input
+ virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
+ //! find list of translations that can translate a portion of src. Used by confusion network decoding
+ virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
+ //! Create entry for translation of source to targetPhrase
+ virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
+ virtual void InitializeForInput(InputType const& source) = 0;
+
+
+ protected:
+ size_t m_tableLimit;
+ const PhraseDictionaryFeature* m_feature;
+};
+
+
+/**
+ * Represents a feature derived from a phrase table.
+ */
+class PhraseDictionaryFeature : public StatelessFeatureFunction
+{
+
+
+ public:
+ PhraseDictionaryFeature( size_t numScoreComponent
+ , unsigned numInputScores
+ , const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , size_t tableLimit);
+
+ virtual ~PhraseDictionaryFeature();
+
+ virtual bool ComputeValueInTranslationOption() const;
+
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const
+ {
+ return "tm";
+ }
+ size_t GetNumScoreComponents() const;
+
+ size_t GetNumInputScores() const;
+
+ PhraseDictionary* GetDictionary(const InputType& source);
+
+ private:
+ size_t m_numScoreComponent;
+ unsigned m_numInputScores;
+ std::vector<FactorType> m_input;
+ std::vector<FactorType> m_output;
+ std::string m_filePath;
+ std::vector<float> m_weight;
+ size_t m_tableLimit;
+ //Only instantiate one of these
+ std::auto_ptr<PhraseDictionary> m_memoryDictionary;
+ #ifdef WITH_THREADS
+ boost::thread_specific_ptr<PhraseDictionary> m_treeDictionary;
+ #else
+ std::auto_ptr<PhraseDictionary> m_treeDictionary;
+ #endif
+
+};
+
+
+
+}
+#endif
diff --git a/moses/src/PhraseDictionaryDynSuffixArray.cpp b/moses/src/PhraseDictionaryDynSuffixArray.cpp
new file mode 100644
index 000000000..027064bb1
--- /dev/null
+++ b/moses/src/PhraseDictionaryDynSuffixArray.cpp
@@ -0,0 +1,494 @@
+#include "PhraseDictionaryDynSuffixArray.h"
+#include "DynSAInclude/utils.h"
+#include "FactorCollection.h"
+#include "StaticData.h"
+#include "TargetPhrase.h"
+#include <iomanip>
+
+namespace Moses {
+PhraseDictionaryDynSuffixArray::PhraseDictionaryDynSuffixArray(size_t numScoreComponent,
+ PhraseDictionaryFeature* feature): PhraseDictionary(numScoreComponent, feature),
+ maxPhraseLength_(StaticData::Instance().GetMaxPhraseLength()), maxSampleSize_(500) {
+ srcSA_ = 0;
+ trgSA_ = 0;
+ srcCrp_ = new vector<wordID_t>();
+ trgCrp_ = new vector<wordID_t>();
+ vocab_ = new Vocab(false);
+ scoreCmp_ = 0;
+}
+PhraseDictionaryDynSuffixArray::~PhraseDictionaryDynSuffixArray(){
+ if(srcSA_) delete srcSA_;
+ if(trgSA_) delete trgSA_;
+ if(vocab_) delete vocab_;
+ if(srcCrp_) delete srcCrp_;
+ if(trgCrp_) delete trgCrp_;
+ if(scoreCmp_) delete scoreCmp_;
+}
+bool PhraseDictionaryDynSuffixArray::Load(string source, string target, string alignments
+ , const std::vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP)
+{
+
+ m_weight = weight;
+ m_tableLimit = tableLimit;
+ m_languageModels = &languageModels;
+ m_weightWP = weightWP;
+ scoreCmp_ = new ScoresComp(m_weight);
+ InputFileStream sourceStrme(source);
+ InputFileStream targetStrme(target);
+ cerr << "Loading source and target parallel corpus...\n";
+ loadCorpus(sourceStrme, *srcCrp_, srcSntBreaks_);
+ loadCorpus(targetStrme, *trgCrp_, trgSntBreaks_);
+ assert(srcSntBreaks_.size() == trgSntBreaks_.size());
+ LoadVocabLookup();
+
+ // build suffix arrays and auxiliary arrays
+ cerr << "Building Source Suffix Array...\n";
+ srcSA_ = new DynSuffixArray(srcCrp_);
+ if(!srcSA_) return false;
+ /*cerr << "Building Target Suffix Array...\n";
+ trgSA_ = new DynSuffixArray(trgCrp_);
+ if(!trgSA_) return false;*/
+
+ InputFileStream alignStrme(alignments);
+ cerr << "Loading Alignment File...\n";
+ loadRawAlignments(alignStrme);
+ //loadAlignments(alignStrme);
+ return true;
+}
+int PhraseDictionaryDynSuffixArray::loadRawAlignments(InputFileStream& align) {
+ // stores the alignments in the raw file format
+ string line;
+ vector<int> vtmp;
+ while(getline(align, line)) {
+ Utils::splitToInt(line, vtmp, "- ");
+ assert(vtmp.size() % 2 == 0);
+ vector<short> vAlgn; // store as short ints for memory
+ iterate(vtmp, itr) vAlgn.push_back(short(*itr));
+ rawAlignments_.push_back(vAlgn);
+ }
+ return rawAlignments_.size();
+}
+int PhraseDictionaryDynSuffixArray::loadAlignments(InputFileStream& align) {
+ string line;
+ vector<int> vtmp;
+ int sntIndex(0);
+
+ while(getline(align, line)) {
+ Utils::splitToInt(line, vtmp, "- ");
+ assert(vtmp.size() % 2 == 0);
+
+ int sourceSize = GetSourceSentenceSize(sntIndex);
+ int targetSize = GetTargetSentenceSize(sntIndex);
+
+ SentenceAlignment curSnt(sntIndex, sourceSize, targetSize); // initialize empty sentence
+ for(int i=0; i < (int)vtmp.size(); i+=2) {
+ int sourcePos = vtmp[i];
+ int targetPos = vtmp[i+1];
+ assert(sourcePos < sourceSize);
+ assert(targetPos < targetSize);
+
+ curSnt.alignedList[sourcePos].push_back(targetPos); // list of target nodes for each source word
+ curSnt.numberAligned[targetPos]++; // cnt of how many source words connect to this target word
+ }
+ curSnt.srcSnt = srcCrp_ + sntIndex; // point source and target sentence
+ curSnt.trgSnt = trgCrp_ + sntIndex;
+ alignments_.push_back(curSnt);
+
+ sntIndex++;
+ }
+ return alignments_.size();
+}
+SentenceAlignment PhraseDictionaryDynSuffixArray::getSentenceAlignment(const int sntIndex, bool trg2Src) const {
+ // retrieves the alignments in the format used by SentenceAlignment.extract()
+ int sntGiven = trg2Src ? GetTargetSentenceSize(sntIndex) : GetSourceSentenceSize(sntIndex);
+ int sntExtract = trg2Src ? GetSourceSentenceSize(sntIndex) : GetTargetSentenceSize(sntIndex);
+ vector<short> alignment = rawAlignments_.at(sntIndex);
+ SentenceAlignment curSnt(sntIndex, sntGiven, sntExtract); // initialize empty sentence
+ for(int i=0; i < alignment.size(); i+=2) {
+ int sourcePos = alignment[i];
+ int targetPos = alignment[i+1];
+ if(trg2Src) {
+ curSnt.alignedList[targetPos].push_back(sourcePos); // list of source nodes for each target word
+ curSnt.numberAligned[sourcePos]++; // cnt of how many target words connect to this source word
+ }
+ else {
+ curSnt.alignedList[sourcePos].push_back(targetPos); // list of target nodes for each source word
+ curSnt.numberAligned[targetPos]++; // cnt of how many source words connect to this target word
+ }
+ }
+ curSnt.srcSnt = srcCrp_ + sntIndex; // point source and target sentence
+ curSnt.trgSnt = trgCrp_ + sntIndex;
+
+ return curSnt;
+}
+bool PhraseDictionaryDynSuffixArray::extractPhrases(const int& sntIndex, const int& wordIndex,
+ const int& sourceSize, vector<PhrasePair*>& phrasePairs, bool trg2Src) const {
+ /* extractPhrases() can extract the matching phrases for both directions by using the trg2Src
+ * parameter */
+ SentenceAlignment curSnt = getSentenceAlignment(sntIndex, trg2Src);
+ // get span of phrase in source sentence
+ int beginSentence = srcSntBreaks_[sntIndex];
+ int rightIdx = wordIndex - beginSentence
+ ,leftIdx = rightIdx - sourceSize + 1;
+ return curSnt.Extract(maxPhraseLength_, phrasePairs, leftIdx, rightIdx); // extract all phrase Alignments in sentence
+}
+void PhraseDictionaryDynSuffixArray::LoadVocabLookup()
+{
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ Vocab::Word2Id::const_iterator iter;
+ for (iter = vocab_->vocabStart(); iter != vocab_->vocabEnd(); ++iter)
+ {
+ const word_t &str = iter->first;
+ wordID_t arrayId = iter->second;
+ //const Factor *factor = factorCollection.AddFactor(Input, 0, str, false);
+ const Factor *factor = factorCollection.AddFactor(Input, 0, str);
+ vocabLookup_[factor] = arrayId;
+ vocabLookupRev_[arrayId] = factor;
+ }
+
+}
+void PhraseDictionaryDynSuffixArray::InitializeForInput(const InputType& input)
+{
+ assert(&input == &input);
+}
+void PhraseDictionaryDynSuffixArray::CleanUp() {
+ wordPairCache_.clear();
+}
+void PhraseDictionaryDynSuffixArray::SetWeightTransModel(const std::vector<float, std::allocator<float> >&) {
+ return;
+}
+int PhraseDictionaryDynSuffixArray::loadCorpus(InputFileStream& corpus, vector<wordID_t>& cArray,
+ vector<wordID_t>& sntArray) {
+ string line, word;
+ int sntIdx(0);
+ corpus.seekg(0);
+ while(getline(corpus, line)) {
+ sntArray.push_back(sntIdx);
+ std::istringstream ss(line.c_str());
+ while(ss >> word) {
+ ++sntIdx;
+ cArray.push_back(vocab_->getWordID(word));
+ }
+ }
+ //cArray.push_back(Vocab::kOOVWordID); // signify end of corpus
+ return cArray.size();
+}
+bool PhraseDictionaryDynSuffixArray::getLocalVocabIDs(const Phrase& src, SAPhrase &output) const {
+ // looks up the SA vocab ids for the current src phrase
+ size_t phraseSize = src.GetSize();
+ for (size_t pos = 0; pos < phraseSize; ++pos) {
+ const Word &word = src.GetWord(pos);
+ const Factor *factor = word.GetFactor(0);
+ std::map<const Factor *, wordID_t>::const_iterator iterLookup;
+ iterLookup = vocabLookup_.find(factor);
+
+ if (iterLookup == vocabLookup_.end())
+ { // oov
+ return false;
+ }
+ else
+ {
+ wordID_t arrayId = iterLookup->second;
+ output.SetId(pos, arrayId);
+ //cerr << arrayId << " ";
+ }
+ }
+ return true;
+}
+pair<float, float> PhraseDictionaryDynSuffixArray::getLexicalWeight(const PhrasePair& phrasepair) const {
+ float srcLexWeight(1.0), trgLexWeight(1.0);
+ map<pair<wordID_t, wordID_t>, float> targetProbs; // collect sum of target probs given source words
+ //const SentenceAlignment& alignment = alignments_[phrasepair.m_sntIndex];
+ const SentenceAlignment& alignment = getSentenceAlignment(phrasepair.m_sntIndex);
+ std::map<pair<wordID_t, wordID_t>, pair<float, float> >::const_iterator itrCache;
+ // for each source word
+ for(int srcIdx = phrasepair.m_startSource; srcIdx <= phrasepair.m_endSource; ++srcIdx) {
+ float srcSumPairProbs(0);
+ wordID_t srcWord = srcCrp_->at(srcIdx + srcSntBreaks_[phrasepair.m_sntIndex]); // localIDs
+ const vector<int>& srcWordAlignments = alignment.alignedList.at(srcIdx);
+ if(srcWordAlignments.size() == 0) { // get p(NULL|src)
+ pair<wordID_t, wordID_t> wordpair = std::make_pair(srcWord, Vocab::kOOVWordID);
+ itrCache = wordPairCache_.find(wordpair);
+ if(itrCache == wordPairCache_.end()) { // if not in cache
+ cacheWordProbs(srcWord);
+ itrCache = wordPairCache_.find(wordpair); // search cache again
+ }
+ assert(itrCache != wordPairCache_.end());
+ srcSumPairProbs += itrCache->second.first;
+ targetProbs[wordpair] = itrCache->second.second;
+ }
+ else { // extract p(trg|src)
+ for(int i = 0; i < srcWordAlignments.size(); ++i) { // for each aligned word
+ int trgIdx = srcWordAlignments[i];
+ wordID_t trgWord = trgCrp_->at(trgIdx + trgSntBreaks_[phrasepair.m_sntIndex]);
+ // get probability of this source->target word pair
+ pair<wordID_t, wordID_t> wordpair = std::make_pair(srcWord, trgWord);
+ itrCache = wordPairCache_.find(wordpair);
+ if(itrCache == wordPairCache_.end()) { // if not in cache
+ cacheWordProbs(srcWord);
+ itrCache = wordPairCache_.find(wordpair); // search cache again
+ }
+ assert(itrCache != wordPairCache_.end());
+ srcSumPairProbs += itrCache->second.first;
+ targetProbs[wordpair] = itrCache->second.second;
+ }
+ }
+ float srcNormalizer = srcWordAlignments.size() < 2 ? 1.0 : 1.0 / float(srcWordAlignments.size());
+ srcLexWeight *= (srcNormalizer * srcSumPairProbs);
+ } // end for each source word
+ for(int trgIdx = phrasepair.m_startTarget; trgIdx <= phrasepair.m_endTarget; ++trgIdx) {
+ float trgSumPairProbs(0);
+ wordID_t trgWord = trgCrp_->at(trgIdx + trgSntBreaks_[phrasepair.m_sntIndex]);
+ iterate(targetProbs, trgItr) {
+ if(trgItr->first.second == trgWord)
+ trgSumPairProbs += trgItr->second;
+ }
+ if(trgSumPairProbs == 0) continue; // currently don't store target-side SA
+ int noAligned = alignment.numberAligned.at(trgIdx);
+ float trgNormalizer = noAligned < 2 ? 1.0 : 1.0 / float(noAligned);
+ trgLexWeight *= (trgNormalizer * trgSumPairProbs);
+ }
+ // TODO::Need to get p(NULL|trg)
+ return pair<float, float>(srcLexWeight, trgLexWeight);
+}
+void PhraseDictionaryDynSuffixArray::cacheWordProbs(wordID_t srcWord) const {
+ std::map<wordID_t, int> counts;
+ vector<wordID_t> vword(1, srcWord), wrdIndices;
+ assert(srcSA_->getCorpusIndex(&vword, &wrdIndices));
+ vector<int> sntIndexes = getSntIndexes(wrdIndices, 1);
+ float denom(0);
+ // for each occurrence of this word
+ for(int snt = 0; snt < sntIndexes.size(); ++snt) {
+ int sntIdx = sntIndexes.at(snt); // get corpus index for sentence
+ assert(sntIdx != -1);
+ int srcWrdSntIdx = wrdIndices.at(snt) - srcSntBreaks_.at(sntIdx); // get word index in sentence
+ const vector<int> srcAlg = getSentenceAlignment(sntIdx).alignedList.at(srcWrdSntIdx); // list of target words for this source word
+ //const vector<int>& srcAlg = alignments_.at(sntIdx).alignedList.at(srcWrdSntIdx); // list of target words for this source word
+ if(srcAlg.size() == 0) {
+ ++counts[Vocab::kOOVWordID]; // if not aligned then align to NULL word
+ ++denom;
+ }
+ else { //get target words aligned to srcword in this sentence
+ for(int i=0; i < srcAlg.size(); ++i) {
+ wordID_t trgWord = trgCrp_->at(srcAlg[i] + trgSntBreaks_[sntIdx]);
+ ++counts[trgWord];
+ ++denom;
+ }
+ }
+ }
+ // now we've gotten counts of all target words aligned to this source word
+ // get probs and cache all pairs
+ for(std::map<wordID_t, int>::const_iterator itrCnt = counts.begin();
+ itrCnt != counts.end(); ++itrCnt) {
+ pair<wordID_t, wordID_t> wordPair = std::make_pair(srcWord, itrCnt->first);
+ float srcTrgPrb = float(itrCnt->second) / float(denom); // gives p(src->trg)
+ float trgSrcPrb = float(itrCnt->second) / float(counts.size()); // gives p(trg->src)
+ wordPairCache_[wordPair] = pair<float, float>(srcTrgPrb, trgSrcPrb);
+ }
+}
+SAPhrase PhraseDictionaryDynSuffixArray::trgPhraseFromSntIdx(const PhrasePair& phrasepair) const {
+// takes sentence indexes and looks up vocab IDs
+ SAPhrase phraseIds(phrasepair.GetTargetSize());
+ int sntIndex = phrasepair.m_sntIndex;
+ int id(-1), pos(0);
+ for(int i=phrasepair.m_startTarget; i <= phrasepair.m_endTarget; ++i) { // look up trg words
+ id = trgCrp_->at(trgSntBreaks_[sntIndex] + i);
+ phraseIds.SetId(pos++, id);
+ }
+ return phraseIds;
+}
+
+TargetPhrase* PhraseDictionaryDynSuffixArray::getMosesFactorIDs(const SAPhrase& phrase) const {
+ TargetPhrase* targetPhrase = new TargetPhrase(Output);
+ std::map<wordID_t, const Factor *>::const_iterator rIterLookup;
+ for(int i=0; i < phrase.words.size(); ++i) { // look up trg words
+ rIterLookup = vocabLookupRev_.find(phrase.words[i]);
+ assert(rIterLookup != vocabLookupRev_.end());
+ const Factor* factor = rIterLookup->second;
+ Word word;
+ word.SetFactor(0, factor);
+ targetPhrase->AddWord(word);
+ }
+
+ // scoring
+ return targetPhrase;
+}
+const TargetPhraseCollection *PhraseDictionaryDynSuffixArray::GetTargetPhraseCollection(const Phrase& src) const {
+ //cout << "\n" << src << "\n";
+ TargetPhraseCollection *ret = new TargetPhraseCollection();
+ size_t sourceSize = src.GetSize();
+ SAPhrase localIDs(sourceSize);
+ if(!getLocalVocabIDs(src, localIDs)) return ret;
+ float totalTrgPhrases(0);
+ std::map<SAPhrase, int> phraseCounts;
+ std::map<SAPhrase, pair<float, float> > lexicalWeights;
+ std::map<SAPhrase, pair<float, float> >::iterator itrLexW;
+ vector<unsigned> wrdIndices(0);
+ // extract sentence IDs from SA and return rightmost index of phrases
+ if(!srcSA_->getCorpusIndex(&(localIDs.words), &wrdIndices)) return ret;
+ if(wrdIndices.size() > maxSampleSize_)
+ wrdIndices = sampleSelection(wrdIndices);
+ vector<int> sntIndexes = getSntIndexes(wrdIndices, sourceSize);
+ // for each sentence with this phrase
+ for(int snt = 0; snt < sntIndexes.size(); ++snt) {
+ vector<PhrasePair*> phrasePairs; // to store all phrases possible from current sentence
+ int sntIndex = sntIndexes.at(snt); // get corpus index for sentence
+ if(sntIndex == -1) continue; // bad flag set by getSntIndexes()
+ extractPhrases(sntIndex, wrdIndices[snt], sourceSize, phrasePairs);
+ //cerr << "extracted " << phrasePairs.size() << endl;
+ totalTrgPhrases += phrasePairs.size(); // keep track of count of each extracted phrase pair
+ vector<PhrasePair*>::iterator iterPhrasePair;
+ for (iterPhrasePair = phrasePairs.begin(); iterPhrasePair != phrasePairs.end(); ++iterPhrasePair) {
+ SAPhrase phrase = trgPhraseFromSntIdx(**iterPhrasePair);
+ phraseCounts[phrase]++; // count each unique phrase
+ pair<float, float> lexWeight = getLexicalWeight(**iterPhrasePair); // get lexical weighting for this phrase pair
+ itrLexW = lexicalWeights.find(phrase); // check if phrase already has lexical weight attached
+ if((itrLexW != lexicalWeights.end()) && (itrLexW->second.first < lexWeight.first))
+ itrLexW->second = lexWeight; // if this lex weight is greater save it
+ else lexicalWeights[phrase] = lexWeight; // else save
+ }
+ // done with sentence. delete SA phrase pairs
+ RemoveAllInColl(phrasePairs);
+ } // done with all sentences
+ // convert to moses phrase pairs
+ const int maxReturn = 20;
+ std::map<SAPhrase, int>::const_iterator iterPhrases;
+ std::multimap<Scores, const SAPhrase*, ScoresComp> phraseScores (*scoreCmp_);
+ // get scores of all phrases
+ for(iterPhrases = phraseCounts.begin(); iterPhrases != phraseCounts.end(); ++iterPhrases) {
+ float trg2SrcMLE = float(iterPhrases->second) / totalTrgPhrases;
+ itrLexW = lexicalWeights.find(iterPhrases->first);
+ assert(itrLexW != lexicalWeights.end());
+ Scores scoreVector(3);
+ scoreVector[0] = trg2SrcMLE;
+ scoreVector[1] = itrLexW->second.first;
+ scoreVector[2] = 2.718; // exp(1);
+ phraseScores.insert(pair<Scores, const SAPhrase*>(scoreVector, &iterPhrases->first));
+ }
+ // return top scoring phrases
+ std::multimap<Scores, const SAPhrase*, ScoresComp>::reverse_iterator ritr;
+ for(ritr = phraseScores.rbegin(); ritr != phraseScores.rend(); ++ritr) {
+ Scores scoreVector = ritr->first;
+ Moses::TargetPhrase *targetPhrase = getMosesFactorIDs(*ritr->second);
+ //std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),NegateScore);
+ std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
+ targetPhrase->SetScore(m_feature, scoreVector, m_weight, m_weightWP, *m_languageModels);
+ //cout << *targetPhrase << "\t" << std::setprecision(8) << scoreVector[2] << endl;
+ ret->Add(targetPhrase);
+ if(ret->GetSize() == maxReturn) break;
+ }
+ ret->NthElement(m_tableLimit); // sort the phrases for the decoder
+ return ret;
+}
+vector<int> PhraseDictionaryDynSuffixArray::getSntIndexes(vector<unsigned>& wrdIndices,
+ const int sourceSize) const
+{
+ vector<unsigned>::const_iterator vit;
+ vector<int> sntIndexes;
+ for(int i=0; i < wrdIndices.size(); ++i) {
+ vit = std::upper_bound(srcSntBreaks_.begin(), srcSntBreaks_.end(), wrdIndices[i]);
+ int index = int(vit - srcSntBreaks_.begin()) - 1;
+ // check for phrases that cross sentence boundaries
+ if(wrdIndices[i] - sourceSize + 1 < srcSntBreaks_.at(index))
+ sntIndexes.push_back(-1); // set bad flag
+ else
+ sntIndexes.push_back(index); // store the index of the sentence in the corpus
+ }
+ return sntIndexes;
+}
+vector<unsigned> PhraseDictionaryDynSuffixArray::sampleSelection(vector<unsigned> sample) const {
+ int size = sample.size();
+ //if(size < maxSampleSize_) return sample;
+ vector<unsigned> subSample;
+ int jump = size / maxSampleSize_;
+ for(int i=0; i < size; i+=jump)
+ subSample.push_back(sample.at(i));
+ return subSample;
+}
+void PhraseDictionaryDynSuffixArray::save(string fname) {
+ // save vocab, SAs, corpus, alignments
+}
+void PhraseDictionaryDynSuffixArray::load(string fname) {
+ // read vocab, SAs, corpus, alignments
+}
+
+SentenceAlignment::SentenceAlignment(int sntIndex, int sourceSize, int targetSize)
+ :m_sntIndex(sntIndex)
+ ,numberAligned(targetSize, 0)
+ ,alignedList(sourceSize)
+{
+ for(int i=0; i < sourceSize; ++i) {
+ vector<int> trgWrd;
+ alignedList[i] = trgWrd;
+ }
+}
+bool SentenceAlignment::Extract(int maxPhraseLength, vector<PhrasePair*> &ret, int startSource, int endSource) const
+{
+ // foreign = target, F=T
+ // english = source, E=S
+ int countTarget = numberAligned.size();
+
+ int minTarget = 9999;
+ int maxTarget = -1;
+ vector< int > usedTarget = numberAligned;
+ for(int sourcePos = startSource; sourcePos <= endSource; sourcePos++)
+ {
+ for(int ind=0; ind < (int)alignedList[sourcePos].size();ind++)
+ {
+ int targetPos = alignedList[sourcePos][ind];
+ // cout << "point (" << targetPos << ", " << sourcePos << ")\n";
+ if (targetPos<minTarget) { minTarget = targetPos; }
+ if (targetPos>maxTarget) { maxTarget = targetPos; }
+ usedTarget[ targetPos ]--;
+ } // for(int ind=0;ind<sentence
+ } // for(int sourcePos=startSource
+
+ // cout << "f projected ( " << minTarget << "-" << maxTarget << ", " << startSource << "," << endSource << ")\n";
+
+ if (maxTarget >= 0 && // aligned to any foreign words at all
+ maxTarget-minTarget < maxPhraseLength)
+ { // foreign phrase within limits
+
+ // check if foreign words are aligned to out of bound english words
+ bool out_of_bounds = false;
+ for(int targetPos=minTarget; targetPos <= maxTarget && !out_of_bounds; targetPos++)
+ {
+ if (usedTarget[targetPos]>0)
+ {
+ // cout << "out of bounds: " << targetPos << "\n";
+ out_of_bounds = true;
+ }
+ }
+
+ // cout << "doing if for ( " << minTarget << "-" << maxTarget << ", " << startSource << "," << endSource << ")\n";
+ if (!out_of_bounds)
+ {
+ // start point of foreign phrase may retreat over unaligned
+ for(int startTarget = minTarget;
+ (startTarget >= 0 &&
+ startTarget > maxTarget-maxPhraseLength && // within length limit
+ (startTarget==minTarget || numberAligned[startTarget]==0)); // unaligned
+ startTarget--)
+ {
+ // end point of foreign phrase may advance over unaligned
+ for (int endTarget=maxTarget;
+ (endTarget<countTarget &&
+ endTarget<startTarget+maxPhraseLength && // within length limit
+ (endTarget==maxTarget || numberAligned[endTarget]==0)); // unaligned
+ endTarget++)
+ {
+ PhrasePair *phrasePair = new PhrasePair(startTarget,endTarget,startSource,endSource, m_sntIndex);
+ ret.push_back(phrasePair);
+ } // for (int endTarget=maxTarget;
+ } // for(int startTarget=minTarget;
+ } // if (!out_of_bounds)
+ } // if (maxTarget >= 0 &&
+ return (ret.size() > 0);
+
+}
+
+}// end namespace
diff --git a/moses/src/PhraseDictionaryDynSuffixArray.h b/moses/src/PhraseDictionaryDynSuffixArray.h
new file mode 100644
index 000000000..38883a3fd
--- /dev/null
+++ b/moses/src/PhraseDictionaryDynSuffixArray.h
@@ -0,0 +1,125 @@
+#ifndef moses_PhraseDictionaryDynSuffixArray_h
+#define moses_PhraseDictionaryDynSuffixArray_h
+
+#include "PhraseDictionary.h"
+#include "DynSuffixArray.h"
+#include "DynSAInclude/vocab.h"
+#include "DynSAInclude/types.h"
+#include "DynSAInclude/utils.h"
+#include "InputFileStream.h"
+namespace Moses {
+
+class SAPhrase
+{
+public:
+ vector<wordID_t> words;
+
+ SAPhrase(size_t phraseSize)
+ :words(phraseSize)
+ {}
+
+ void SetId(size_t pos, wordID_t id)
+ {
+ assert(pos < words.size());
+ words[pos] = id;
+ }
+ bool operator<(const SAPhrase& phr2) const
+ { return words < phr2.words; }
+};
+
+class PhrasePair
+{
+public:
+ int m_startTarget, m_endTarget, m_startSource, m_endSource, m_sntIndex;
+ PhrasePair(int startTarget, int endTarget, int startSource, int endSource, int sntIndex)
+ : m_startTarget(startTarget)
+ , m_endTarget(endTarget)
+ , m_startSource(startSource)
+ , m_endSource(endSource)
+ , m_sntIndex(sntIndex)
+ {}
+
+ size_t GetTargetSize() const
+ { return m_endTarget - m_startTarget + 1; }
+};
+
+class SentenceAlignment
+{
+public:
+ SentenceAlignment(int sntIndex, int sourceSize, int targetSize);
+ int m_sntIndex;
+ vector<wordID_t>* trgSnt;
+ vector<wordID_t>* srcSnt;
+ vector<int> numberAligned;
+ vector< vector<int> > alignedList;
+ bool Extract(int maxPhraseLength, vector<PhrasePair*> &ret, int startSource, int endSource) const;
+};
+class ScoresComp {
+public:
+ ScoresComp(const vector<float>& weights): m_weights(weights) {}
+ bool operator()(const Scores& s1, const Scores& s2) const {
+ float score1(1), score2(1);
+ int idx1(0), idx2(0);
+ iterate(s1, itr) score1 += (*itr * m_weights.at(idx1++));
+ iterate(s2, itr) score2 += (*itr * m_weights.at(idx2++));
+ return score1 < score2;
+ }
+private:
+ const vector<float>& m_weights;
+};
+
+class PhraseDictionaryDynSuffixArray: public PhraseDictionary {
+public:
+ PhraseDictionaryDynSuffixArray(size_t numScoreComponent, PhraseDictionaryFeature* feature);
+ ~PhraseDictionaryDynSuffixArray();
+ bool Load(string source, string target, string alignments
+ , const vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP);
+ void LoadVocabLookup();
+ void save(string);
+ void load(string);
+ // functions below required by base class
+ void SetWeightTransModel(const vector<float, std::allocator<float> >&);
+ const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
+ void InitializeForInput(const InputType& i);
+ void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase){}
+ void CleanUp();
+private:
+ DynSuffixArray* srcSA_;
+ DynSuffixArray* trgSA_;
+ vector<wordID_t>* srcCrp_;
+ vector<wordID_t>* trgCrp_;
+ vector<unsigned> srcSntBreaks_, trgSntBreaks_;
+ Vocab* vocab_;
+ ScoresComp* scoreCmp_;
+ vector<SentenceAlignment> alignments_;
+ vector<vector<short> > rawAlignments_;
+ vector<float> m_weight;
+ size_t m_tableLimit;
+ const LMList *m_languageModels;
+ float m_weightWP;
+ std::map<const Factor *, wordID_t> vocabLookup_;
+ std::map<wordID_t, const Factor *> vocabLookupRev_;
+ mutable std::map<pair<wordID_t, wordID_t>, pair<float, float> > wordPairCache_;
+ const int maxPhraseLength_, maxSampleSize_;
+ int loadCorpus(InputFileStream&, vector<wordID_t>&, vector<wordID_t>&);
+ int loadAlignments(InputFileStream& aligs);
+ int loadRawAlignments(InputFileStream& aligs);
+ bool extractPhrases(const int&, const int&, const int&, vector<PhrasePair*>&, bool=false) const;
+ SentenceAlignment getSentenceAlignment(const int, bool=false) const;
+ vector<unsigned> sampleSelection(vector<unsigned>) const;
+ vector<int> getSntIndexes(vector<unsigned>&, const int) const;
+ TargetPhrase* getMosesFactorIDs(const SAPhrase&) const;
+ SAPhrase trgPhraseFromSntIdx(const PhrasePair&) const;
+ bool getLocalVocabIDs(const Phrase&, SAPhrase &) const;
+ void cacheWordProbs(wordID_t) const;
+ pair<float, float> getLexicalWeight(const PhrasePair&) const;
+ int GetSourceSentenceSize(size_t sentenceId) const
+ { return (sentenceId==srcSntBreaks_.size()-1) ? srcCrp_->size() - srcSntBreaks_.at(sentenceId) : srcSntBreaks_.at(sentenceId+1) - srcSntBreaks_.at(sentenceId); }
+ int GetTargetSentenceSize(size_t sentenceId) const
+ { return (sentenceId==trgSntBreaks_.size()-1) ? trgCrp_->size() - trgSntBreaks_.at(sentenceId) : trgSntBreaks_.at(sentenceId+1) - trgSntBreaks_.at(sentenceId); }
+};
+} // end namespace
+#endif
diff --git a/moses/src/PhraseDictionaryMemory.cpp b/moses/src/PhraseDictionaryMemory.cpp
new file mode 100644
index 000000000..facef367f
--- /dev/null
+++ b/moses/src/PhraseDictionaryMemory.cpp
@@ -0,0 +1,222 @@
+// $Id: PhraseDictionaryMemory.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include <sys/stat.h>
+#include "PhraseDictionaryMemory.h"
+#include "FactorCollection.h"
+#include "Word.h"
+#include "Util.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+#include "WordsRange.h"
+#include "UserMessage.h"
+
+using namespace std;
+
+namespace Moses
+{
+bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const string &filePath
+ , const vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ m_tableLimit = tableLimit;
+
+ //factors
+ m_inputFactors = FactorMask(input);
+ m_outputFactors = FactorMask(output);
+ VERBOSE(2,"PhraseDictionaryMemory: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
+
+ // data from file
+ InputFileStream inFile(filePath);
+
+ // create hash file if necessary
+ ofstream tempFile;
+ string tempFilePath;
+
+ vector< vector<string> > phraseVector;
+ string line, prevSourcePhrase = "";
+ size_t count = 0;
+ size_t line_num = 0;
+ size_t numElement = NOT_FOUND; // 3=old format, 5=async format which includes word alignment info
+
+ while(getline(inFile, line))
+ {
+ ++line_num;
+ vector<string> tokens = TokenizeMultiCharSeparator( line , "|||" );
+
+ if (numElement == NOT_FOUND)
+ { // init numElement
+ numElement = tokens.size();
+ assert(numElement == 3 || numElement == 5);
+ }
+
+ if (tokens.size() != numElement)
+ {
+ stringstream strme;
+ strme << "Syntax error at " << filePath << ":" << line_num;
+ UserMessage::Add(strme.str());
+ abort();
+ }
+
+ string sourcePhraseString, targetPhraseString;
+ string scoreString;
+ string sourceAlignString, targetAlignString;
+
+ sourcePhraseString=tokens[0];
+ targetPhraseString=tokens[1];
+ if (numElement==3){
+ scoreString=tokens[2];
+ }
+ else{
+ sourceAlignString=tokens[2];
+ targetAlignString=tokens[3];
+ scoreString=tokens[4];
+ }
+
+ bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
+ if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
+ TRACE_ERR( filePath << ":" << line_num << ": pt entry contains empty target, skipping\n");
+ continue;
+ }
+
+ const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+ if (sourcePhraseString != prevSourcePhrase)
+ phraseVector = Phrase::Parse(sourcePhraseString, input, factorDelimiter);
+
+ vector<float> scoreVector = Tokenize<float>(scoreString);
+ if (scoreVector.size() != m_numScoreComponent)
+ {
+ stringstream strme;
+ strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num;
+ UserMessage::Add(strme.str());
+ abort();
+ }
+// assert(scoreVector.size() == m_numScoreComponent);
+
+ // source
+ Phrase sourcePhrase(Input);
+ sourcePhrase.CreateFromString( input, phraseVector);
+ //target
+ TargetPhrase targetPhrase(Output);
+ targetPhrase.SetSourcePhrase(&sourcePhrase);
+ targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
+
+
+
+ // component score, for n-best output
+ std::vector<float> scv(scoreVector.size());
+ std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);
+ std::transform(scv.begin(),scv.end(),scv.begin(),FloorScore);
+ targetPhrase.SetScore(m_feature, scv, weight, weightWP, languageModels);
+
+ AddEquivPhrase(sourcePhrase, targetPhrase);
+
+ count++;
+ }
+
+ // sort each target phrase collection
+ m_collection.Sort(m_tableLimit);
+
+ return true;
+}
+
+TargetPhraseCollection *PhraseDictionaryMemory::CreateTargetPhraseCollection(const Phrase &source)
+{
+ const size_t size = source.GetSize();
+
+ PhraseDictionaryNode *currNode = &m_collection;
+ for (size_t pos = 0 ; pos < size ; ++pos)
+ {
+ const Word& word = source.GetWord(pos);
+ currNode = currNode->GetOrCreateChild(word);
+ if (currNode == NULL)
+ return NULL;
+ }
+
+ return currNode->CreateTargetPhraseCollection();
+}
+
+void PhraseDictionaryMemory::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
+{
+ TargetPhraseCollection &phraseColl = *CreateTargetPhraseCollection(source);
+ phraseColl.Add(new TargetPhrase(targetPhrase));
+}
+
+const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase &source) const
+{ // exactly like CreateTargetPhraseCollection, but don't create
+ const size_t size = source.GetSize();
+
+ const PhraseDictionaryNode *currNode = &m_collection;
+ for (size_t pos = 0 ; pos < size ; ++pos)
+ {
+ const Word& word = source.GetWord(pos);
+ currNode = currNode->GetChild(word);
+ if (currNode == NULL)
+ return NULL;
+ }
+
+ return currNode->GetTargetPhraseCollection();
+}
+
+PhraseDictionaryMemory::~PhraseDictionaryMemory()
+{
+}
+
+void PhraseDictionaryMemory::SetWeightTransModel(const vector<float> &weightT)
+{
+ PhraseDictionaryNode::iterator iterDict;
+ for (iterDict = m_collection.begin() ; iterDict != m_collection.end() ; ++iterDict)
+ {
+ PhraseDictionaryNode &phraseDictionaryNode = iterDict->second;
+ // recursively set weights in nodes
+ phraseDictionaryNode.SetWeightTransModel(this, weightT);
+ }
+}
+
+TO_STRING_BODY(PhraseDictionaryMemory);
+
+// friend
+ostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict)
+{
+ const PhraseDictionaryNode &coll = phraseDict.m_collection;
+ PhraseDictionaryNode::const_iterator iter;
+ for (iter = coll.begin() ; iter != coll.end() ; ++iter)
+ {
+ const Word &word = (*iter).first;
+ out << word;
+ }
+ return out;
+}
+
+
+}
+
diff --git a/moses/src/PhraseDictionaryMemory.h b/moses/src/PhraseDictionaryMemory.h
new file mode 100644
index 000000000..6943cda0f
--- /dev/null
+++ b/moses/src/PhraseDictionaryMemory.h
@@ -0,0 +1,72 @@
+// $Id: PhraseDictionaryMemory.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PhraseDictionaryMemory_h
+#define moses_PhraseDictionaryMemory_h
+
+#include "PhraseDictionary.h"
+#include "PhraseDictionaryNode.h"
+
+namespace Moses
+{
+
+/*** Implementation of a phrase table in a trie. Looking up a phrase of
+ * length n words requires n look-ups to find the TargetPhraseCollection.
+ */
+class PhraseDictionaryMemory : public PhraseDictionary
+{
+ typedef PhraseDictionary MyBase;
+ friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemory&);
+
+protected:
+ PhraseDictionaryNode m_collection;
+
+ TargetPhraseCollection *CreateTargetPhraseCollection(const Phrase &source);
+
+public:
+ PhraseDictionaryMemory(size_t numScoreComponent, PhraseDictionaryFeature* feature)
+ : PhraseDictionary(numScoreComponent,feature) {}
+ virtual ~PhraseDictionaryMemory();
+
+ bool Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP);
+
+ const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &source) const;
+
+ void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
+
+ // for mert
+ void SetWeightTransModel(const std::vector<float> &weightT);
+ virtual void InitializeForInput(InputType const&)
+ {/* Don't do anything source specific here as this object is shared between threads.*/}
+
+ TO_STRING();
+
+};
+
+}
+#endif
diff --git a/moses/src/PhraseDictionaryNode.cpp b/moses/src/PhraseDictionaryNode.cpp
new file mode 100644
index 000000000..2d3dcbc5a
--- /dev/null
+++ b/moses/src/PhraseDictionaryNode.cpp
@@ -0,0 +1,94 @@
+// $Id: PhraseDictionaryNode.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "PhraseDictionaryNode.h"
+#include "TargetPhrase.h"
+#include "PhraseDictionaryMemory.h"
+
+namespace Moses
+{
+PhraseDictionaryNode::~PhraseDictionaryNode()
+{
+ delete m_targetPhraseCollection;
+}
+
+void PhraseDictionaryNode::Sort(size_t tableLimit)
+{
+ // recursively sort
+ NodeMap::iterator iter;
+ for (iter = m_map.begin() ; iter != m_map.end() ; ++iter)
+ {
+ iter->second.Sort(tableLimit);
+ }
+
+ // sort TargetPhraseCollection in this node
+ if (m_targetPhraseCollection != NULL)
+ m_targetPhraseCollection->NthElement(tableLimit);
+}
+
+PhraseDictionaryNode *PhraseDictionaryNode::GetOrCreateChild(const Word &word)
+{
+ NodeMap::iterator iter = m_map.find(word);
+ if (iter != m_map.end())
+ return &iter->second; // found it
+
+ // can't find node. create a new one
+ return &(m_map[word] = PhraseDictionaryNode());
+}
+
+const PhraseDictionaryNode *PhraseDictionaryNode::GetChild(const Word &word) const
+{
+ NodeMap::const_iterator iter = m_map.find(word);
+ if (iter != m_map.end())
+ return &iter->second; // found it
+
+ // don't return anything
+ return NULL;
+}
+
+void PhraseDictionaryNode::SetWeightTransModel(
+ const PhraseDictionaryMemory *phraseDictionary,
+ const std::vector<float> &weightT)
+{
+ // recursively set weights
+ NodeMap::iterator iterNodeMap;
+ for (iterNodeMap = m_map.begin() ; iterNodeMap != m_map.end() ; ++iterNodeMap)
+ {
+ iterNodeMap->second.SetWeightTransModel(phraseDictionary, weightT);
+ }
+
+ // set weights for this target phrase
+ if (m_targetPhraseCollection == NULL)
+ return;
+
+ TargetPhraseCollection::iterator iterTargetPhrase;
+ for (iterTargetPhrase = m_targetPhraseCollection->begin();
+ iterTargetPhrase != m_targetPhraseCollection->end();
+ ++iterTargetPhrase)
+ {
+ TargetPhrase &targetPhrase = **iterTargetPhrase;
+ targetPhrase.SetWeights(phraseDictionary->GetFeature(), weightT);
+ }
+
+}
+
+}
+
diff --git a/moses/src/PhraseDictionaryNode.h b/moses/src/PhraseDictionaryNode.h
new file mode 100644
index 000000000..d58124416
--- /dev/null
+++ b/moses/src/PhraseDictionaryNode.h
@@ -0,0 +1,85 @@
+// $Id: PhraseDictionaryNode.h 2939 2010-02-24 11:15:44Z jfouet $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PhraseDictionaryNode_h
+#define moses_PhraseDictionaryNode_h
+
+#include <map>
+#include <vector>
+#include <iterator>
+#include "Word.h"
+#include "TargetPhraseCollection.h"
+
+namespace Moses
+{
+
+class PhraseDictionaryMemory;
+class PhraseDictionaryFeature;
+
+/** One node of the PhraseDictionaryMemory structure
+*/
+class PhraseDictionaryNode
+{
+ typedef std::map<Word, PhraseDictionaryNode> NodeMap;
+
+ // only these classes are allowed to instantiate this class
+ friend class PhraseDictionaryMemory;
+ friend class std::map<Word, PhraseDictionaryNode>;
+
+protected:
+ NodeMap m_map;
+ TargetPhraseCollection *m_targetPhraseCollection;
+
+ PhraseDictionaryNode()
+ :m_targetPhraseCollection(NULL)
+ {}
+public:
+ ~PhraseDictionaryNode();
+
+ void Sort(size_t tableLimit);
+ PhraseDictionaryNode *GetOrCreateChild(const Word &word);
+ const PhraseDictionaryNode *GetChild(const Word &word) const;
+ const TargetPhraseCollection *GetTargetPhraseCollection() const
+ {
+ return m_targetPhraseCollection;
+ }
+ TargetPhraseCollection *CreateTargetPhraseCollection()
+ {
+ if (m_targetPhraseCollection == NULL)
+ m_targetPhraseCollection = new TargetPhraseCollection();
+ return m_targetPhraseCollection;
+ }
+ // for mert
+ void SetWeightTransModel(const PhraseDictionaryMemory *phraseDictionary
+ , const std::vector<float> &weightT);
+
+ // iterators
+ typedef NodeMap::iterator iterator;
+ typedef NodeMap::const_iterator const_iterator;
+ const_iterator begin() const { return m_map.begin(); }
+ const_iterator end() const { return m_map.end(); }
+ iterator begin() { return m_map.begin(); }
+ iterator end() { return m_map.end(); }
+};
+
+}
+#endif
diff --git a/moses/src/PhraseDictionaryTree.cpp b/moses/src/PhraseDictionaryTree.cpp
new file mode 100644
index 000000000..f3c4b5307
--- /dev/null
+++ b/moses/src/PhraseDictionaryTree.cpp
@@ -0,0 +1,715 @@
+// $Id: PhraseDictionaryTree.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+// vim:tabstop=2
+#include "PhraseDictionaryTree.h"
+#include <map>
+#include <cassert>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+namespace Moses
+{
+
+template<typename T>
+std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
+{
+ out<<x.size()<<" ";
+ typename std::vector<T>::const_iterator iend=x.end();
+ for(typename std::vector<T>::const_iterator i=x.begin();i!=iend;++i)
+ out<<*i<<' ';
+ return out;
+}
+
+
+class TgtCand {
+ IPhrase e;
+ Scores sc;
+ WordAlignments m_sourceAlignment, m_targetAlignment;
+public:
+ TgtCand() {}
+
+ TgtCand(const IPhrase& a, const Scores& b
+ , const WordAlignments &sourceAlignment, const WordAlignments &targetAlignment)
+ : e(a)
+ , sc(b)
+ , m_sourceAlignment(sourceAlignment)
+ , m_targetAlignment(targetAlignment)
+ {}
+
+ TgtCand(const IPhrase& a,const Scores& b) : e(a),sc(b) {}
+
+ TgtCand(FILE* f) {readBin(f);}
+
+
+ void writeBin(FILE* f) const
+ {
+ fWriteVector(f,e);
+ fWriteVector(f,sc);
+ }
+
+ void readBin(FILE* f)
+ {
+ fReadVector(f,e);
+ fReadVector(f,sc);
+ }
+
+ void writeBinWithAlignment(FILE* f) const
+ {
+ fWriteVector(f,e);
+ fWriteVector(f,sc);
+ fWriteStringVector(f, m_sourceAlignment);
+ fWriteStringVector(f, m_targetAlignment);
+ }
+
+ void readBinWithAlignment(FILE* f)
+ {
+ fReadVector(f,e);
+ fReadVector(f,sc);
+ fReadStringVector(f, m_sourceAlignment);
+ fReadStringVector(f, m_targetAlignment);
+ }
+
+ const IPhrase& GetPhrase() const {return e;}
+ const Scores& GetScores() const {return sc;}
+ const WordAlignments& GetSourceAlignment() const {return m_sourceAlignment;}
+ const WordAlignments& GetTargetAlignment() const {return m_targetAlignment;}
+};
+
+
+class TgtCands : public std::vector<TgtCand> {
+ typedef std::vector<TgtCand> MyBase;
+public:
+ TgtCands() : MyBase() {}
+
+ void writeBin(FILE* f) const
+ {
+ unsigned s=size();
+ fWrite(f,s);
+ for(size_t i=0;i<s;++i) MyBase::operator[](i).writeBin(f);
+ }
+
+ void writeBinWithAlignment(FILE* f) const
+ {
+ unsigned s=size();
+ fWrite(f,s);
+ for(size_t i=0;i<s;++i) MyBase::operator[](i).writeBinWithAlignment(f);
+ }
+
+ void readBin(FILE* f)
+ {
+ unsigned s;fRead(f,s);resize(s);
+ for(size_t i=0;i<s;++i) MyBase::operator[](i).readBin(f);
+ }
+
+ void readBinWithAlignment(FILE* f)
+ {
+ unsigned s;fRead(f,s);resize(s);
+ for(size_t i=0;i<s;++i) MyBase::operator[](i).readBinWithAlignment(f);
+ }
+};
+
+
+PhraseDictionaryTree::PrefixPtr::operator bool() const
+{
+ return imp && imp->isValid();
+}
+
+typedef LVoc<std::string> WordVoc;
+
+static WordVoc* ReadVoc(const std::string& filename) {
+ static std::map<std::string,WordVoc*> vocs;
+#ifdef HAVE_THREADS
+ boost::mutex mutex;
+ boost::mutex::scoped_lock lock(mutex);
+#endif
+ std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
+ if (vi == vocs.end()) {
+ WordVoc* voc = new WordVoc();
+ voc->Read(filename);
+ vocs[filename] = voc;
+ }
+ return vocs[filename];
+}
+
+
+struct PDTimp {
+ typedef PrefixTreeF<LabelId,OFF_T> PTF;
+ typedef FilePtr<PTF> CPT;
+ typedef std::vector<CPT> Data;
+
+
+ Data data;
+ std::vector<OFF_T> srcOffsets;
+
+ FILE *os,*ot;
+ WordVoc* sv;
+ WordVoc* tv;
+
+ ObjectPool<PPimp> pPool;
+ // a comparison with the Boost MemPools might be useful
+
+ bool usewordalign;
+ bool printwordalign;
+
+ PDTimp() : os(0),ot(0), usewordalign(false), printwordalign(false) {PTF::setDefault(InvalidOffT);}
+ ~PDTimp() {if(os) fClose(os);if(ot) fClose(ot);FreeMemory();}
+
+ inline void UseWordAlignment(bool a){ usewordalign=a; }
+ inline bool UseWordAlignment(){ return usewordalign; };
+
+ inline void PrintWordAlignment(bool a){ printwordalign=a; };
+ inline bool PrintWordAlignment(){ return printwordalign; };
+
+ void FreeMemory()
+ {
+ for(Data::iterator i=data.begin();i!=data.end();++i) (*i).free();
+ pPool.reset();
+ }
+
+ int Read(const std::string& fn);
+
+ void GetTargetCandidates(const IPhrase& f,TgtCands& tgtCands)
+ {
+ if(f.empty()) return;
+ if(f[0]>=data.size()) return;
+ if(!data[f[0]]) return;
+ assert(data[f[0]]->findKey(f[0])<data[f[0]]->size());
+ OFF_T tCandOffset=data[f[0]]->find(f);
+ if(tCandOffset==InvalidOffT) return;
+ fSeek(ot,tCandOffset);
+
+ if (UseWordAlignment()) tgtCands.readBinWithAlignment(ot);
+ else tgtCands.readBin(ot);
+ }
+
+ typedef PhraseDictionaryTree::PrefixPtr PPtr;
+
+ void GetTargetCandidates(PPtr p,TgtCands& tgtCands)
+ {
+ assert(p);
+ if(p.imp->isRoot()) return;
+ OFF_T tCandOffset=p.imp->ptr()->getData(p.imp->idx);
+ if(tCandOffset==InvalidOffT) return;
+ fSeek(ot,tCandOffset);
+ if (UseWordAlignment()) tgtCands.readBinWithAlignment(ot);
+ else tgtCands.readBin(ot);
+ }
+
+ void PrintTgtCand(const TgtCands& tcands,std::ostream& out) const;
+
+ // convert target candidates from internal data structure to the external one
+ void ConvertTgtCand(const TgtCands& tcands,std::vector<StringTgtCand>& rv) const
+ {
+ for(TgtCands::const_iterator i=tcands.begin();i!=tcands.end();++i)
+ {
+ const IPhrase& iphrase=i->GetPhrase();
+ std::vector<std::string const*> vs;
+ vs.reserve(iphrase.size());
+ for(size_t j=0;j<iphrase.size();++j)
+ vs.push_back(&tv->symbol(iphrase[j]));
+ rv.push_back(StringTgtCand(vs,i->GetScores()));
+ }
+ }
+
+ // convert target candidates from internal data structure to the external one
+ void ConvertTgtCand(const TgtCands& tcands,std::vector<StringTgtCand>& rv,
+ std::vector<StringWordAlignmentCand>& swa,
+ std::vector<StringWordAlignmentCand>& twa) const
+ {
+ for(TgtCands::const_iterator i=tcands.begin();i!=tcands.end();++i)
+ {
+ const IPhrase& iphrase=i->GetPhrase();
+
+ std::vector<std::string const*> vs;
+ vs.reserve(iphrase.size());
+ for(size_t j=0;j<iphrase.size();++j)
+ vs.push_back(&tv->symbol(iphrase[j]));
+ rv.push_back(StringTgtCand(vs,i->GetScores()));
+ swa.push_back(StringWordAlignmentCand(vs,(i->GetSourceAlignment())));
+ twa.push_back(StringWordAlignmentCand(vs,(i->GetTargetAlignment())));
+ }
+ }
+
+ PPtr GetRoot()
+ {
+ return PPtr(pPool.get(PPimp(0,0,1)));
+ }
+
+ PPtr Extend(PPtr p,const std::string& w)
+ {
+ assert(p);
+ if(w.empty() || w==EPSILON) return p;
+
+ LabelId wi=sv->index(w);
+
+ if(wi==InvalidLabelId) return PPtr(); // unknown word
+ else if(p.imp->isRoot())
+ {
+ if(wi<data.size() && data[wi])
+ {
+ assert(data[wi]->findKeyPtr(wi));
+ return PPtr(pPool.get(PPimp(data[wi],data[wi]->findKey(wi),0)));
+ }
+ }
+ else if(PTF const* nextP=p.imp->ptr()->getPtr(p.imp->idx))
+ {
+ return PPtr(pPool.get(PPimp(nextP,nextP->findKey(wi),0)));
+ }
+
+ return PPtr();
+ }
+};
+
+
+////////////////////////////////////////////////////////////
+//
+// member functions of PDTimp
+//
+////////////////////////////////////////////////////////////
+
+int PDTimp::Read(const std::string& fn)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ std::string ifs, ift, ifi, ifsv, iftv;
+
+ if (staticData.UseAlignmentInfo()){//asking for word-to-word alignment
+ if (!FileExists(fn+".binphr.srctree.wa") || !FileExists(fn+".binphr.tgtdata.wa")){
+ // ERROR
+ std::stringstream strme;
+ strme << "You are asking for word alignment but the binary phrase table does not contain any alignment info. Please check if you had generated the correct phrase table with word alignment (.wa)\n";
+ UserMessage::Add(strme.str());
+ return false;
+ }
+ ifs=fn+".binphr.srctree.wa";
+ ift=fn+".binphr.tgtdata.wa";
+ ifi=fn+".binphr.idx";
+ ifsv=fn+".binphr.srcvoc";
+ iftv=fn+".binphr.tgtvoc";
+ UseWordAlignment(true);
+ }
+ else{
+ if (!FileExists(fn+".binphr.srctree") || !FileExists(fn+".binphr.tgtdata")){
+ // ERROR
+ std::stringstream strme;
+ strme << "You are asking binary phrase table without word alignments but the file do not exist. Please check if you had generated the correct phrase table without word alignment (" << (fn+".binphr.srctree") << "," << (fn+".binphr.tgtdata")<< ")\n";
+ UserMessage::Add(strme.str());
+ return false;
+ }
+
+ ifs=fn+".binphr.srctree";
+ ift=fn+".binphr.tgtdata";
+ ifi=fn+".binphr.idx";
+ ifsv=fn+".binphr.srcvoc";
+ iftv=fn+".binphr.tgtvoc";
+
+ UseWordAlignment(false);
+ }
+
+ FILE *ii=fOpen(ifi.c_str(),"rb");
+ fReadVector(ii,srcOffsets);
+ fClose(ii);
+
+ os=fOpen(ifs.c_str(),"rb");
+ ot=fOpen(ift.c_str(),"rb");
+
+ data.resize(srcOffsets.size());
+ for(size_t i=0;i<data.size();++i)
+ data[i]=CPT(os,srcOffsets[i]);
+
+ sv = ReadVoc(ifsv);
+ tv = ReadVoc(iftv);
+ //sv.Read(ifsv);
+ //tv.Read(iftv);
+
+ TRACE_ERR("binary phrasefile loaded, default OFF_T: "<<PTF::getDefault()
+ <<"\n");
+ return 1;
+}
+
+void PDTimp::PrintTgtCand(const TgtCands& tcand,std::ostream& out) const
+{
+ for(size_t i=0;i<tcand.size();++i)
+ {
+
+ Scores sc=tcand[i].GetScores();
+ WordAlignments srcAlign=tcand[i].GetSourceAlignment();
+ WordAlignments trgAlign=tcand[i].GetTargetAlignment();
+
+ const IPhrase& iphr=tcand[i].GetPhrase();
+
+ out << i << " -- " << sc << " -- ";
+ for(size_t j=0;j<iphr.size();++j) out << tv->symbol(iphr[j])<<" ";
+ out<< " -- ";
+ for (size_t j=0;j<srcAlign.size();j++) out << " " << srcAlign[j];
+ out << " -- ";
+ for (size_t j=0;j<trgAlign.size();j++) out << " " << trgAlign[j];
+ out << std::endl;
+ }
+}
+
+////////////////////////////////////////////////////////////
+//
+// member functions of PhraseDictionaryTree
+//
+////////////////////////////////////////////////////////////
+
+PhraseDictionaryTree::PhraseDictionaryTree(size_t numScoreComponent)
+ : Dictionary(numScoreComponent),imp(new PDTimp)
+{
+ if(sizeof(OFF_T)!=8)
+ {
+ TRACE_ERR("ERROR: size of type 'OFF_T' has to be 64 bit!\n"
+ "In gcc, use compiler settings '-D_FILE_OFFSET_BITS=64 -D_LARGE_FILES'\n"
+ " -> abort \n\n");
+ abort();
+ }
+}
+
+PhraseDictionaryTree::~PhraseDictionaryTree()
+{
+ delete imp;
+}
+
+void PhraseDictionaryTree::UseWordAlignment(bool a){ imp->UseWordAlignment(a); };
+bool PhraseDictionaryTree::UseWordAlignment(){ return imp->UseWordAlignment(); };
+
+void PhraseDictionaryTree::PrintWordAlignment(bool a){ imp->PrintWordAlignment(a); };
+bool PhraseDictionaryTree::PrintWordAlignment(){ return imp->PrintWordAlignment(); };
+
+void PhraseDictionaryTree::FreeMemory() const
+{
+ imp->FreeMemory();
+}
+
+void PhraseDictionaryTree::
+GetTargetCandidates(const std::vector<std::string>& src,
+ std::vector<StringTgtCand>& rv) const
+{
+ IPhrase f(src.size());
+ for(size_t i=0;i<src.size();++i)
+ {
+ f[i]=imp->sv->index(src[i]);
+ if(f[i]==InvalidLabelId) return;
+ }
+
+ TgtCands tgtCands;
+ imp->GetTargetCandidates(f,tgtCands);
+ imp->ConvertTgtCand(tgtCands,rv);
+}
+
+void PhraseDictionaryTree::
+GetTargetCandidates(const std::vector<std::string>& src,
+ std::vector<StringTgtCand>& rv,
+ std::vector<StringWordAlignmentCand>& swa,
+ std::vector<StringWordAlignmentCand>& twa) const
+{
+ IPhrase f(src.size());
+ for(size_t i=0;i<src.size();++i)
+ {
+ f[i]=imp->sv->index(src[i]);
+ if(f[i]==InvalidLabelId) return;
+ }
+
+ TgtCands tgtCands;
+ imp->GetTargetCandidates(f,tgtCands);
+ imp->ConvertTgtCand(tgtCands,rv,swa,twa);
+}
+
+
+void PhraseDictionaryTree::
+PrintTargetCandidates(const std::vector<std::string>& src,
+ std::ostream& out) const
+{
+ IPhrase f(src.size());
+ for(size_t i=0;i<src.size();++i)
+ {
+ f[i]=imp->sv->index(src[i]);
+ if(f[i]==InvalidLabelId)
+ {
+ TRACE_ERR("the source phrase '"<<src<<"' contains an unknown word '"
+ <<src[i]<<"'\n");
+ return;
+ }
+ }
+
+ TgtCands tcand;
+ imp->GetTargetCandidates(f,tcand);
+ imp->PrintTgtCand(tcand,out);
+}
+
+int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
+{
+ std::string line;
+ size_t count = 0;
+
+ std::string ofn(out+".binphr.srctree"),
+ oft(out+".binphr.tgtdata"),
+ ofi(out+".binphr.idx"),
+ ofsv(out+".binphr.srcvoc"),
+ oftv(out+".binphr.tgtvoc");
+
+ if (PrintWordAlignment()){
+ ofn+=".wa";
+ oft+=".wa";
+ }
+
+ FILE *os=fOpen(ofn.c_str(),"wb"),
+ *ot=fOpen(oft.c_str(),"wb");
+
+ typedef PrefixTreeSA<LabelId,OFF_T> PSA;
+ PSA *psa=new PSA;PSA::setDefault(InvalidOffT);
+
+ LabelId currFirstWord=InvalidLabelId;
+ IPhrase currF;
+ TgtCands tgtCands;
+ std::vector<OFF_T> vo;
+ size_t lnc=0;
+ size_t numElement = NOT_FOUND; // 3=old format, 5=async format which includes word alignment info
+ imp->sv = new WordVoc();
+ imp->tv = new WordVoc();
+
+ while(getline(inFile, line))
+ {
+ ++lnc;
+
+ std::vector<std::string> tokens = TokenizeMultiCharSeparator( line , "|||" );
+
+ if (numElement == NOT_FOUND)
+ { // init numElement
+ numElement = tokens.size();
+ assert(numElement == 3 || numElement == 5);
+ }
+
+ if (tokens.size() != numElement)
+ {
+ std::stringstream strme;
+ strme << "Syntax error at line " << lnc << " : " << line;
+ UserMessage::Add(strme.str());
+ abort();
+ }
+
+ std::string sourcePhraseString, targetPhraseString;
+ std::string scoreString;
+ std::string sourceAlignString, targetAlignString;
+
+ sourcePhraseString=tokens[0];
+ targetPhraseString=tokens[1];
+ if (numElement==3){
+ scoreString=tokens[2];
+ }
+ else{
+ sourceAlignString=tokens[2];
+ targetAlignString=tokens[3];
+ scoreString=tokens[4];
+ }
+
+
+ IPhrase f,e;
+ Scores sc;
+ WordAlignments sourceAlignment, targetAlignment;
+
+ std::vector<std::string> wordVec = Tokenize(sourcePhraseString);
+ for (size_t i = 0 ; i < wordVec.size() ; ++i)
+ f.push_back(imp->sv->add(wordVec[i]));
+
+ wordVec = Tokenize(targetPhraseString);
+ for (size_t i = 0 ; i < wordVec.size() ; ++i)
+ e.push_back(imp->tv->add(wordVec[i]));
+
+
+
+ //change "()" into "(-1)" for both source and target word-to-word alignments
+ std::string emtpyAlignStr="()";
+ std::string replaceAlignStr="(-1)";
+ sourceAlignString=Replace(sourceAlignString,emtpyAlignStr,replaceAlignStr);
+ targetAlignString=Replace(targetAlignString,emtpyAlignStr,replaceAlignStr);
+
+ //remove all "(" from both source and target word-to-word alignments
+ emtpyAlignStr="(";
+ replaceAlignStr="";
+ sourceAlignString=Replace(sourceAlignString,emtpyAlignStr,replaceAlignStr);
+ targetAlignString=Replace(targetAlignString,emtpyAlignStr,replaceAlignStr);
+
+ //remove all ")" from both source and target word-to-word alignments
+ emtpyAlignStr=")";
+ replaceAlignStr="";
+ sourceAlignString=Replace(sourceAlignString,emtpyAlignStr,replaceAlignStr);
+ targetAlignString=Replace(targetAlignString,emtpyAlignStr,replaceAlignStr);
+
+ sourceAlignment = Tokenize(sourceAlignString);
+ targetAlignment = Tokenize(targetAlignString);
+
+ // while(is>>w && w!="|||") sc.push_back(atof(w.c_str()));
+ // Mauro: to handle 0 probs in phrase tables
+ std::vector<float> scoreVector = Tokenize<float>(scoreString);
+ for (size_t i = 0 ; i < scoreVector.size() ; ++i)
+ {
+ float tmp = scoreVector[i];
+ sc.push_back(((tmp>0.0)?tmp:(float)1.0e-38));
+ }
+
+
+ if(f.empty())
+ {
+ TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
+ continue;
+ }
+
+ if(currFirstWord==InvalidLabelId) currFirstWord=f[0];
+ if(currF.empty())
+ {
+ ++count;
+ currF=f;
+ // insert src phrase in prefix tree
+ assert(psa);
+ PSA::Data& d=psa->insert(f);
+ if(d==InvalidOffT) d=fTell(ot);
+ else
+ {
+ TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
+ <<line<<"'\nf: "<<f<<"\n");
+ abort();
+ }
+ }
+
+ if(currF!=f)
+ {
+ // new src phrase
+ currF=f;
+ if (PrintWordAlignment())
+ tgtCands.writeBinWithAlignment(ot);
+ else
+ tgtCands.writeBin(ot);
+ tgtCands.clear();
+
+ if(++count%10000==0)
+ {
+ TRACE_ERR(".");
+ if(count%500000==0) TRACE_ERR("[phrase:"<<count<<"]\n");
+ }
+
+ if(f[0]!=currFirstWord)
+ {
+ // write src prefix tree to file and clear
+ PTF pf;
+ if(currFirstWord>=vo.size())
+ vo.resize(currFirstWord+1,InvalidOffT);
+ vo[currFirstWord]=fTell(os);
+ pf.create(*psa,os);
+ // clear
+ delete psa;psa=new PSA;
+ currFirstWord=f[0];
+ }
+
+ // insert src phrase in prefix tree
+ assert(psa);
+ PSA::Data& d=psa->insert(f);
+ if(d==InvalidOffT) d=fTell(ot);
+ else
+ {
+ TRACE_ERR("ERROR: xsource phrase already inserted (B)!\nline(" << lnc << "): '"
+ <<line<<"'\nf: "<<f<<"\n");
+ abort();
+ }
+ }
+ tgtCands.push_back(TgtCand(e,sc, sourceAlignment, targetAlignment));
+ assert(currFirstWord!=InvalidLabelId);
+ }
+ if (PrintWordAlignment())
+ tgtCands.writeBinWithAlignment(ot);
+ else
+ tgtCands.writeBin(ot);
+ tgtCands.clear();
+
+ PTF pf;
+ if(currFirstWord>=vo.size()) vo.resize(currFirstWord+1,InvalidOffT);
+ vo[currFirstWord]=fTell(os);
+ pf.create(*psa,os);
+ delete psa;psa=0;
+
+ TRACE_ERR("distinct source phrases: "<<count
+ <<" distinct first words of source phrases: "<<vo.size()
+ <<" number of phrase pairs (line count): "<<lnc
+ <<"\n");
+
+ fClose(os);
+ fClose(ot);
+
+ std::vector<size_t> inv;
+ for(size_t i=0;i<vo.size();++i)
+ if(vo[i]==InvalidOffT) inv.push_back(i);
+
+ if(inv.size())
+ {
+ TRACE_ERR("WARNING: there are src voc entries with no phrase "
+ "translation: count "<<inv.size()<<"\n"
+ "There exists phrase translations for "<<vo.size()-inv.size()
+ <<" entries\n");
+ }
+
+ FILE *oi=fOpen(ofi.c_str(),"wb");
+ fWriteVector(oi,vo);
+ fClose(oi);
+
+ imp->sv->Write(ofsv);
+ imp->tv->Write(oftv);
+
+ return 1;
+}
+
+
+int PhraseDictionaryTree::Read(const std::string& fn)
+{
+ TRACE_ERR("size of OFF_T "<<sizeof(OFF_T)<<"\n");
+ return imp->Read(fn);
+}
+
+
+PhraseDictionaryTree::PrefixPtr PhraseDictionaryTree::GetRoot() const
+{
+ return imp->GetRoot();
+}
+
+PhraseDictionaryTree::PrefixPtr
+PhraseDictionaryTree::Extend(PrefixPtr p, const std::string& w) const
+{
+ return imp->Extend(p,w);
+}
+
+void PhraseDictionaryTree::PrintTargetCandidates(PrefixPtr p,std::ostream& out) const
+{
+
+ TgtCands tcand;
+ imp->GetTargetCandidates(p,tcand);
+ out<<"there are "<<tcand.size()<<" target candidates\n";
+ imp->PrintTgtCand(tcand,out);
+}
+
+void PhraseDictionaryTree::
+GetTargetCandidates(PrefixPtr p,
+ std::vector<StringTgtCand>& rv) const
+{
+ TgtCands tcands;
+ imp->GetTargetCandidates(p,tcands);
+ imp->ConvertTgtCand(tcands,rv);
+}
+
+void PhraseDictionaryTree::
+GetTargetCandidates(PrefixPtr p,
+ std::vector<StringTgtCand>& rv,
+ std::vector<StringWordAlignmentCand>& swa,
+ std::vector<StringWordAlignmentCand>& twa) const
+{
+ TgtCands tcands;
+ imp->GetTargetCandidates(p,tcands);
+ imp->ConvertTgtCand(tcands,rv,swa,twa);
+}
+
+std::string PhraseDictionaryTree::GetScoreProducerDescription() const{
+ return "PhraseDictionaryTree";
+}
+
+}
+
diff --git a/moses/src/PhraseDictionaryTree.h b/moses/src/PhraseDictionaryTree.h
new file mode 100644
index 000000000..9da4a30f8
--- /dev/null
+++ b/moses/src/PhraseDictionaryTree.h
@@ -0,0 +1,134 @@
+// $Id: PhraseDictionaryTree.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_PhraseDictionaryTree_h
+#define moses_PhraseDictionaryTree_h
+
+#include <string>
+#include <vector>
+#include <iostream>
+
+#ifdef WITH_THREADS
+#include <boost/thread/mutex.hpp>
+#endif
+
+#include "TypeDef.h"
+#include "Dictionary.h"
+
+
+#include "PrefixTree.h"
+#include "File.h"
+#include "ObjectPool.h"
+#include "LVoc.h"
+#include "TypeDef.h"
+#include "Util.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+
+class Phrase;
+class Word;
+class ConfusionNet;
+
+
+typedef PrefixTreeF<LabelId,OFF_T> PTF;
+
+class PDTimp;
+class PPimp;
+
+/** A phrase dictionary backed by a binarized prefix tree stored on disk.
+ * The whole table is NOT kept in memory: Create() converts an ascii phrase
+ * table into the binary representation, Read() opens it, and lookups go
+ * through either full source phrases or incremental PrefixPtr traversal.
+ * All real work is delegated to the hidden implementation class PDTimp
+ * (pimpl idiom); this class is non-copyable.
+ */
+class PhraseDictionaryTree : public Dictionary {
+ PDTimp *imp; //implementation
+
+ // non-copyable: copying would double-own the PDTimp pointer
+ PhraseDictionaryTree(); // not implemented
+ PhraseDictionaryTree(const PhraseDictionaryTree&); //not implemented
+ void operator=(const PhraseDictionaryTree&); //not implemented
+public:
+ PhraseDictionaryTree(size_t numScoreComponent);
+
+ // setter/getter for reading word alignment info from the binary table
+ void UseWordAlignment(bool a);
+ bool UseWordAlignment();
+
+ // setter/getter for emitting word alignment info with candidates
+ void PrintWordAlignment(bool a);
+ bool PrintWordAlignment();
+
+
+ virtual ~PhraseDictionaryTree();
+
+ DecodeType GetDecodeType() const {return Translate;}
+ size_t GetSize() const {return 0;}
+
+ // convert from ascii phrase table format
+ // note: only creates table, does not keep it in memory
+ // -> use Read(outFileNamePrefix);
+ int Create(std::istream& in,const std::string& outFileNamePrefix);
+
+ int Read(const std::string& fileNamePrefix);
+
+ // free memory used by the prefix tree etc.
+ void FreeMemory() const;
+
+
+ /**************************************
+ * access with full source phrase *
+ **************************************/
+ // print target candidates for a given phrase, mainly for debugging
+ void PrintTargetCandidates(const std::vector<std::string>& src,
+ std::ostream& out) const;
+
+ // get the target candidates for a given phrase
+ void GetTargetCandidates(const std::vector<std::string>& src,
+ std::vector<StringTgtCand>& rv) const;
+
+ // get the target candidates for a given phrase
+ void GetTargetCandidates(const std::vector<std::string>& src,
+ std::vector<StringTgtCand>& rv,
+ std::vector<StringWordAlignmentCand>& swa,
+ std::vector<StringWordAlignmentCand>& twa) const;
+
+ /*****************************
+ * access to prefix tree *
+ *****************************/
+
+ // 'pointer' into prefix tree
+ // the only permitted direct operation is a check for NULL,
+ // e.g. PrefixPtr p; if(p) ...
+ // other usage only through PhraseDictionaryTree-functions below
+
+ class PrefixPtr {
+ PPimp* imp; // owned by PDTimp's object pool, not by PrefixPtr
+ friend class PDTimp;
+ public:
+ PrefixPtr(PPimp* x=0) : imp(x) {}
+ operator bool() const;
+ };
+
+ // return pointer to root node
+ PrefixPtr GetRoot() const;
+ // extend pointer with a word/Factorstring and return the resulting successor
+ // pointer. If there is no such successor node, the result will evaluate to
+ // false. Requirement: the input pointer p evaluates to true.
+ PrefixPtr Extend(PrefixPtr p,const std::string& s) const;
+
+ // get the target candidates for a given prefix pointer
+ // requirement: the pointer has to evaluate to true
+ void GetTargetCandidates(PrefixPtr p,
+ std::vector<StringTgtCand>& rv) const;
+ void GetTargetCandidates(PrefixPtr p,
+ std::vector<StringTgtCand>& rv,
+ std::vector<StringWordAlignmentCand>& swa,
+ std::vector<StringWordAlignmentCand>& twa) const;
+
+ // print target candidates for a given prefix pointer to a stream, mainly
+ // for debugging
+ void PrintTargetCandidates(PrefixPtr p,std::ostream& out) const;
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const
+ {
+ return "tm";
+ }
+};
+
+
+}
+
+#endif
diff --git a/moses/src/PhraseDictionaryTreeAdaptor.cpp b/moses/src/PhraseDictionaryTreeAdaptor.cpp
new file mode 100644
index 000000000..b6809fc01
--- /dev/null
+++ b/moses/src/PhraseDictionaryTreeAdaptor.cpp
@@ -0,0 +1,126 @@
+// $Id: PhraseDictionaryTreeAdaptor.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+#include "PhraseDictionaryTreeAdaptor.h"
+#include <sys/stat.h>
+#include <algorithm>
+#include "PhraseDictionaryTree.h"
+#include "Phrase.h"
+#include "FactorCollection.h"
+#include "InputFileStream.h"
+#include "InputType.h"
+#include "ConfusionNet.h"
+#include "Sentence.h"
+#include "StaticData.h"
+#include "UniqueObject.h"
+#include "PDTAimp.h"
+#include "UserMessage.h"
+
+namespace Moses
+{
+/*************************************************************
+ function definitions of the interface class
+ virtually everything is forwarded to the implementation class
+*************************************************************/
+
+// Construct the adaptor; ownership of the PDTAimp implementation object
+// is taken here and released in the destructor.
+PhraseDictionaryTreeAdaptor::
+PhraseDictionaryTreeAdaptor(size_t numScoreComponent, unsigned numInputScores, const PhraseDictionaryFeature* feature)
+ : PhraseDictionary(numScoreComponent,feature), imp(new PDTAimp(this,numInputScores)) {
+}
+
+PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
+{
+ imp->CleanUp();
+ delete imp;
+}
+
+
+// Open the binarized table at 'filePath' and configure factor masks,
+// table limit and model weights. Returns false (with a user message)
+// if the number of weights does not match the number of score components.
+bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP)
+{
+ if(m_numScoreComponent!=weight.size()) {
+ stringstream strme;
+ strme << "ERROR: mismatch of number of scaling factors: "<<weight.size()
+ <<" "<<m_numScoreComponent<<"\n";
+ UserMessage::Add(strme.str());
+ return false;
+ }
+
+ // set Dictionary members
+ m_inputFactors = FactorMask(input);
+ m_outputFactors = FactorMask(output);
+ VERBOSE(2,"PhraseDictionaryTreeAdaptor: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
+
+ // set PhraseDictionary members
+ m_tableLimit=tableLimit;
+
+ imp->Create(input,output,filePath,
+ weight,languageModels,weightWP);
+ return true;
+}
+
+// Called once per input sentence: drop per-sentence state and, for
+// confusion-net input, pre-compute the per-range candidate cache.
+void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source) {
+ imp->CleanUp();
+ // caching only required for confusion net
+ if(ConfusionNet const* cn=dynamic_cast<ConfusionNet const*>(&source))
+ imp->CacheSource(*cn);
+}
+
+TargetPhraseCollection const*
+PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(Phrase const &src) const
+{
+ return imp->GetTargetPhraseCollection(src);
+}
+
+// Range-based lookup: use the per-sentence cache when it was filled by
+// InitializeForInput (confusion nets), otherwise look up the substring.
+TargetPhraseCollection const*
+PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(InputType const& src,WordsRange const &range) const
+{
+ if(imp->m_rangeCache.empty())
+ {
+ return imp->GetTargetPhraseCollection(src.GetSubString(range));
+ }
+ else
+ {
+ return imp->m_rangeCache[range.GetStartPos()][range.GetEndPos()];
+ }
+}
+
+// Replace the translation-model weights; clears cached scores first.
+void PhraseDictionaryTreeAdaptor::
+SetWeightTransModel(const std::vector<float> &weightT)
+{
+ CleanUp();
+ imp->m_weights=weightT;
+}
+
+// NOTE(review): per the header, this may only be used for UNKNOWN
+// source phrases.
+void PhraseDictionaryTreeAdaptor::
+AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
+{
+ imp->AddEquivPhrase(source,targetPhrase);
+}
+void PhraseDictionaryTreeAdaptor::EnableCache()
+{
+ imp->useCache=1;
+}
+void PhraseDictionaryTreeAdaptor::DisableCache()
+{
+ imp->useCache=0;
+}
+
+
+
+size_t PhraseDictionaryTreeAdaptor::GetNumInputScores() const {
+ return imp->GetNumInputScores();
+}
+
+std::string PhraseDictionaryTreeAdaptor::GetScoreProducerDescription() const
+{
+ return "PhraseModel";
+}
+
+}
+
+
diff --git a/moses/src/PhraseDictionaryTreeAdaptor.h b/moses/src/PhraseDictionaryTreeAdaptor.h
new file mode 100644
index 000000000..1f3fabec3
--- /dev/null
+++ b/moses/src/PhraseDictionaryTreeAdaptor.h
@@ -0,0 +1,77 @@
+// $Id: PhraseDictionaryTreeAdaptor.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_PhraseDictionaryTreeAdaptor_h
+#define moses_PhraseDictionaryTreeAdaptor_h
+
+#include <vector>
+#include "TypeDef.h"
+#include "PhraseDictionaryMemory.h"
+#include "TargetPhraseCollection.h"
+
+namespace Moses
+{
+
+class Phrase;
+class PDTAimp;
+class WordsRange;
+class InputType;
+
+/*** Implementation of a phrase table in a trie that is binarized and
+ * stored on disk.
+ */
+class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
+ typedef PhraseDictionary MyBase;
+ PDTAimp *imp; // pimpl; owned by this object (deleted in destructor)
+ friend class PDTAimp;
+ // non-copyable: copying would double-own 'imp'
+ PhraseDictionaryTreeAdaptor();
+ PhraseDictionaryTreeAdaptor(const PhraseDictionaryTreeAdaptor&);
+ void operator=(const PhraseDictionaryTreeAdaptor&);
+
+ public:
+ PhraseDictionaryTreeAdaptor(size_t numScoreComponent, unsigned numInputScores, const PhraseDictionaryFeature* feature);
+ virtual ~PhraseDictionaryTreeAdaptor();
+
+ // enable/disable caching
+ // you enable caching if you request the target candidates for a source phrase multiple times
+ // if you do caching somewhere else, disable it
+ // good settings for current Moses: disable for first factor, enable for other factors
+ // default: enable
+
+ void EnableCache();
+ void DisableCache();
+
+ // initialize ...
+ bool Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP);
+
+ // get translation candidates for a given source phrase
+ // returns null pointer if nothing found
+ TargetPhraseCollection const* GetTargetPhraseCollection(Phrase const &src) const;
+ TargetPhraseCollection const* GetTargetPhraseCollection(InputType const& src,WordsRange const & srcRange) const;
+
+
+
+ // change model scaling factors
+ void SetWeightTransModel(const std::vector<float> &weightT);
+
+ // this function can be only used for UNKNOWN source phrases
+ void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
+
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const
+ {
+ return "tm";
+ }
+
+ size_t GetNumInputScores() const;
+ // per-sentence setup; fills the range cache for confusion-net input
+ virtual void InitializeForInput(InputType const& source);
+
+};
+
+}
+#endif
diff --git a/moses/src/PrefixTree.h b/moses/src/PrefixTree.h
new file mode 100644
index 000000000..889ef6162
--- /dev/null
+++ b/moses/src/PrefixTree.h
@@ -0,0 +1,277 @@
+// $Id: PrefixTree.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/* ---------------------------------------------------------------- */
+/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
+/* Richard Zens */
+/* ---------------------------------------------------------------- */
+#ifndef moses_PrefixTree_h
+#define moses_PrefixTree_h
+
+#include <vector>
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include "Util.h"
+#include "FilePtr.h"
+#include "File.h"
+
+namespace Moses
+{
+
+/** In-memory prefix tree ("SA" = stand-alone, as opposed to the
+ * file-backed PrefixTreeF below). Each node stores three parallel,
+ * key-sorted vectors: keys, per-key data, and per-key child pointers.
+ * Children are heap-allocated lazily on insert and owned by the node.
+ * Lookups that fail return the static default value 'def'.
+ */
+template<typename T,typename D>
+class PrefixTreeSA {
+public:
+ typedef T Key;
+ typedef D Data;
+
+ typedef PrefixTreeSA<T,D> Self;
+ typedef std::vector<T> VT;
+ typedef std::vector<Self*> VP;
+ typedef std::vector<D> VD;
+
+ // parallel arrays: keys[i] <-> data[i] <-> ptr[i]; keys kept sorted
+ VT keys;
+ VP ptr;
+ VD data;
+
+ static Data def;
+
+public:
+ PrefixTreeSA() {}
+
+ // recursively deletes all owned child nodes
+ ~PrefixTreeSA() {for(size_t i=0;i<ptr.size();++i) delete ptr[i];}
+
+ static const Data& getDefault() {return def;}
+ static void setDefault(const Data& x) {def=x;}
+
+
+ // insert sequence
+ // returns a reference to the data slot of the final element, creating
+ // intermediate nodes (initialized with 'def') as needed
+ template<typename fwiter> Data& insert(fwiter b,fwiter e) {
+ typename VT::iterator i=std::lower_bound(keys.begin(),keys.end(),*b);
+ typename VT::iterator kb=keys.begin();
+ size_t pos=std::distance(kb,i);
+
+ if(i==keys.end() || *i!=*b) {
+ keys.insert(i,*b);
+ data.insert(data.begin()+pos,def);
+ ptr.insert(ptr.begin()+pos,0);
+ }
+ if(++b!=e) {
+ if(!ptr[pos]) ptr[pos]=new Self;
+ return ptr[pos]->insert(b,e);
+ }
+ else return data[pos];
+ }
+ // insert container
+ template<typename cont> Data& insert(const cont& c) {
+ return insert(c.begin(),c.end());}
+
+ size_t size() const {return keys.size();}
+ const Key& getKey(size_t i) const {return keys[i];}
+ const Data& getData(size_t i) const {return data[i];}
+ const Self* getPtr(size_t i) const {return ptr[i];}
+
+ // binary search; returns keys.size() when k is absent
+ size_t findKey(const Key& k) const {
+ typename VT::const_iterator i=std::lower_bound(keys.begin(),keys.end(),k);
+ if(i==keys.end() || *i!=k) return keys.size();
+ return std::distance(keys.begin(),i);
+ }
+
+ // find sequence
+ template<typename fwiter> const Data* findPtr(fwiter b,fwiter e) const {
+ size_t pos=findKey(*b);
+ if(pos==keys.size()) return 0;
+ if(++b==e) return &data[pos];
+ if(ptr[pos]) return ptr[pos]->findPtr(b,e); else return 0;
+ }
+ // find container
+ template<typename cont> const Data* findPtr(const cont& c) const {
+ return findPtr(c.begin(),c.end());}
+
+
+ // find sequence
+ template<typename fwiter> const Data& find(fwiter b,fwiter e) const {
+ if(const Data* p=findPtr(b,e)) return *p; else return def;
+ }
+
+ // find container
+ template<typename cont> const Data& find(const cont& c) const {
+ return find(c.begin(),c.end());}
+
+ void shrink() {
+ ShrinkToFit(keys); ShrinkToFit(ptr); ShrinkToFit(data);}
+
+};
+// definition of the per-instantiation static default value
+template<typename T,typename D> D PrefixTreeSA<T,D>::def;
+
+/////////////////////////////////////////////////////////////////////////////
+
+/** File-backed prefix tree node ("F" = file). Mirrors PrefixTreeSA, but
+ * child pointers are FilePtr wrappers holding file offsets that are
+ * dereferenced (loaded) lazily. create() serializes an in-memory
+ * PrefixTreeSA breadth-/depth-mixed via an explicit work queue,
+ * back-patching child offsets as nodes are written.
+ */
+template<typename T,typename D>
+class PrefixTreeF {
+public:
+ typedef T Key;
+ typedef D Data;
+private:
+ typedef PrefixTreeF<Key,Data> Self;
+public:
+ typedef FilePtr<Self> Ptr;
+private:
+ typedef std::vector<Key> VK;
+ typedef std::vector<Data> VD;
+ typedef std::vector<Ptr> VP;
+
+ // parallel, key-sorted arrays as in PrefixTreeSA
+ VK keys;
+ VD data;
+ VP ptr;
+
+ static Data def;
+
+ OFF_T startPos; // file offset where this node's record begins
+ FILE* f; // underlying file; not owned by this class
+public:
+
+ PrefixTreeF(FILE* f_=0) : f(f_) {if(f) read();}
+
+ ~PrefixTreeF() {free();}
+
+ // load this node's record from the current file position
+ // NOTE(review): the raw fread return value is unchecked — a short read
+ // would leave offsets zeroed; confirm callers guarantee file validity
+ void read() {
+ startPos=fTell(f);
+ fReadVector(f,keys);
+ fReadVector(f,data);
+ ptr.clear();ptr.resize(keys.size());
+ std::vector<OFF_T> rawOffs(keys.size());
+ fread(&rawOffs[0], sizeof(OFF_T), keys.size(), f);
+ for(size_t i=0;i<ptr.size();++i)
+ if (rawOffs[i]) ptr[i].set(f, rawOffs[i]);
+ }
+
+ void free() {
+ for(typename VP::iterator i=ptr.begin();i!=ptr.end();++i) i->free();}
+
+ void reserve(size_t s) {
+ keys.reserve(s);data.reserve(s);ptr.reserve(s);}
+
+ // overwrite the stored data for an existing key sequence, in memory and
+ // in the underlying file (seeks to the on-disk slot and rewrites it)
+ template<typename fwiter>
+ void changeData(fwiter b,fwiter e,const Data& d) {
+ typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b);
+ if(i==keys.end() || *i!=*b) {
+ TRACE_ERR("ERROR: key not found in changeData!\n"); return;}
+ typename VK::const_iterator kb=keys.begin();
+ size_t pos=std::distance(kb,i);
+ if(++b==e) {
+ // on-disk layout: [count][keys][count][data] -> the two 'unsigned'
+ // terms account for the two vector length fields
+ OFF_T p=startPos+keys.size()*sizeof(Key)+2*sizeof(unsigned)+pos*sizeof(Data);
+ TRACE_ERR("elem found at pos "<<p<<" old val: "<<data[pos]<<" startpos: "<<startPos<<"\n");
+ if(data[pos]!=d) {
+ data[pos]=d;fSeek(f,p);fWrite(f,d);}
+ return;
+ }
+ if(ptr[pos]) ptr[pos]->changeData(b,e,d); else {
+ TRACE_ERR("ERROR: seg not found!in changeData\n");
+ }
+ }
+
+
+ // convenience wrapper: serialize 'psa' into a new file 'fname'
+ // NOTE(review): the local FILE* shadows the member 'f', and is closed
+ // with fclose() while it was opened with fOpen() — confirm fOpen/fClose
+ // are not required to be paired
+ void create(const PrefixTreeSA<Key,Data>& psa,const std::string& fname) {
+ FILE* f=fOpen(fname.c_str(),"wb");
+ create(psa,f);
+ fclose(f);
+ }
+
+ // serialize 'psa' to an open file using an explicit work queue;
+ // child offset slots are written as 0 first, then back-patched when the
+ // corresponding child node is actually emitted
+ void create(const PrefixTreeSA<Key,Data>& psa,FILE* f,int verbose=0) {
+ setDefault(psa.getDefault());
+
+ typedef std::pair<const PrefixTreeSA<Key,Data>*,OFF_T> P;
+ typedef std::deque<P> Queue;
+
+ Queue queue;
+
+ queue.push_back(P(&psa,fTell(f)));
+ bool isFirst=1;
+ size_t ns=1;
+ while(queue.size()) {
+ if(verbose && queue.size()>ns) {
+ TRACE_ERR("stack size in PF create: "<<queue.size()<<"\n");
+ while(ns<queue.size()) ns*=2;}
+ const P& pp=queue.back();
+ const PrefixTreeSA<Key,Data>& p=*pp.first;
+ OFF_T pos=pp.second;
+ queue.pop_back();
+
+ if(!isFirst) {
+ // back-patch the parent's offset slot to point at this node
+ OFF_T curr=fTell(f);
+ fSeek(f,pos);
+ fWrite(f,curr);
+ fSeek(f,curr);
+ } else isFirst=0;
+
+ size_t s=0;
+ s+=fWriteVector(f,p.keys);
+ s+=fWriteVector(f,p.data);
+
+ for(size_t i=0;i<p.ptr.size();++i) {
+ if(p.ptr[i])
+ queue.push_back(P(p.ptr[i],fTell(f)));
+ OFF_T ppos=0;
+ s+=fWrite(f,ppos);
+ }
+ }
+ }
+
+ size_t size() const {return keys.size();}
+ const Key& getKey(size_t i) const {return keys[i];}
+ const Data& getData(size_t i) const {return data[i];}
+ const Self* getPtr(size_t i) const {return ptr[i];}
+
+ // binary search; returns keys.size() when k is absent
+ size_t findKey(const Key& k) const {
+ typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),k);
+ if(i==keys.end() || *i!=k) return keys.size();
+ return std::distance(keys.begin(),i);
+ }
+
+ Ptr const* findKeyPtr(const Key& k) const {
+ size_t pos=findKey(k);
+ return (pos<keys.size() ? &ptr[pos] : 0);
+ }
+
+ // find sequence
+ template<typename fwiter> const Data* findPtr(fwiter b,fwiter e) const {
+ typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b);
+ if(i==keys.end() || *i!=*b) return 0;
+ size_t pos=std::distance(keys.begin(),i);
+ if(++b==e) return &data[pos];
+ if(ptr[pos]) return ptr[pos]->findPtr(b,e); else return 0;
+ }
+ // find container
+ template<typename cont> const Data* findPtr(const cont& c) const {
+ return findPtr(c.begin(),c.end());}
+
+
+ // find sequence
+ template<typename fwiter> const Data& find(fwiter b,fwiter e) const {
+ if(const Data* p=findPtr(b,e)) return *p; else return def;} //return (p?*p:def);}
+
+ // find container
+ template<typename cont> const Data& find(const cont& c) const {
+ return find(c.begin(),c.end());}
+
+ static void setDefault(const Data& d) {def=d;}
+ static const Data& getDefault() {return def;}
+
+
+ // debugging aid: recursive dump with indentation per tree level
+ void print(std::ostream& out,const std::string s="") const {
+
+ out<<s<<"startpos: "<<startPos<<" size: "<<keys.size()<<"\n";
+ for(size_t i=0;i<keys.size();++i) {
+ out<<s<<i<<" - "<<keys[i]<<" "<<data[i]<<"\n";
+ }
+ for(size_t i=0;i<ptr.size();++i)
+ if(ptr[i])
+ ptr[i]->print(out,s+" ");
+ }
+
+
+};
+// definition of the per-instantiation static default value
+template<typename T,typename D> D PrefixTreeF<T,D>::def;
+
+}
+
+#endif
diff --git a/moses/src/PrefixTreeMap.cpp b/moses/src/PrefixTreeMap.cpp
new file mode 100644
index 000000000..effa7fb73
--- /dev/null
+++ b/moses/src/PrefixTreeMap.cpp
@@ -0,0 +1,219 @@
+#include "PrefixTreeMap.h"
+#include "TypeDef.h"
+
+namespace Moses
+{
+// Deserialize a candidate from binary format: a UINT32 phrase count,
+// the phrases, a UINT32 score-vector count, then the score vectors.
+// Existing contents are discarded first.
+void GenericCandidate::readBin(FILE* f){
+ m_PhraseList.clear();
+ m_ScoreList.clear();
+ UINT32 num_phrases; // on older compilers, <stdint.h> may need to be included
+ fRead(f, num_phrases);
+ for(unsigned int i = 0; i < num_phrases; ++i){
+ IPhrase phrase;
+ fReadVector(f, phrase);
+ m_PhraseList.push_back(phrase);
+ };
+ UINT32 num_scores;
+ fRead(f, num_scores);
+ for(unsigned int j = 0; j < num_scores; ++j){
+ std::vector<float> score;
+ fReadVector(f, score);
+ m_ScoreList.push_back(score);
+ };
+};
+
+// Serialize in the exact layout readBin() expects.
+void GenericCandidate::writeBin(FILE* f) const {
+ // cast is necessary to ensure compatibility between 32- and 64-bit platforms
+ fWrite(f, static_cast<UINT32>(m_PhraseList.size()));
+ for(size_t i = 0; i < m_PhraseList.size(); ++i){
+ fWriteVector(f, m_PhraseList[i]);
+ }
+ fWrite(f, static_cast<UINT32>(m_ScoreList.size()));
+ for(size_t j = 0; j < m_ScoreList.size(); ++j){
+ fWriteVector(f, m_ScoreList[j]);
+ }
+};
+
+
+// Serialize the candidate list: UINT32 element count, then each element.
+void Candidates::writeBin(FILE* f) const {
+ UINT32 s = this->size();
+ fWrite(f,s);
+ for(size_t i = 0; i < s; ++i) {
+ MyBase::operator[](i).writeBin(f);
+ }
+}
+
+// Deserialize a candidate list previously written by writeBin().
+void Candidates::readBin(FILE* f) {
+ UINT32 s;
+ fRead(f,s);
+ this->resize(s);
+ for(size_t i = 0; i<s; ++i) {
+ MyBase::operator[](i).readBin(f);
+ }
+}
+
+// Sentinel label used to encode the "|||" separator inside stored phrases.
+const LabelId PrefixTreeMap::MagicWord = std::numeric_limits<LabelId>::max() - 1;
+
+
+// Release the lazily-loaded prefix-tree nodes and the pointer pool.
+// Vocabularies are deliberately NOT deleted here: they come from the
+// shared ReadVoc() cache in this file (hence the commented-out loop).
+void PrefixTreeMap::FreeMemory() {
+ for(Data::iterator i = m_Data.begin(); i != m_Data.end(); ++i){
+ i->free();
+ }
+ /*for(size_t i = 0; i < m_Voc.size(); ++i){
+ delete m_Voc[i];
+ m_Voc[i] = 0;
+ }*/
+ m_PtrPool.reset();
+}
+
+// Load a word vocabulary from 'filename', caching one WordVoc per file so
+// that models sharing a vocabulary read it only once. The returned pointer
+// is owned by the cache and lives for the remainder of the process.
+static WordVoc* ReadVoc(const std::string& filename) {
+ static std::map<std::string,WordVoc*> vocs;
+#ifdef WITH_THREADS
+ // The mutex must be 'static': a function-local automatic mutex would be
+ // a fresh, private object per call and would not serialize concurrent
+ // access to the shared 'vocs' map at all.
+ static boost::mutex mutex;
+ boost::mutex::scoped_lock lock(mutex);
+#endif
+ std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
+ if (vi == vocs.end()) {
+ WordVoc* voc = new WordVoc();
+ voc->Read(filename);
+ vocs[filename] = voc;
+ }
+ return vocs[filename];
+}
+
+// Open a binarized table: read the root-offset index (.idx), open the
+// source-tree (.srctree) and target-data (.tgtdata) files, attach one
+// lazily-loaded prefix tree per first-word offset, and load numVocs
+// vocabularies (.voc0, .voc1, ...). numVocs == -1 means auto-detect by
+// probing which .vocN files exist. Always returns 1.
+int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
+ std::string ifs(fileNameStem + ".srctree"),
+ ift(fileNameStem + ".tgtdata"),
+ ifi(fileNameStem + ".idx"),
+ ifv(fileNameStem + ".voc");
+
+ std::vector<OFF_T> srcOffsets;
+ FILE *ii=fOpen(ifi.c_str(),"rb");
+ fReadVector(ii,srcOffsets);
+ fClose(ii);
+
+ // close any files left open by a previous Read()
+ if (m_FileSrc) {
+ fClose(m_FileSrc);
+ }
+ m_FileSrc = fOpen(ifs.c_str(),"rb");
+ if (m_FileTgt) {
+ fClose(m_FileTgt);
+ }
+ m_FileTgt = fOpen(ift.c_str(),"rb");
+
+ m_Data.resize(srcOffsets.size());
+
+ // one FilePtr-backed tree root per possible first source word
+ for(size_t i = 0; i < m_Data.size(); ++i){
+ m_Data[i] = CPT(m_FileSrc, srcOffsets[i]);
+ }
+
+ if(-1 == numVocs){
+ char num[5];
+ numVocs = 0;
+ sprintf(num, "%d", numVocs);
+ while(FileExists(ifv + num)){
+ ++numVocs;
+ sprintf(num, "%d", numVocs);
+ }
+ }
+ char num[5];
+ m_Voc.resize(numVocs);
+ for(int i = 0; i < numVocs; ++i){
+ sprintf(num, "%d", i);
+ //m_Voc[i] = new WordVoc();
+ //m_Voc[i]->Read(ifv + num);
+ m_Voc[i] = ReadVoc(ifv + num); // shared, cached vocabulary
+ }
+
+ TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");
+ return 1;
+};
+
+
+// Look up the candidates stored for the full source key; on a miss the
+// output 'cands' is left untouched. The stored data value is an offset
+// into the target-data file from which the candidates are deserialized.
+void PrefixTreeMap::GetCandidates(const IPhrase& key, Candidates* cands) {
+ //check if key is valid
+ if(key.empty() || key[0] >= m_Data.size() || !m_Data[key[0]]){
+ return;
+ }
+ assert(m_Data[key[0]]->findKey(key[0])<m_Data[key[0]]->size());
+
+ OFF_T candOffset = m_Data[key[0]]->find(key);
+ if(candOffset == InvalidOffT){
+ return;
+ }
+ fSeek(m_FileTgt,candOffset);
+ cands->readBin(m_FileTgt);
+}
+
+// As above, but starting from an already-traversed prefix-tree position.
+void PrefixTreeMap::GetCandidates(const PPimp& p, Candidates* cands) {
+ assert(p.isValid());
+ if(p.isRoot()) {
+ return;
+ };
+ OFF_T candOffset = p.ptr()->getData(p.idx);
+ if(candOffset == InvalidOffT) {
+ return;
+ }
+ fSeek(m_FileTgt,candOffset);
+ cands->readBin(m_FileTgt);
+}
+
+// Map label ids back to string pointers owned by vocabulary 'voc'.
+std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p, unsigned int voc) const{
+ assert(voc < m_Voc.size() && m_Voc[voc] != 0);
+ std::vector< std::string const * > result; result.reserve(p.size());
+ for(IPhrase::const_iterator i = p.begin(); i != p.end(); ++i){
+ result.push_back(&(m_Voc[voc]->symbol(*i)));
+ }
+ return result;
+}
+
+// Map word strings to label ids using vocabulary 'voc'.
+IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const{
+ assert(voc < m_Voc.size() && m_Voc[voc] != 0);
+ IPhrase result;
+ result.reserve(p.size());
+ for(size_t i = 0; i < p.size(); ++i){
+ result.push_back(m_Voc[voc]->index(p[i]));
+ }
+ return result;
+}
+
+LabelId PrefixTreeMap::ConvertWord(const std::string& w, unsigned int voc) const {
+ assert(voc < m_Voc.size() && m_Voc[voc] != 0);
+ return m_Voc[voc]->index(w);
+}
+
+// Map a label id to its string, handling the two sentinel ids specially.
+std::string PrefixTreeMap::ConvertWord(LabelId w, unsigned int voc) const {
+ assert(voc < m_Voc.size() && m_Voc[voc] != 0);
+ if(w == PrefixTreeMap::MagicWord){
+ return "|||";
+ } else if (w == InvalidLabelId){
+ return "<invalid>";
+ } else {
+ return m_Voc[voc]->symbol(w);
+ }
+}
+
+// Root position; pooled so callers never free the returned pointer.
+PPimp* PrefixTreeMap::GetRoot() {
+ return m_PtrPool.get(PPimp(0,0,1));
+}
+
+// Advance position p by one label; returns 0 (an invalid position) when
+// the word is unknown or no such child exists.
+PPimp* PrefixTreeMap::Extend(PPimp* p, LabelId wi) {
+ assert(p->isValid());
+
+ if(wi == InvalidLabelId) {
+ return 0; // unknown word, return invalid pointer
+
+ } else if(p->isRoot()) {
+ if(wi < m_Data.size() && m_Data[wi]){
+ assert(m_Data[wi]->findKeyPtr(wi));
+ return m_PtrPool.get(PPimp(m_Data[wi],m_Data[wi]->findKey(wi),0));
+ }
+ } else if(PTF const* nextP = p->ptr()->getPtr(p->idx)) {
+ return m_PtrPool.get(PPimp(nextP, nextP->findKey(wi),0));
+ }
+ return 0; // should never get here, return invalid pointer
+
+}
+
+}
+
diff --git a/moses/src/PrefixTreeMap.h b/moses/src/PrefixTreeMap.h
new file mode 100644
index 000000000..a050ae711
--- /dev/null
+++ b/moses/src/PrefixTreeMap.h
@@ -0,0 +1,137 @@
+#ifndef moses_PrefixTreeMap_h
+#define moses_PrefixTreeMap_h
+
+#include<vector>
+#include<climits>
+#include<iostream>
+#include <map>
+
+#ifdef WITH_THREADS
+#include <boost/thread/mutex.hpp>
+#endif
+
+
+#include "PrefixTree.h"
+#include "File.h"
+#include "LVoc.h"
+#include "ObjectPool.h"
+
+namespace Moses
+{
+
+
+typedef PrefixTreeF<LabelId,OFF_T> PTF;
+typedef FilePtr<PTF> CPT;
+typedef std::vector<CPT> Data;
+typedef LVoc<std::string> WordVoc;
+
+/** One translation candidate: a list of target phrases (as label-id
+ * sequences) plus a parallel list of score vectors. Supports binary
+ * (de)serialization via readBin()/writeBin() in PrefixTreeMap.cpp.
+ */
+class GenericCandidate {
+public:
+ typedef std::vector<IPhrase> PhraseList;
+ typedef std::vector< std::vector<float> > ScoreList;
+public:
+ GenericCandidate(){
+ };
+ GenericCandidate(const GenericCandidate& other)
+ : m_PhraseList(other.m_PhraseList), m_ScoreList(other.m_ScoreList) {
+ };
+ GenericCandidate(const PhraseList& p, const ScoreList& s)
+ : m_PhraseList(p), m_ScoreList(s) {
+ };
+ ~GenericCandidate(){
+ };
+public:
+ size_t NumPhrases() const {
+ return m_PhraseList.size();
+ };
+ size_t NumScores() const {
+ return m_ScoreList.size();
+ };
+ // at(i): throws std::out_of_range on a bad index
+ const IPhrase& GetPhrase(unsigned int i) const {
+ return m_PhraseList.at(i);
+ }
+ const std::vector<float>& GetScore(unsigned int i) const {
+ return m_ScoreList.at(i);
+ }
+ void readBin(FILE* f);
+ void writeBin(FILE* f) const;
+private:
+ PhraseList m_PhraseList;
+ ScoreList m_ScoreList;
+};
+
+/*
+class PPtr {
+ public:
+ typedef unsigned IndexType;
+ public:
+ PPtr(PTF const* p, IndexType i, bool isRoot)
+ : m_Ptr(p), m_Index(i), m_IsRoot(isRoot){
+ };
+ ~PPtr(){
+ };
+};
+*/
+
+/** Lightweight position inside a file-backed prefix tree: node pointer
+ * plus key index, with a flag distinguishing the artificial root (which
+ * has no node). Instances are pooled by PrefixTreeMap's ObjectPool.
+ */
+struct PPimp {
+ PTF const*p;unsigned idx;bool root;
+
+ PPimp(PTF const* x,unsigned i,bool b) : p(x),idx(i),root(b) {}
+ // the root is always valid; otherwise the index must be in range
+ bool isValid() const {return root || (p && idx<p->size());}
+
+ bool isRoot() const {return root;}
+ PTF const* ptr() const {return p;}
+};
+
+
+/** A serializable list of GenericCandidate; layout is a UINT32 count
+ * followed by the elements (see PrefixTreeMap.cpp).
+ */
+class Candidates : public std::vector<GenericCandidate> {
+ typedef std::vector<GenericCandidate> MyBase;
+ public:
+ Candidates() : MyBase() {
+ };
+ void writeBin(FILE* f) const;
+ void readBin(FILE* f);
+};
+
+/** Disk-backed phrase-table map: a prefix tree over source label ids whose
+ * leaf data are offsets into a target-data file holding Candidates.
+ * Files are opened by Read() and closed in the destructor.
+ */
+class PrefixTreeMap {
+ public:
+ PrefixTreeMap() : m_FileSrc(0), m_FileTgt(0) {
+ PTF::setDefault(InvalidOffT); // failed lookups yield InvalidOffT
+ }
+ ~PrefixTreeMap() {
+ if(m_FileSrc) {fClose(m_FileSrc);}
+ if(m_FileTgt) {fClose(m_FileTgt);}
+ FreeMemory();
+ }
+ public:
+ // sentinel label encoding the "|||" separator
+ static const LabelId MagicWord;
+ public:
+ void FreeMemory();
+
+ // numVocs == -1: auto-detect the number of .vocN files
+ int Read(const std::string& fileNameStem, int numVocs = -1);
+
+ void GetCandidates(const IPhrase& key, Candidates* cands);
+ void GetCandidates(const PPimp& p, Candidates* cands);
+
+ std::vector< std::string const * > ConvertPhrase(const IPhrase& p, unsigned int voc) const;
+ IPhrase ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const;
+ LabelId ConvertWord(const std::string& w, unsigned int voc) const;
+ std::string ConvertWord(LabelId w, unsigned int voc) const;
+public: //low level
+ PPimp* GetRoot();
+ PPimp* Extend(PPimp* p, LabelId wi);
+ PPimp* Extend(PPimp* p, const std::string w, size_t voc){
+ return Extend(p, ConvertWord(w,voc));
+ }
+ private:
+ Data m_Data; // one lazily-loaded tree root per first source word
+ FILE* m_FileSrc; // source-tree file, owned (closed in destructor)
+ FILE* m_FileTgt; // target-data file, owned (closed in destructor)
+
+ std::vector<WordVoc*> m_Voc; // owned by the ReadVoc() cache, not freed here
+ ObjectPool<PPimp> m_PtrPool;
+};
+
+}
+
+#endif
diff --git a/moses/src/ReorderingConstraint.cpp b/moses/src/ReorderingConstraint.cpp
new file mode 100644
index 000000000..a3f1c91fe
--- /dev/null
+++ b/moses/src/ReorderingConstraint.cpp
@@ -0,0 +1,249 @@
+// $Id: ReorderingConstraint.cpp 988 2006-11-21 19:35:37Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2008 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "ReorderingConstraint.h"
+#include "InputType.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+
+//! allocate memory for reordering walls
+// NOTE(review): malloc results are unchecked and no matching free() is
+// visible in this chunk — confirm the destructor releases these arrays
+void ReorderingConstraint::InitializeWalls(size_t size)
+{
+ m_size = size;
+ m_wall = (bool*) malloc(sizeof(bool) * size);
+ m_localWall = (bool*) malloc(sizeof(bool) * size);
+
+ for (size_t pos = 0 ; pos < m_size ; pos++)
+ {
+ m_wall[pos] = false;
+ m_localWall[pos] = false;
+ }
+}
+
+
+//! set value at a particular position
+// Marks the constraint as active as a side effect, even when value==false.
+void ReorderingConstraint::SetWall( size_t pos, bool value )
+{
+ VERBOSE(3,"SETTING reordering wall at position " << pos << std::endl);
+ m_wall[pos] = value;
+ m_active = true;
+}
+
+//! has to be called to localized walls
+// Converts global walls that fall inside a declared zone into local walls,
+// which are only enforced once translation has entered that zone.
+void ReorderingConstraint::FinalizeWalls()
+{
+ for(size_t z = 0; z < m_zone.size(); z++ )
+ {
+ const size_t startZone = m_zone[z][0];
+ const size_t endZone = m_zone[z][1];// note: wall after endZone is not local
+ for( size_t pos = startZone; pos < endZone; pos++ )
+ {
+ if (m_wall[ pos ])
+ {
+ m_localWall[ pos ] = true;
+ m_wall[ pos ] = false;
+ VERBOSE(3,"SETTING local wall " << pos << std::endl);
+ }
+ }
+ }
+}
+
+//! set walls based on "-monotone-at-punctuation" flag
+// NOTE(review): the code sets walls AT the punctuation mark (i) and at the
+// word before it (i-1); the inline comment says "before and after" — verify
+// which placement is intended
+void ReorderingConstraint::SetMonotoneAtPunctuation( const Phrase &sentence )
+{
+ for( size_t i=0; i<sentence.GetSize(); i++ )
+ {
+ const Word& word = sentence.GetWord(i);
+ if (word[0]->GetString() == "," ||
+ word[0]->GetString() == "." ||
+ word[0]->GetString() == "!" ||
+ word[0]->GetString() == "?" ||
+ word[0]->GetString() == ":" ||
+ word[0]->GetString() == ";" ||
+ word[0]->GetString() == "\"")
+ {
+ // set wall before and after punc, but not at sentence start, end
+ if (i>0 && i<m_size-1) SetWall( i, true );
+ if (i>1) SetWall( i-1, true );
+ }
+ }
+}
+
+//! set a reordering zone (once entered, need to finish)
+// Zones are stored as [startPos, endPos] pairs; declaring one activates
+// constraint checking.
+void ReorderingConstraint::SetZone( size_t startPos, size_t endPos )
+{
+ VERBOSE(3,"SETTING zone " << startPos << "-" << endPos << std::endl);
+ std::vector< size_t > newZone;
+ newZone.push_back( startPos );
+ newZone.push_back( endPos );
+ m_zone.push_back( newZone );
+ m_active = true;
+}
+
+//! check if the current hypothesis extension violates reordering constraints
+// Given the coverage bitmap so far and the proposed new span
+// [startPos, endPos], returns true iff covering that span violates neither
+// a (global or local) wall nor any declared reordering zone. Returns true
+// immediately when no constraint is active.
+bool ReorderingConstraint::Check( const WordsBitmap &bitmap, size_t startPos, size_t endPos ) const
+{
+ // nothing to be checked, we are done
+ if (! IsActive() ) return true;
+
+ VERBOSE(3,"CHECK " << bitmap << " " << startPos << "-" << endPos);
+
+ // check walls
+ size_t firstGapPos = bitmap.GetFirstGapPos();
+ // filling first gap -> no wall violation possible
+ if (firstGapPos != startPos)
+ {
+ // if there is a wall before the last word,
+ // we created a gap while moving through wall
+ // -> violation
+ for( size_t pos = firstGapPos; pos < endPos; pos++ )
+ {
+ if( GetWall( pos ) )
+ {
+ VERBOSE(3," hitting wall " << pos << std::endl);
+ return false;
+ }
+ }
+ }
+
+ // monotone -> no violation possible
+ size_t lastPos = bitmap.GetLastPos();
+ if ((lastPos == NOT_FOUND && startPos == 0) ||
+ (firstGapPos > lastPos && firstGapPos == startPos))
+ {
+ VERBOSE(3," montone, fine." << std::endl);
+ return true;
+ }
+
+ // check zones
+ for(size_t z = 0; z < m_zone.size(); z++ )
+ {
+ const size_t startZone = m_zone[z][0];
+ const size_t endZone = m_zone[z][1];
+
+ // fine, if translation has not reached zone yet and phrase outside zone
+ if (lastPos < startZone && ( endPos < startZone || startPos > endZone ) ) {
+ continue;
+ }
+
+ // already completely translated zone, no violations possible
+ if (firstGapPos > endZone)
+ {
+ continue;
+ }
+
+ // some words are translated beyond the start
+ // let's look closer if some are in the zone
+ size_t numWordsInZoneTranslated = 0;
+ if (lastPos >= startZone)
+ {
+ for(size_t pos = startZone; pos <= endZone; pos++ )
+ {
+ if( bitmap.GetValue( pos ) )
+ {
+ numWordsInZoneTranslated++;
+ }
+ }
+ }
+
+ // all words in zone translated, no violation possible
+ if (numWordsInZoneTranslated == endZone-startZone+1)
+ {
+ continue;
+ }
+
+ // flag if this is an active zone
+ bool activeZone = (numWordsInZoneTranslated > 0);
+
+ // fine, if zone completely untranslated and phrase outside zone
+ if (!activeZone && ( endPos < startZone || startPos > endZone ) ) {
+ continue;
+ }
+
+ // violation, if phrase completely outside active zone
+ if (activeZone && ( endPos < startZone || startPos > endZone ) ) {
+ VERBOSE(3," outside active zone" << std::endl);
+ return false;
+ }
+
+ // ok, this is what we know now:
+ // * the phrase is in the zone (at least partially)
+ // * either zone is already active, or it becomes active now
+
+ // check, if we are setting us up for a dead end due to distortion limits
+ if (startPos != firstGapPos && endZone-firstGapPos >= StaticData::Instance().GetMaxDistortion()) {
+ VERBOSE(3," dead end due to distortion limit" << std::endl);
+ return false;
+ }
+
+ // let us check on phrases that are partially outside
+
+ // phrase overlaps at the beginning, always ok
+ if (startPos <= startZone)
+ {
+ continue;
+ }
+
+ // phrase goes beyond end, has to fill zone completely
+ if (endPos > endZone)
+ {
+ if (endZone-startPos+1 < // num. words filled in by phrase
+ endZone-startZone+1-numWordsInZoneTranslated) // num. untranslated
+ {
+ VERBOSE(3," overlap end, but not completing" << std::endl);
+ return false;
+ }
+ else
+ {
+ continue;
+ }
+ }
+
+ // now we are down to phrases that are completely inside the zone
+ // we have to check local walls
+ bool seenUntranslatedBeforeStartPos = false;
+ for(size_t pos = startZone; pos < endZone && pos < endPos; pos++ )
+ {
+ // be careful when there is a gap before phrase
+ if( !bitmap.GetValue( pos ) // untranslated word
+ && pos < startPos ) // before startPos
+ {
+ seenUntranslatedBeforeStartPos = true;
+ }
+ if( seenUntranslatedBeforeStartPos && GetLocalWall( pos ) )
+ {
+ VERBOSE(3," local wall violation" << std::endl);
+ return false;
+ }
+ }
+
+ // passed all checks for this zone, on to the next one
+ }
+
+ // passed all checks, no violations
+ VERBOSE(3," fine." << std::endl);
+ return true;
+}
+
+}
diff --git a/moses/src/ReorderingConstraint.h b/moses/src/ReorderingConstraint.h
new file mode 100644
index 000000000..e8e0350f9
--- /dev/null
+++ b/moses/src/ReorderingConstraint.h
@@ -0,0 +1,97 @@
+// $Id: ReorderingConstraint.h 1466 2007-09-27 23:22:58Z redpony $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2008 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_ReorderingConstraint_h
+#define moses_ReorderingConstraint_h
+
+//#include <malloc.h>
+#include <limits>
+#include <vector>
+#include <iostream>
+#include <cstring>
+#include <cmath>
+#include "TypeDef.h"
+#include "Word.h"
+#include "Phrase.h"
+
+namespace Moses
+{
+
+class InputType;
+
+/** vector of boolean used to represent whether a word has been translated or not */
+class ReorderingConstraint
+{
+ friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint& reorderingConstraint);
+protected:
+ // const size_t m_size; /**< number of words in sentence */
+ size_t m_size; /**< number of words in sentence */
+ bool *m_wall; /**< flag for each word if it is a wall */
+ bool *m_localWall; /**< flag for each word if it is a local wall */
+ std::vector< std::vector< size_t > > m_zone; /** zones that limit reordering */
+ bool m_active; /**< flag indicating, if there are any active constraints */
+
+public:
+
+ //! create ReorderingConstraint of length size and initialise to zero
+ ReorderingConstraint() :m_wall(NULL),m_localWall(NULL),m_active(false) {}
+
+ //! destructor
+ ~ReorderingConstraint()
+ {
+ if (m_wall != NULL) free(m_wall);
+ if (m_localWall != NULL) free(m_localWall);
+ }
+
+ //! allocate memory for a sentence of a given size
+ void InitializeWalls(size_t size);
+
+ //! changes walls in zones into local walls
+ void FinalizeWalls();
+
+ //! set value at a particular position
+ void SetWall( size_t pos, bool value );
+
+ //! whether there is a reordering wall at a particular position
+ bool GetWall(size_t pos) const { return m_wall[pos]; }
+
+ //! whether there is a local reordering wall at a particular position
+ bool GetLocalWall(size_t pos) const { return m_localWall[pos]; }
+
+ //! set a zone
+ void SetZone( size_t startPos, size_t endPos );
+
+ //! returns the vector of zones
+ std::vector< std::vector< size_t > > & GetZones() { return m_zone; }
+
+ //! set the reordering walls based on punctuation in the sentence
+ void SetMonotoneAtPunctuation( const Phrase & sentence );
+
+ //! check if all constraints are fulfilled -> all fine
+ bool Check( const WordsBitmap &bitmap, size_t start, size_t end ) const;
+
+ //! checks if reordering constraints will be enforced
+ bool IsActive() const { return m_active; }
+};
+
+}
+#endif
diff --git a/moses/src/ScoreComponentCollection.cpp b/moses/src/ScoreComponentCollection.cpp
new file mode 100644
index 000000000..6f7b630db
--- /dev/null
+++ b/moses/src/ScoreComponentCollection.cpp
@@ -0,0 +1,15 @@
+// $Id: ScoreComponentCollection.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+#include "ScoreComponentCollection.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+ScoreComponentCollection::ScoreComponentCollection()
+ : m_scores(StaticData::Instance().GetTotalScoreComponents(), 0.0f)
+ , m_sim(&StaticData::Instance().GetScoreIndexManager())
+{}
+
+}
+
+
diff --git a/moses/src/ScoreComponentCollection.h b/moses/src/ScoreComponentCollection.h
new file mode 100644
index 000000000..712fe5673
--- /dev/null
+++ b/moses/src/ScoreComponentCollection.h
@@ -0,0 +1,207 @@
+// $Id: ScoreComponentCollection.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_ScoreComponentCollection_h
+#define moses_ScoreComponentCollection_h
+
+#include <numeric>
+#include <cassert>
+#include "ScoreProducer.h"
+#include "ScoreIndexManager.h"
+#include "TypeDef.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+/*** An unweighted collection of scores for a translation or step in a translation.
+ *
+ * In the factored phrase-based models that are implemented by moses, there are a set of
+ * scores that come from a variety of sources (translation probabilities, language model
+ * probabilities, distortion probabilities, generation probabilities). Furthermore, while
+ * some of these scores may be 0, this number is fixed (and generally quite small, ie, less
+ * than 15), for a given model.
+ *
+ * The values contained in ScoreComponentCollection objects are unweighted scores (log-probs).
+ *
+ * ScoreComponentCollection objects can be added and subtracted, which makes them appropriate
+ * to be the datatype used to return the result of a score computations (in this case they will
+ * have most values set to zero, except for the ones that are results of the individual computation;
+ * this will then be added into the "running total" in the Hypothesis. In fact, for a score
+ * to be tracked in the hypothesis (and thus to participate in the decoding process), a class
+ * representing that score must extend the ScoreProducer abstract base class. For an example
+ * refer to the DistortionScoreProducer class.
+ */
+class ScoreComponentCollection {
+ friend std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs);
+ friend class ScoreIndexManager;
+private:
+ std::vector<float> m_scores;
+ const ScoreIndexManager* m_sim;
+
+public:
+ //! Create a new score collection with all values set to 0.0
+ ScoreComponentCollection();
+
+ //! Clone a score collection
+ ScoreComponentCollection(const ScoreComponentCollection& rhs)
+ : m_scores(rhs.m_scores)
+ , m_sim(rhs.m_sim)
+ {}
+
+ inline size_t size() const { return m_scores.size(); }
+ const float& operator[](size_t x) const { return m_scores[x]; }
+
+ //! Set all values to 0.0
+ void ZeroAll()
+ {
+ for (std::vector<float>::iterator i=m_scores.begin(); i!=m_scores.end(); ++i)
+ *i = 0.0f;
+ }
+
+ //! add the score in rhs
+ void PlusEquals(const ScoreComponentCollection& rhs)
+ {
+ assert(m_scores.size() >= rhs.m_scores.size());
+ const size_t l = rhs.m_scores.size();
+ for (size_t i=0; i<l; i++) { m_scores[i] += rhs.m_scores[i]; }
+ }
+
+ //! subtract the score in rhs
+ void MinusEquals(const ScoreComponentCollection& rhs)
+ {
+ assert(m_scores.size() >= rhs.m_scores.size());
+ const size_t l = rhs.m_scores.size();
+ for (size_t i=0; i<l; i++) { m_scores[i] -= rhs.m_scores[i]; }
+ }
+
+ //! Add scores from a single ScoreProducer only
+ //! The length of scores must be equal to the number of score components
+ //! produced by sp
+ void PlusEquals(const ScoreProducer* sp, const std::vector<float>& scores)
+ {
+ assert(scores.size() == sp->GetNumScoreComponents());
+ size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
+ for (std::vector<float>::const_iterator vi = scores.begin();
+ vi != scores.end(); ++vi)
+ {
+ m_scores[i++] += *vi;
+ }
+ }
+
+ //! Add scores from a single ScoreProducer only
+ //! The length of scores must be equal to the number of score components
+ //! produced by sp
+ void PlusEquals(const ScoreProducer* sp, const ScoreComponentCollection& scores)
+ {
+ size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
+ const size_t end = m_sim->GetEndIndex(sp->GetScoreBookkeepingID());
+ for (; i < end; ++i)
+ {
+ m_scores[i] += scores.m_scores[i];
+ }
+ }
+
+ //! Special version PlusEquals(ScoreProducer, vector<float>)
+ //! to add the score from a single ScoreProducer that produces
+ //! a single value
+ void PlusEquals(const ScoreProducer* sp, float score)
+ {
+ assert(1 == sp->GetNumScoreComponents());
+ const size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
+ m_scores[i] += score;
+ }
+
+ void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
+ {
+ assert(scores.size() == sp->GetNumScoreComponents());
+ size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
+ for (std::vector<float>::const_iterator vi = scores.begin();
+ vi != scores.end(); ++vi)
+ {
+ m_scores[i++] = *vi;
+ }
+ }
+
+ //! Special version PlusEquals(ScoreProducer, vector<float>)
+ //! to add the score from a single ScoreProducer that produces
+ //! a single value
+ void Assign(const ScoreProducer* sp, float score)
+ {
+ assert(1 == sp->GetNumScoreComponents());
+ const size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
+ m_scores[i] = score;
+ }
+
+ //! Used to find the weighted total of scores. rhs should contain a vector of weights
+ //! of the same length as the number of scores.
+ float InnerProduct(const std::vector<float>& rhs) const
+ {
+ return std::inner_product(m_scores.begin(), m_scores.end(), rhs.begin(), 0.0f);
+ }
+
+ float PartialInnerProduct(const ScoreProducer* sp, const std::vector<float>& rhs) const
+ {
+ std::vector<float> lhs = GetScoresForProducer(sp);
+ assert(lhs.size() == rhs.size());
+ return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
+ }
+
+ //! return a vector of all the scores associated with a certain ScoreProducer
+ std::vector<float> GetScoresForProducer(const ScoreProducer* sp) const
+ {
+ size_t id = sp->GetScoreBookkeepingID();
+ const size_t begin = m_sim->GetBeginIndex(id);
+ const size_t end = m_sim->GetEndIndex(id);
+ std::vector<float> res(end-begin);
+ size_t j = 0;
+ for (size_t i = begin; i < end; i++) {
+ res[j++] = m_scores[i];
+ }
+ return res;
+ }
+
+ //! if a ScoreProducer produces a single score (for example, a language model score)
+ //! this will return it. If not, this method will throw
+ float GetScoreForProducer(const ScoreProducer* sp) const
+ {
+ size_t id = sp->GetScoreBookkeepingID();
+ const size_t begin = m_sim->GetBeginIndex(id);
+#ifndef NDEBUG
+ const size_t end = m_sim->GetEndIndex(id);
+ assert(end-begin == 1);
+#endif
+ return m_scores[begin];
+ }
+
+};
+
+inline std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs)
+{
+ os << "<<" << rhs.m_scores[0];
+ for (size_t i=1; i<rhs.m_scores.size(); i++)
+ os << ", " << rhs.m_scores[i];
+ return os << ">>";
+}
+
+
+}
+#endif
diff --git a/moses/src/ScoreIndexManager.cpp b/moses/src/ScoreIndexManager.cpp
new file mode 100644
index 000000000..e0502b7bc
--- /dev/null
+++ b/moses/src/ScoreIndexManager.cpp
@@ -0,0 +1,148 @@
+// $Id: ScoreIndexManager.cpp 2717 2010-01-28 15:32:04Z phkoehn $
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <cstdio>
+#include <cassert>
+#include "Util.h"
+#include "StaticData.h"
+#include "ScoreIndexManager.h"
+#include "ScoreProducer.h"
+#include "ScoreComponentCollection.h" // debugging
+
+namespace Moses
+{
+void ScoreIndexManager::AddScoreProducer(const ScoreProducer* sp)
+{
+ // Producers must be inserted in the order they are created
+ const_cast<ScoreProducer*>(sp)->CreateScoreBookkeepingID();
+ assert(m_begins.size() == (sp->GetScoreBookkeepingID()));
+
+ m_producers.push_back(sp);
+ if (sp->IsStateless()) {
+ const StatelessFeatureFunction* ff = static_cast<const StatelessFeatureFunction*>(sp);
+ if (!ff->ComputeValueInTranslationOption())
+ m_stateless.push_back(ff);
+ } else {
+ m_stateful.push_back(static_cast<const StatefulFeatureFunction*>(sp));
+ }
+
+ m_begins.push_back(m_last);
+ size_t numScoreCompsProduced = sp->GetNumScoreComponents();
+ assert(numScoreCompsProduced > 0);
+ m_last += numScoreCompsProduced;
+ m_ends.push_back(m_last);
+ /*VERBOSE(1,"Added ScoreProducer(" << sp->GetScoreBookkeepingID()
+ << " " << sp->GetScoreProducerDescription()
+ << ") index=" << m_begins.back() << "-" << m_ends.back()-1 << std::endl);
+ */
+}
+
+void ScoreIndexManager::PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scores) const
+{
+ std::vector<float> weights(scores.m_scores.size(), 1.0f);
+ PrintLabeledWeightedScores(os, scores, weights);
+}
+
+void ScoreIndexManager::PrintLabeledWeightedScores(std::ostream& os, const ScoreComponentCollection& scores, const std::vector<float>& weights) const
+{
+ assert(m_featureShortNames.size() == weights.size());
+ string lastName = "";
+ for (size_t i = 0; i < m_featureShortNames.size(); ++i)
+ {
+ if (i>0)
+ {
+ os << " ";
+ }
+ if (lastName != m_featureShortNames[i])
+ {
+ os << m_featureShortNames[i] << ": ";
+ lastName = m_featureShortNames[i];
+ }
+ os << weights[i] * scores[i];
+ }
+}
+
+void ScoreIndexManager::InitFeatureNames() {
+ m_featureNames.clear();
+ m_featureShortNames.clear();
+ size_t cur_i = 0;
+ size_t cur_scoreType = 0;
+ while (cur_i < m_last) {
+ size_t nis_idx = 0;
+ bool add_idx = (m_producers[cur_scoreType]->GetNumInputScores() > 1);
+ while (nis_idx < m_producers[cur_scoreType]->GetNumInputScores()){
+ ostringstream os;
+ os << m_producers[cur_scoreType]->GetScoreProducerDescription();
+ if (add_idx)
+ os << '_' << (nis_idx+1);
+ m_featureNames.push_back(os.str());
+ nis_idx++;
+ cur_i++;
+ }
+
+ int ind = 1;
+ add_idx = (m_ends[cur_scoreType] - cur_i > 1);
+ while (cur_i < m_ends[cur_scoreType]) {
+ ostringstream os;
+ os << m_producers[cur_scoreType]->GetScoreProducerDescription();
+ if (add_idx)
+ os << '_' << ind;
+ m_featureNames.push_back(os.str());
+ m_featureShortNames.push_back( m_producers[cur_scoreType]->GetScoreProducerWeightShortName() );
+ ++cur_i;
+ ++ind;
+ }
+ cur_scoreType++;
+ }
+}
+
+#ifdef HAVE_PROTOBUF
+void ScoreIndexManager::SerializeFeatureNamesToPB(hgmert::Hypergraph* hg) const {
+ for (size_t i = 0; i < m_featureNames.size(); ++i) {
+ hg->add_feature_names(m_featureNames[i]);
+ }
+}
+#endif
+
+void ScoreIndexManager::InitWeightVectorFromFile(const std::string& fnam, vector<float>* m_allWeights) const {
+ assert(m_allWeights->size() == m_featureNames.size());
+ ifstream in(fnam.c_str());
+ assert(in.good());
+ char buf[2000];
+ map<string, double> name2val;
+ while (!in.eof()) {
+ in.getline(buf, 2000);
+ if (strlen(buf) == 0) continue;
+ if (buf[0] == '#') continue;
+ istringstream is(buf);
+ string fname;
+ double val;
+ is >> fname >> val;
+ map<string, double>::iterator i = name2val.find(fname);
+ assert(i == name2val.end()); // duplicate weight name
+ name2val[fname] = val;
+ }
+ assert(m_allWeights->size() == m_featureNames.size());
+ for (size_t i = 0; i < m_featureNames.size(); ++i) {
+ map<string, double>::iterator iter = name2val.find(m_featureNames[i]);
+ if (iter == name2val.end()) {
+ cerr << "No weight found found for feature: " << m_featureNames[i] << endl;
+ abort();
+ }
+ (*m_allWeights)[i] = iter->second;
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, const ScoreIndexManager& sim)
+{
+ for (size_t i = 0; i < sim.m_featureNames.size(); ++i) {
+ os << sim.m_featureNames[i] << endl;
+ }
+ os << "Stateless: " << sim.m_stateless.size() << "\tStateful: " << sim.m_stateful.size() << endl;
+ return os;
+}
+
+}
+
diff --git a/moses/src/ScoreIndexManager.h b/moses/src/ScoreIndexManager.h
new file mode 100644
index 000000000..aacc55bb7
--- /dev/null
+++ b/moses/src/ScoreIndexManager.h
@@ -0,0 +1,70 @@
+// $Id: ScoreIndexManager.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_ScoreIndexManager_h
+#define moses_ScoreIndexManager_h
+
+#include <iostream>
+#include <vector>
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#endif
+
+namespace Moses
+{
+
+class ScoreProducer;
+class ScoreComponentCollection; // debugging only
+class StatefulFeatureFunction;
+class StatelessFeatureFunction;
+
+/** Keep track of scores and score producers. Each score producer is reserved contiguous set of slots
+ * to put their score components. All the score components are arranged in a vector with no gaps.
+ * Only 1 ScoreIndexManager object should be instantiated
+*/
+class ScoreIndexManager
+{
+ friend std::ostream& operator<<(std::ostream& os, const ScoreIndexManager& sim);
+public:
+ ScoreIndexManager() : m_last(0) {}
+
+ //! new score producer to manage. Producers must be inserted in the order they are created
+ void AddScoreProducer(const ScoreProducer* producer);
+ void InitFeatureNames();
+
+ //! starting score index for a particular score producer with scoreBookkeepingID
+ size_t GetBeginIndex(size_t scoreBookkeepingID) const { return m_begins[scoreBookkeepingID]; }
+ //! end score index for a particular score producer with scoreBookkeepingID
+ size_t GetEndIndex(size_t scoreBookkeepingID) const { return m_ends[scoreBookkeepingID]; }
+ //! sum of all score components from every score producer
+ size_t GetTotalNumberOfScores() const { return m_last; }
+ //! print unweighted scores of each ScoreManager to stream os
+ void PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scc) const;
+ //! print weighted scores of each ScoreManager to stream os
+ void PrintLabeledWeightedScores(std::ostream& os, const ScoreComponentCollection& scc, const std::vector<float>& weights) const;
+#ifdef HAVE_PROTOBUF
+ void SerializeFeatureNamesToPB(hgmert::Hypergraph* hg) const;
+#endif
+ void InitWeightVectorFromFile(const std::string& fnam, std::vector<float>* m_allWeights) const;
+ const std::vector<const ScoreProducer*>& GetFeatureFunctions() const { return m_producers; }
+ const std::vector<const StatefulFeatureFunction*>& GetStatefulFeatureFunctions() const { return m_stateful; }
+ const std::vector<const StatelessFeatureFunction*>& GetStatelessFeatureFunctions() const { return m_stateless; }
+private:
+ ScoreIndexManager(const ScoreIndexManager&); // don't implement
+
+ std::vector<size_t> m_begins;
+ std::vector<size_t> m_ends;
+ std::vector<const ScoreProducer*> m_producers; /**< all the score producers in this run */
+ std::vector<const StatefulFeatureFunction*> m_stateful; /**< all the score producers in this run */
+ std::vector<const StatelessFeatureFunction*> m_stateless; /**< all the score producers in this run */
+ std::vector<std::string> m_featureNames;
+ std::vector<std::string> m_featureShortNames;
+ size_t m_last;
+};
+
+
+}
+
+#endif
diff --git a/moses/src/ScoreProducer.cpp b/moses/src/ScoreProducer.cpp
new file mode 100644
index 000000000..194192725
--- /dev/null
+++ b/moses/src/ScoreProducer.cpp
@@ -0,0 +1,21 @@
+// $Id: ScoreProducer.cpp 2087 2009-02-06 15:43:06Z redpony $
+
+#include <iostream>
+#include <typeinfo>
+#include "ScoreProducer.h"
+#include "StaticData.h"
+#include "ScoreIndexManager.h"
+
+namespace Moses
+{
+unsigned int ScoreProducer::s_globalScoreBookkeepingIdCounter(0);
+
+ScoreProducer::~ScoreProducer() {}
+
+ScoreProducer::ScoreProducer()
+{
+ m_scoreBookkeepingId = UNASSIGNED;
+}
+
+}
+
diff --git a/moses/src/ScoreProducer.h b/moses/src/ScoreProducer.h
new file mode 100644
index 000000000..abe2a2a60
--- /dev/null
+++ b/moses/src/ScoreProducer.h
@@ -0,0 +1,64 @@
+// $Id: ScoreProducer.h 2939 2010-02-24 11:15:44Z jfouet $
+
+#ifndef moses_ScoreProducer_h
+#define moses_ScoreProducer_h
+
+#include <string>
+#include <limits>
+
+namespace Moses
+{
+
+class Hypothesis;
+class ScoreComponentCollection;
+class ScoreIndexManager;
+class FFState;
+
+/** to keep track of the various things that can produce a score,
+ * we use this evil implementation-inheritance to give them each
+ * a unique, sequential (read: good for vector indices) ID
+ *
+ * @note do not confuse this with a producer/consumer pattern.
+ * this is not a producer in that sense.
+ */
+class ScoreProducer
+{
+private:
+ static unsigned int s_globalScoreBookkeepingIdCounter;
+ unsigned int m_scoreBookkeepingId;
+
+ ScoreProducer(const ScoreProducer&); // don't implement
+
+ #define UNASSIGNED std::numeric_limits<unsigned int>::max()
+
+protected:
+ // it would be nice to force registration here, but some Producer objects
+ // are constructed before they know how many scores they have
+ ScoreProducer();
+ virtual ~ScoreProducer();
+
+public:
+ //! contiguous id
+ unsigned int GetScoreBookkeepingID() const { return m_scoreBookkeepingId; }
+ void CreateScoreBookkeepingID() { m_scoreBookkeepingId = s_globalScoreBookkeepingIdCounter++;}
+ //! returns the number of scores that a subclass produces.
+ //! For example, a language model conventionally produces 1, a translation table some arbitrary number, etc
+ virtual size_t GetNumScoreComponents() const = 0;
+
+ //! returns a string description of this producer
+ virtual std::string GetScoreProducerDescription() const = 0;
+
+ //! returns the weight parameter name of this producer (used in n-best list)
+ virtual std::string GetScoreProducerWeightShortName() const = 0;
+
+ //! returns the number of scores gathered from the input (0 by default)
+ virtual size_t GetNumInputScores() const { return 0; };
+
+ virtual bool IsStateless() const = 0;
+
+};
+
+
+}
+
+#endif
diff --git a/moses/src/Search.cpp b/moses/src/Search.cpp
new file mode 100644
index 000000000..49d84e80a
--- /dev/null
+++ b/moses/src/Search.cpp
@@ -0,0 +1,32 @@
+
+#include "Manager.h"
+#include "SearchCubePruning.h"
+#include "SearchNormal.h"
+#include "UserMessage.h"
+
+namespace Moses
+{
+
+
+Search *Search::CreateSearch(Manager& manager, const InputType &source,
+ SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
+{
+ switch(searchAlgorithm)
+ {
+ case Normal:
+ return new SearchNormal(manager,source, transOptColl);
+ case CubePruning:
+ return new SearchCubePruning(manager, source, transOptColl);
+ case CubeGrowing:
+ return NULL;
+ default:
+ UserMessage::Add("ERROR: search. Aborting\n");
+ abort();
+ return NULL;
+ }
+
+}
+
+}
+
+
diff --git a/moses/src/Search.h b/moses/src/Search.h
new file mode 100644
index 000000000..b796a12b5
--- /dev/null
+++ b/moses/src/Search.h
@@ -0,0 +1,40 @@
+#ifndef moses_Search_h
+#define moses_Search_h
+
+#include <vector>
+#include "TypeDef.h"
+#include "Phrase.h"
+
+namespace Moses
+{
+
+class HypothesisStack;
+class Hypothesis;
+class InputType;
+class TranslationOptionCollection;
+class Manager;
+
+class Search
+{
+public:
+ virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const = 0;
+ virtual const Hypothesis *GetBestHypothesis() const = 0;
+ virtual void ProcessSentence() = 0;
+ Search(Manager& manager) : m_manager(manager) {}
+ virtual ~Search()
+ {}
+
+ // Factory
+ static Search *CreateSearch(Manager& manager, const InputType &source, SearchAlgorithm searchAlgorithm,
+ const TranslationOptionCollection &transOptColl);
+
+protected:
+
+ const Phrase *m_constraint;
+ Manager& m_manager;
+
+};
+
+
+}
+#endif
diff --git a/moses/src/SearchCubePruning.cpp b/moses/src/SearchCubePruning.cpp
new file mode 100644
index 000000000..0f10c9c23
--- /dev/null
+++ b/moses/src/SearchCubePruning.cpp
@@ -0,0 +1,347 @@
+#include "Manager.h"
+#include "Util.h"
+#include "SearchCubePruning.h"
+#include "StaticData.h"
+#include "InputType.h"
+#include "TranslationOptionCollection.h"
+
+namespace Moses
+{
+class BitmapContainerOrderer
+{
+ public:
+ bool operator()(const BitmapContainer* A, const BitmapContainer* B) const
+ {
+ if (B->Empty()) {
+ if (A->Empty()) {
+ return A < B;
+ }
+ return false;
+ }
+ if (A->Empty()) {
+ return true;
+ }
+
+ // Compare the top hypothesis of each bitmap container using the TotalScore, which includes future cost
+ const float scoreA = A->Top()->GetHypothesis()->GetTotalScore();
+ const float scoreB = B->Top()->GetHypothesis()->GetTotalScore();
+
+ if (scoreA < scoreB)
+ {
+ return true;
+ }
+ else if (scoreA > scoreB)
+ {
+ return false;
+ }
+ else
+ {
+ return A < B;
+ }
+ }
+};
+
+SearchCubePruning::SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
+ :Search(manager)
+ ,m_source(source)
+,m_hypoStackColl(source.GetSize() + 1)
+,m_initialTargetPhrase(Output)
+,m_start(clock())
+,m_transOptColl(transOptColl)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ /* constraint search not implemented in cube pruning
+ long sentenceID = source.GetTranslationId();
+ m_constraint = staticData.GetConstrainingPhrase(sentenceID);
+ */
+
+ std::vector < HypothesisStackCubePruning >::iterator iterStack;
+ for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
+ {
+ HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning(m_manager);
+ sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
+ sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
+
+ m_hypoStackColl[ind] = sourceHypoColl;
+ }
+}
+
+SearchCubePruning::~SearchCubePruning()
+{
+ RemoveAllInColl(m_hypoStackColl);
+}
+
+/**
+ * Main decoder loop that translates a sentence by expanding
+ * hypotheses stack by stack, until the end of the sentence.
+ */
+void SearchCubePruning::ProcessSentence()
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ // initial seed hypothesis: nothing translated, no words produced
+ Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
+
+ HypothesisStackCubePruning &firstStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl.front());
+ firstStack.AddInitial(hypo);
+ // Call this here because the loop below starts at the second stack.
+ firstStack.CleanupArcList();
+ CreateForwardTodos(firstStack);
+
+ const size_t PopLimit = StaticData::Instance().GetCubePruningPopLimit();
+ VERBOSE(3,"Cube Pruning pop limit is " << PopLimit << std::endl)
+
+ const size_t Diversity = StaticData::Instance().GetCubePruningDiversity();
+ VERBOSE(3,"Cube Pruning diversity is " << Diversity << std::endl)
+
+ // go through each stack
+ size_t stackNo = 1;
+ std::vector < HypothesisStack* >::iterator iterStack;
+ for (iterStack = ++m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)
+ {
+ // check if decoding ran out of time
+ double _elapsed_time = GetUserTime();
+ if (_elapsed_time > staticData.GetTimeoutThreshold()){
+ VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
+ return;
+ }
+ HypothesisStackCubePruning &sourceHypoColl = *static_cast<HypothesisStackCubePruning*>(*iterStack);
+
+ // priority queue which has a single entry for each bitmap container, sorted by score of top hyp
+ std::priority_queue< BitmapContainer*, std::vector< BitmapContainer* >, BitmapContainerOrderer> BCQueue;
+
+ _BMType::const_iterator bmIter;
+ const _BMType &accessor = sourceHypoColl.GetBitmapAccessor();
+
+ for(bmIter = accessor.begin(); bmIter != accessor.end(); ++bmIter)
+ {
+ bmIter->second->InitializeEdges();
+ BCQueue.push(bmIter->second);
+
+ // old algorithm
+ // bmIter->second->EnsureMinStackHyps(PopLimit);
+ }
+
+ // main search loop, pop k best hyps
+ for (size_t numpops = 1; numpops <= PopLimit && !BCQueue.empty(); numpops++) {
+ BitmapContainer *bc = BCQueue.top();
+ BCQueue.pop();
+ bc->ProcessBestHypothesis();
+ if (!bc->Empty())
+ BCQueue.push(bc);
+ }
+
+ // ensure diversity, a minimum number of inserted hyps for each bitmap container;
+ // NOTE: diversity doesn't ensure they aren't pruned at some later point
+ if (Diversity > 0) {
+ for(bmIter = accessor.begin(); bmIter != accessor.end(); ++bmIter)
+ {
+ bmIter->second->EnsureMinStackHyps(Diversity);
+ }
+ }
+
+ // the stack is pruned before processing (lazy pruning):
+ VERBOSE(3,"processing hypothesis from next stack");
+ // VERBOSE("processing next stack at ");
+ sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
+ VERBOSE(3,std::endl);
+ sourceHypoColl.CleanupArcList();
+
+ CreateForwardTodos(sourceHypoColl);
+
+ stackNo++;
+ }
+
+ PrintBitmapContainerGraph();
+
+ // some more logging
+ IFVERBOSE(2) { m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
+ VERBOSE(2, m_manager.GetSentenceStats());
+}
+
+void SearchCubePruning::CreateForwardTodos(HypothesisStackCubePruning &stack)
+{
+ const _BMType &bitmapAccessor = stack.GetBitmapAccessor();
+ _BMType::const_iterator iterAccessor;
+ size_t size = m_source.GetSize();
+
+ stack.AddHypothesesToBitmapContainers();
+
+ for (iterAccessor = bitmapAccessor.begin() ; iterAccessor != bitmapAccessor.end() ; ++iterAccessor)
+ {
+ const WordsBitmap &bitmap = iterAccessor->first;
+ BitmapContainer &bitmapContainer = *iterAccessor->second;
+
+ if (bitmapContainer.GetHypothesesSize() == 0)
+ { // no hypotheses to expand. don't bother doing it
+ continue;
+ }
+
+ // Sort the hypotheses inside the Bitmap Container as they are being used by now.
+ bitmapContainer.SortHypotheses();
+
+ // check bitmap and range don't overlap
+ size_t startPos, endPos;
+ for (startPos = 0 ;startPos < size ; startPos++)
+ {
+ if (bitmap.GetValue(startPos))
+ continue;
+
+ // not yet covered
+ WordsRange applyRange(startPos, startPos);
+ if (CheckDistortion(bitmap, applyRange))
+ { // apply range
+ CreateForwardTodos(bitmap, applyRange, bitmapContainer);
+ }
+
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (endPos = startPos+1; endPos < startPos + maxSize; endPos++)
+ {
+ if (bitmap.GetValue(endPos))
+ break;
+
+ WordsRange applyRange(startPos, endPos);
+ if (CheckDistortion(bitmap, applyRange))
+ { // apply range
+ CreateForwardTodos(bitmap, applyRange, bitmapContainer);
+ }
+ }
+ }
+ }
+}
+
+void SearchCubePruning::CreateForwardTodos(const WordsBitmap &bitmap, const WordsRange &range, BitmapContainer &bitmapContainer)
+{
+ WordsBitmap newBitmap = bitmap;
+ newBitmap.SetValue(range.GetStartPos(), range.GetEndPos(), true);
+
+ size_t numCovered = newBitmap.GetNumWordsCovered();
+ const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(range);
+ const SquareMatrix &futureScore = m_transOptColl.GetFutureScore();
+
+ if (transOptList.size() > 0)
+ {
+ HypothesisStackCubePruning &newStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl[numCovered]);
+ newStack.SetBitmapAccessor(newBitmap, newStack, range, bitmapContainer, futureScore, transOptList);
+ }
+}
+
+bool SearchCubePruning::CheckDistortion(const WordsBitmap &hypoBitmap, const WordsRange &range) const
+{
+ // since we check for reordering limits, its good to have that limit handy
+ int maxDistortion = StaticData::Instance().GetMaxDistortion();
+
+ // if there are reordering limits, make sure it is not violated
+ // the coverage bitmap is handy here (and the position of the first gap)
+ const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
+ , startPos = range.GetStartPos()
+ , endPos = range.GetEndPos();
+
+ // if reordering constraints are used (--monotone-at-punctuation or xml), check if passes all
+ if (! m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) )
+ {
+ return false;
+ }
+
+ // no limit of reordering: no problem
+ if (maxDistortion < 0)
+ {
+ return true;
+ }
+
+ bool leftMostEdge = (hypoFirstGapPos == startPos);
+ // any length extension is okay if starting at left-most edge
+ if (leftMostEdge)
+ {
+ return true;
+ }
+ // starting somewhere other than left-most edge, use caution
+ // the basic idea is this: we would like to translate a phrase starting
+ // from a position further right than the left-most open gap. The
+ // distortion penalty for the following phrase will be computed relative
+ // to the ending position of the current extension, so we ask now what
+ // its maximum value will be (which will always be the value of the
+ // hypothesis starting at the left-most edge). If this value is larger than
+ // the distortion limit, we don't allow this extension to be made.
+ WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
+ int required_distortion =
+ m_source.ComputeDistortionDistance(range, bestNextExtension);
+
+ if (required_distortion > maxDistortion) {
+ return false;
+ }
+ return true;
+}
+
+/**
+ * Find best hypothesis on the last stack.
+ * This is the end point of the best translation, which can be traced back from here
+ */
+const Hypothesis *SearchCubePruning::GetBestHypothesis() const
+{
+ // const HypothesisStackCubePruning &hypoColl = m_hypoStackColl.back();
+ const HypothesisStack &hypoColl = *m_hypoStackColl.back();
+ return hypoColl.GetBestHypothesis();
+}
+
+/**
+ * Logging of hypothesis stack sizes
+ */
+void SearchCubePruning::OutputHypoStackSize()
+{
+ std::vector < HypothesisStack* >::const_iterator iterStack = m_hypoStackColl.begin();
+ TRACE_ERR( "Stack sizes: " << (int)(*iterStack)->size());
+ for (++iterStack; iterStack != m_hypoStackColl.end() ; ++iterStack)
+ {
+ TRACE_ERR( ", " << (int)(*iterStack)->size());
+ }
+ TRACE_ERR( endl);
+}
+
+void SearchCubePruning::PrintBitmapContainerGraph()
+{
+ HypothesisStackCubePruning &lastStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl.back());
+ const _BMType &bitmapAccessor = lastStack.GetBitmapAccessor();
+
+ _BMType::const_iterator iterAccessor;
+ for (iterAccessor = bitmapAccessor.begin(); iterAccessor != bitmapAccessor.end(); ++iterAccessor)
+ {
+ cerr << iterAccessor->first << endl;
+ BitmapContainer &container = *iterAccessor->second;
+ }
+
+}
+
+/**
+ * Logging of hypothesis stack contents
+ * \param stack number of stack to be reported, report all stacks if 0
+ */
+void SearchCubePruning::OutputHypoStack(int stack)
+{
+ if (stack >= 0)
+ {
+ TRACE_ERR( "Stack " << stack << ": " << endl << m_hypoStackColl[stack] << endl);
+ }
+ else
+ { // all stacks
+ int i = 0;
+ vector < HypothesisStack* >::iterator iterStack;
+ for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)
+ {
+ HypothesisStackCubePruning &hypoColl = *static_cast<HypothesisStackCubePruning*>(*iterStack);
+ TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
+ }
+ }
+}
+
+const std::vector < HypothesisStack* >& SearchCubePruning::GetHypothesisStacks() const
+{
+ return m_hypoStackColl;
+}
+
+}
+
diff --git a/moses/src/SearchCubePruning.h b/moses/src/SearchCubePruning.h
new file mode 100644
index 000000000..adbc51ba2
--- /dev/null
+++ b/moses/src/SearchCubePruning.h
@@ -0,0 +1,47 @@
+#ifndef moses_SearchCubePruning_h
+#define moses_SearchCubePruning_h
+
+#include <vector>
+#include "Search.h"
+#include "HypothesisStackCubePruning.h"
+
+namespace Moses
+{
+
+class InputType;
+class TranslationOptionCollection;
+
+/** Search implementation using cube pruning: hypotheses are expanded
+ *  stack by stack via per-coverage-bitmap containers (see
+ *  HypothesisStackCubePruning). Interface mirrors SearchNormal.
+ */
+class SearchCubePruning: public Search
+{
+protected:
+ const InputType &m_source;
+ std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */
+ // no of elements = no of words in source + 1
+ TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
+ clock_t m_start; /**< used to track time spend on translation */
+ const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
+
+ //! go thru all bitmaps in 1 stack & create backpointers to bitmaps in the stack
+ void CreateForwardTodos(HypothesisStackCubePruning &stack);
+ //! create a back pointer to this bitmap, with edge that has this words range translation
+ void CreateForwardTodos(const WordsBitmap &bitmap, const WordsRange &range, BitmapContainer &bitmapContainer);
+ bool CheckDistortion(const WordsBitmap &bitmap, const WordsRange &range) const;
+
+ //! debugging aid: dump coverage bitmaps of the last stack to stderr
+ void PrintBitmapContainerGraph();
+
+public:
+ SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
+ ~SearchCubePruning();
+
+ void ProcessSentence();
+
+ void OutputHypoStackSize();
+ void OutputHypoStack(int stack);
+
+ virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const;
+ virtual const Hypothesis *GetBestHypothesis() const;
+};
+
+
+}
+#endif
diff --git a/moses/src/SearchNormal.cpp b/moses/src/SearchNormal.cpp
new file mode 100644
index 000000000..01788520d
--- /dev/null
+++ b/moses/src/SearchNormal.cpp
@@ -0,0 +1,388 @@
+#include "Manager.h"
+#include "Timer.h"
+#include "SearchNormal.h"
+
+namespace Moses
+{
+/**
+ * Organizing main function
+ *
+ * \param source input sentence
+ * \param transOptColl collection of translation options to be used for this sentence
+ */
+SearchNormal::SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
+ :Search(manager)
+ ,m_source(source)
+ ,m_hypoStackColl(source.GetSize() + 1)
+ ,m_initialTargetPhrase(Output)
+ ,m_start(clock())
+ ,interrupted_flag(0)
+ ,m_transOptColl(transOptColl)
+{
+ VERBOSE(1, "Translating: " << m_source << endl);
+ const StaticData &staticData = StaticData::Instance();
+
+ // only if constraint decoding (having to match a specified output)
+ long sentenceID = source.GetTranslationId();
+ m_constraint = staticData.GetConstrainingPhrase(sentenceID);
+ if (m_constraint) {
+  VERBOSE(1, "Search constraint to output: " << *m_constraint<<endl);
+ }
+
+ // initialize the stacks: create data structure and set limits
+ // NOTE(review): 'iterStack' below is declared but never used — leftover
+ // from an earlier iterator-based loop; harmless but could be removed.
+ std::vector < HypothesisStackNormal >::iterator iterStack;
+ for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
+ {
+  // one stack per number of covered source words (hence GetSize() + 1);
+  // stacks are owned here and freed in the destructor
+  HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
+  sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),staticData.GetMinHypoStackDiversity());
+  sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
+
+  m_hypoStackColl[ind] = sourceHypoColl;
+ }
+}
+
+// Frees the hypothesis stacks allocated in the constructor.
+SearchNormal::~SearchNormal()
+{
+ RemoveAllInColl(m_hypoStackColl);
+}
+
+/**
+ * Main decoder loop that translates a sentence by expanding
+ * hypotheses stack by stack, until the end of the sentence.
+ * On timeout, sets interrupted_flag and returns early, leaving the
+ * remaining stacks unexpanded; GetBestHypothesis() then falls back to
+ * the last fully expanded stack (actual_hypoStack).
+ */
+void SearchNormal::ProcessSentence()
+{
+ const StaticData &staticData = StaticData::Instance();
+ SentenceStats &stats = m_manager.GetSentenceStats();
+ clock_t t=0; // used to track time for steps
+
+ // initial seed hypothesis: nothing translated, no words produced
+ Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
+ m_hypoStackColl[0]->AddPrune(hypo);
+
+ // go through each stack
+ std::vector < HypothesisStack* >::iterator iterStack;
+ for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)
+ {
+  // check if decoding ran out of time
+  double _elapsed_time = GetUserTime();
+  if (_elapsed_time > staticData.GetTimeoutThreshold()){
+   VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
+   interrupted_flag = 1;
+   return;
+  }
+  HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
+
+  // the stack is pruned before processing (lazy pruning):
+  VERBOSE(3,"processing hypothesis from next stack");
+  IFVERBOSE(2) { t = clock(); }
+  sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
+  VERBOSE(3,std::endl);
+  sourceHypoColl.CleanupArcList();
+  IFVERBOSE(2) { stats.AddTimeStack( clock()-t ); }
+
+  // go through each hypothesis on the stack and try to expand it
+  HypothesisStackNormal::const_iterator iterHypo;
+  for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
+  {
+   Hypothesis &hypothesis = **iterHypo;
+   ProcessOneHypothesis(hypothesis); // expand the hypothesis
+  }
+  // some logging
+  IFVERBOSE(2) { OutputHypoStackSize(); }
+
+  // this stack is fully expanded;
+  actual_hypoStack = &sourceHypoColl;
+ }
+
+ // some more logging
+ IFVERBOSE(2) { m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
+ VERBOSE(2, m_manager.GetSentenceStats());
+}
+
+
+/** Find all translation options to expand one hypothesis, trigger expansion
+ * this is mostly a check for overlap with already covered words, and for
+ * violation of reordering limits.
+ * Two code paths: a simpler one for unlimited reordering (maxDistortion < 0)
+ * and a full one that enforces the distortion limit and, for word-lattice
+ * input, connectivity of the hypothesized span with its neighbours.
+ * \param hypothesis hypothesis to be expanded upon
+ */
+void SearchNormal::ProcessOneHypothesis(const Hypothesis &hypothesis)
+{
+ // since we check for reordering limits, its good to have that limit handy
+ int maxDistortion = StaticData::Instance().GetMaxDistortion();
+ bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
+
+ // no limit of reordering: only check for overlap
+ if (maxDistortion < 0)
+ {
+  const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
+  const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
+   , sourceSize = m_source.GetSize();
+
+  for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
+  {
+   // span length is capped by both sentence end and max phrase length
+   size_t maxSize = sourceSize - startPos;
+   size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+   maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+   for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+   {
+    // basic checks
+    // there have to be translation options
+    if (m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos)).size() == 0 ||
+     // no overlap with existing words
+     hypoBitmap.Overlap(WordsRange(startPos, endPos)) ||
+     // specified reordering constraints (set with -monotone-at-punctuation or xml)
+     !m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) )
+    {
+     continue;
+    }
+
+    //TODO: does this method include incompatible WordLattice hypotheses?
+    ExpandAllHypotheses(hypothesis, startPos, endPos);
+   }
+  }
+
+  return; // done with special case (no reordering limit)
+ }
+
+ // if there are reordering limits, make sure it is not violated
+ // the coverage bitmap is handy here (and the position of the first gap)
+ const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
+ const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
+  , sourceSize = m_source.GetSize();
+
+ // MAIN LOOP. go through each possible range
+ for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
+ {
+  // jump distance from the end of the current hypothesis to the candidate
+  // start must stay within the distortion limit
+  int lastEnd = static_cast<int>(hypothesis.GetCurrSourceWordsRange().GetEndPos());
+  if (startPos != 0 && (static_cast<int>(startPos) - lastEnd - 1) > maxDistortion) {
+// cerr << "sp=" << startPos << " le=" << lastEnd << " X=" << (static_cast<int>(startPos) - lastEnd - 1) << " MD:=" << maxDistortion << endl;
+   continue;
+  }
+  size_t maxSize = sourceSize - startPos;
+  size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+  maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+  size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
+  if (isWordLattice) {
+   // first question: is there a path from the closest translated word to the left
+   // of the hypothesized extension to the start of the hypothesized extension?
+   // long version: is there anything to our left? is it farther left than where we're starting anyway? can we get to it?
+   // closestLeft is exclusive: a value of 3 means 2 is covered, our arc is currently ENDING at 3 and can start at 3 implicitly
+   if (closestLeft != 0 && closestLeft != startPos && !m_source.CanIGetFromAToB(closestLeft, startPos)) {
+    continue;
+   }
+  }
+
+  for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+  {
+   // basic checks
+   WordsRange extRange(startPos, endPos);
+   // there have to be translation options
+   if (m_transOptColl.GetTranslationOptionList(extRange).size() == 0 ||
+    // no overlap with existing words
+    hypoBitmap.Overlap(extRange) ||
+    // specified reordering constraints (set with -monotone-at-punctuation or xml)
+    !m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) || //
+    // connection in input word lattice
+    (isWordLattice && !m_source.IsCoveragePossible(extRange)))
+   {
+    continue;
+   }
+
+   // ask second question here:
+   // we already know we can get to our starting point from the closest thing to the left. We now ask the follow up:
+   // can we get from our end to the closest thing on the right?
+   // long version: is anything to our right? is it farther right than our (inclusive) end? can our end reach it?
+   bool leftMostEdge = (hypoFirstGapPos == startPos);
+
+   // closest right definition:
+   size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
+   if (isWordLattice) {
+    //if (!leftMostEdge && closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) {
+    if (closestRight != endPos && ((closestRight + 1) < sourceSize) && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) {
+     continue;
+    }
+   }
+
+   // any length extension is okay if starting at left-most edge
+   if (leftMostEdge)
+   {
+    ExpandAllHypotheses(hypothesis, startPos, endPos);
+   }
+   // starting somewhere other than left-most edge, use caution
+   else
+   {
+    // the basic idea is this: we would like to translate a phrase starting
+    // from a position further right than the left-most open gap. The
+    // distortion penalty for the following phrase will be computed relative
+    // to the ending position of the current extension, so we ask now what
+    // its maximum value will be (which will always be the value of the
+    // hypothesis starting at the left-most edge). If this value is less than
+    // the distortion limit, we don't allow this extension to be made.
+    WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
+    int required_distortion =
+     m_source.ComputeDistortionDistance(extRange, bestNextExtension);
+
+    if (required_distortion > maxDistortion) {
+     continue;
+    }
+
+    // everything is fine, we're good to go
+    ExpandAllHypotheses(hypothesis, startPos, endPos);
+
+   }
+  }
+ }
+}
+
+
+/**
+ * Expand a hypothesis given a list of translation options
+ * \param hypothesis hypothesis to be expanded upon
+ * \param startPos first word position of span covered
+ * \param endPos last word position of span covered
+ */
+
+void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
+{
+ // early discarding: check if hypothesis is too bad to build
+ // this idea is explained in (Moore&Quirk, MT Summit 2007)
+ float expectedScore = 0.0f;
+ if (StaticData::Instance().UseEarlyDiscarding())
+ {
+  // expected score is based on score of current hypothesis
+  expectedScore = hypothesis.GetScore();
+
+  // add new future score estimate
+  expectedScore += m_transOptColl.GetFutureScore().CalcFutureScore( hypothesis.GetWordsBitmap(), startPos, endPos );
+ }
+
+ // loop through all translation options
+ // (the common base score computed above is shared by every option in the list)
+ const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos));
+ TranslationOptionList::const_iterator iter;
+ for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter)
+ {
+  ExpandHypothesis(hypothesis, **iter, expectedScore);
+ }
+}
+
+/**
+ * Expand one hypothesis with a translation option.
+ * this involves initial creation, scoring and adding it to the proper stack
+ * \param hypothesis hypothesis to be expanded upon
+ * \param transOpt translation option (phrase translation)
+ * that is applied to create the new hypothesis
+ * \param expectedScore base score for early discarding
+ * (base hypothesis score plus future score estimation)
+ */
+void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
+{
+ const StaticData &staticData = StaticData::Instance();
+ SentenceStats &stats = m_manager.GetSentenceStats();
+ clock_t t=0; // used to track time for steps
+
+ Hypothesis *newHypo;
+ if (! staticData.UseEarlyDiscarding())
+ {
+  // simple build, no questions asked
+  IFVERBOSE(2) { t = clock(); }
+  newHypo = hypothesis.CreateNext(transOpt, m_constraint);
+  IFVERBOSE(2) { stats.AddTimeBuildHyp( clock()-t ); }
+  if (newHypo==NULL) return;
+  newHypo->CalcScore(m_transOptColl.GetFutureScore());
+ }
+ else
+ // early discarding: check if hypothesis is too bad to build
+ {
+  // worst possible score may have changed -> recompute
+  size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize();
+  float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore();
+  if (staticData.GetMinHypoStackDiversity())
+  {
+   // with stack diversity, the per-bitmap worst score may be tighter
+   WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos());
+   float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
+   allowedScore = std::min( allowedScore, allowedScoreForBitmap );
+  }
+  allowedScore += staticData.GetEarlyDiscardingThreshold();
+
+  // add expected score of translation option
+  expectedScore += transOpt.GetFutureScore();
+  // TRACE_ERR("EXPECTED diff: " << (newHypo->GetTotalScore()-expectedScore) << " (pre " << (newHypo->GetTotalScore()-expectedScorePre) << ") " << hypothesis.GetTargetPhrase() << " ... " << transOpt.GetTargetPhrase() << " [" << expectedScorePre << "," << expectedScore << "," << newHypo->GetTotalScore() << "]" << endl);
+
+  // check if transOpt score push it already below limit
+  if (expectedScore < allowedScore)
+  {
+   IFVERBOSE(2) { stats.AddNotBuilt(); }
+   return;
+  }
+
+  // build the hypothesis without scoring
+  IFVERBOSE(2) { t = clock(); }
+  newHypo = hypothesis.CreateNext(transOpt, m_constraint);
+  if (newHypo==NULL) return;
+  IFVERBOSE(2) { stats.AddTimeBuildHyp( clock()-t ); }
+
+  // compute expected score (all but correct LM)
+  expectedScore = newHypo->CalcExpectedScore( m_transOptColl.GetFutureScore() );
+  // ... and check if that is below the limit
+  if (expectedScore < allowedScore)
+  {
+   IFVERBOSE(2) { stats.AddEarlyDiscarded(); }
+   FREEHYPO( newHypo );
+   return;
+  }
+
+  // ok, all is good, compute remaining scores
+  newHypo->CalcRemainingScore();
+
+ }
+
+ // logging for the curious
+ IFVERBOSE(3) {
+  newHypo->PrintHypothesis();
+ }
+
+ // add to hypothesis stack
+ // (ownership of newHypo passes to the stack via AddPrune)
+ size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();
+ IFVERBOSE(2) { t = clock(); }
+ m_hypoStackColl[wordsTranslated]->AddPrune(newHypo);
+ IFVERBOSE(2) { stats.AddTimeStack( clock()-t ); }
+}
+
+//! Accessor for the full stack collection (read-only view for callers such as n-best extraction).
+const std::vector < HypothesisStack* >& SearchNormal::GetHypothesisStacks() const
+{
+ return m_hypoStackColl;
+}
+
+/**
+ * Find best hypothesis on the last stack.
+ * This is the end point of the best translation, which can be traced back from here
+ * If decoding was interrupted by timeout, fall back to the last fully
+ * expanded stack instead (actual_hypoStack, set in ProcessSentence).
+ */
+const Hypothesis *SearchNormal::GetBestHypothesis() const
+{
+ if (interrupted_flag == 0){
+  const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back());
+  return hypoColl.GetBestHypothesis();
+ }
+ else{
+  const HypothesisStackNormal &hypoColl = *actual_hypoStack;
+  return hypoColl.GetBestHypothesis();
+ }
+}
+
+/**
+ * Logging of hypothesis stack sizes
+ */
+void SearchNormal::OutputHypoStackSize()
+{
+ // NOTE(review): dereferences begin() unconditionally — assumes the stack
+ // collection is non-empty (it holds source-size + 1 stacks after construction).
+ std::vector < HypothesisStack* >::const_iterator iterStack = m_hypoStackColl.begin();
+ TRACE_ERR( "Stack sizes: " << (int)(*iterStack)->size());
+ for (++iterStack; iterStack != m_hypoStackColl.end() ; ++iterStack)
+ {
+  TRACE_ERR( ", " << (int)(*iterStack)->size());
+ }
+ TRACE_ERR( endl);
+}
+
+}
diff --git a/moses/src/SearchNormal.h b/moses/src/SearchNormal.h
new file mode 100644
index 000000000..fb5e66689
--- /dev/null
+++ b/moses/src/SearchNormal.h
@@ -0,0 +1,49 @@
+#ifndef moses_SearchNormal_h
+#define moses_SearchNormal_h
+
+#include <vector>
+#include "Search.h"
+#include "HypothesisStackNormal.h"
+#include "TranslationOptionCollection.h"
+#include "Timer.h"
+
+namespace Moses
+{
+
+class Manager;
+class InputType;
+class TranslationOptionCollection;
+
+/** Standard stack-based beam search: one hypothesis stack per number of
+ *  covered source words; each stack is pruned and then fully expanded
+ *  before moving to the next (see SearchNormal.cpp).
+ */
+class SearchNormal: public Search
+{
+protected:
+ const InputType &m_source;
+ std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations); owned, freed in destructor */
+ // no of elements = no of words in source + 1
+ TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
+ clock_t m_start; /**< starting time, used for logging */
+ size_t interrupted_flag; /**< flag indicating that decoder ran out of time (see switch -time-out) */
+ HypothesisStackNormal* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
+ const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
+
+ // functions for creating hypotheses
+ void ProcessOneHypothesis(const Hypothesis &hypothesis);
+ void ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
+ void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
+
+public:
+ SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
+ ~SearchNormal();
+
+ void ProcessSentence();
+
+ void OutputHypoStackSize();
+ void OutputHypoStack(int stack);
+
+ virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const;
+ virtual const Hypothesis *GetBestHypothesis() const;
+};
+
+}
+
+#endif
diff --git a/moses/src/Sentence.cpp b/moses/src/Sentence.cpp
new file mode 100644
index 000000000..9bfde5737
--- /dev/null
+++ b/moses/src/Sentence.cpp
@@ -0,0 +1,200 @@
+// $Id: Sentence.cpp 2561 2009-10-02 16:58:02Z bhaddow $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <stdexcept>
+
+#include "Sentence.h"
+#include "PhraseDictionaryMemory.h"
+#include "TranslationOptionCollectionText.h"
+#include "StaticData.h"
+#include "Util.h"
+
+namespace Moses
+{
+// Reads one input line, strips SGML/XML markup, fills the phrase and the
+// XML-forced translation options and reordering walls.
+// Returns 0 on end-of-stream, 1 if a sentence was read.
+// Throws std::runtime_error if XML markup cannot be parsed.
+int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
+{
+ const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+ std::string line;
+ std::map<std::string, std::string> meta;
+
+ if (getline(in, line, '\n').eof())
+  return 0;
+ // remove extra spaces
+ line = Trim(line);
+
+ // if sentences is specified as "<seg id=1> ... </seg>", extract id
+ meta = ProcessAndStripSGML(line);
+ if (meta.find("id") != meta.end()) { this->SetTranslationId(atol(meta["id"].c_str())); }
+
+ // parse XML markup in translation line
+ const StaticData &staticData = StaticData::Instance();
+ std::vector<std::vector<XmlOption*> > xmlOptionsList(0);
+ std::vector< size_t > xmlWalls;
+ if (staticData.GetXmlInputType() != XmlPassThrough) {
+  if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls )) {
+   const string msg("Unable to parse XML in line: " + line);
+   TRACE_ERR(msg << endl);
+   throw runtime_error(msg);
+  }
+ }
+ Phrase::CreateFromString(factorOrder, line, factorDelimiter);
+
+ //now that we have final word positions in phrase (from CreateFromString),
+ //we can make input phrase objects to go with our XmlOptions and create TranslationOptions
+
+ //only fill the vector if we are parsing XML
+ if (staticData.GetXmlInputType() != XmlPassThrough ) {
+  for (size_t i=0; i<GetSize();i++) {
+   m_xmlCoverageMap.push_back(false);
+  }
+
+  //iterXMLOpts will be empty for XmlIgnore
+  //look at each column
+  for(std::vector<std::vector<XmlOption*> >::const_iterator iterXmlOpts = xmlOptionsList.begin();
+   iterXmlOpts != xmlOptionsList.end(); iterXmlOpts++) {
+
+   //now we are looking through one column of linked things.
+   //TODO: We could drop this inner loop if we didn't support linked opts.
+   //we could loop once, make the new TranslationOption, note its pos. in the coverageMap,
+   //and delete the XmlOption -JS
+   std::vector<TranslationOption*> linkedTransOpts(0);
+   for(std::vector<XmlOption*>::const_iterator iterLinkedXmlOpts = (iterXmlOpts)->begin();
+    iterLinkedXmlOpts != (iterXmlOpts)->end(); iterLinkedXmlOpts++) {
+
+    //make each item into a translation option
+    //(the XmlOption is consumed here; the new TranslationOption ends up
+    // owned by m_xmlOptionsList below)
+    TranslationOption *transOpt = new TranslationOption((*iterLinkedXmlOpts)->range,(*iterLinkedXmlOpts)->targetPhrase,*this);
+
+    //store it temporarily in the linkedTransOpts vector
+    linkedTransOpts.push_back(transOpt);
+
+    delete (*iterLinkedXmlOpts);
+   }
+
+   //now link them up and add to m_XmlOptionsList TODO: this is complicated by linked options. Drop it? -JS
+   for(std::vector<TranslationOption *>::const_iterator iterLinkedTransOpts1 = linkedTransOpts.begin();
+    iterLinkedTransOpts1 != linkedTransOpts.end(); iterLinkedTransOpts1++) {
+
+    for(std::vector<TranslationOption *>::const_iterator iterLinkedTransOpts2 = linkedTransOpts.begin();
+     iterLinkedTransOpts2 != linkedTransOpts.end(); iterLinkedTransOpts2++) {
+
+     if (iterLinkedTransOpts1 != iterLinkedTransOpts2) {
+      (*iterLinkedTransOpts1)->AddLinkedTransOpt(*iterLinkedTransOpts2);
+     }
+    } //inner linked opts loop
+
+    //ok everything is linked up and initialized, add it to our list of options and mark locations in coverage map
+    TranslationOption *transOpt = *iterLinkedTransOpts1;
+
+    m_xmlOptionsList.push_back(transOpt);
+
+    for(size_t j=transOpt->GetSourceWordsRange().GetStartPos();j<=transOpt->GetSourceWordsRange().GetEndPos();j++) {
+     m_xmlCoverageMap[j]=true;
+    }
+   }//outer linked opts loop
+  }
+
+ }
+
+ m_reorderingConstraint.InitializeWalls( GetSize() );
+
+ // set reordering walls, if "-monotone-at-punction" is set
+ if (staticData.UseReorderingConstraint())
+ {
+  m_reorderingConstraint.SetMonotoneAtPunctuation( GetSubString( WordsRange(0,GetSize()-1 ) ) );
+ }
+
+ // set walls obtained from xml
+ for(size_t i=0; i<xmlWalls.size(); i++)
+  if( xmlWalls[i] < GetSize() ) // no buggy walls, please
+   m_reorderingConstraint.SetWall( xmlWalls[i], true );
+ m_reorderingConstraint.FinalizeWalls();
+
+ return 1;
+}
+
+// Factory method: builds a text-input translation option collection for this
+// sentence. Caller owns the returned object.
+TranslationOptionCollection*
+Sentence::CreateTranslationOptionCollection() const
+{
+ size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
+ float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold();
+ TranslationOptionCollection *rv= new TranslationOptionCollectionText(*this, maxNoTransOptPerCoverage, transOptThreshold);
+ assert(rv);
+ return rv;
+}
+// Writes the sentence (as its Phrase representation) plus a newline to 'out'.
+void Sentence::Print(std::ostream& out) const
+{
+ out<<*static_cast<Phrase const*>(this)<<"\n";
+}
+
+
+// True if any position in [startPos, endPos] is covered by an XML option.
+// Positions beyond the coverage map (e.g. when no XML was parsed) count as uncovered.
+bool Sentence::XmlOverlap(size_t startPos, size_t endPos) const {
+ for (size_t pos = startPos; pos <= endPos ; pos++)
+ {
+  if (pos < m_xmlCoverageMap.size() && m_xmlCoverageMap[pos]) {
+   return true;
+  }
+ }
+ return false;
+}
+
+// Appends to 'list' every stored XML translation option whose source range
+// is exactly [startPos, endPos]. Pointers remain owned by this Sentence.
+void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const {
+ //iterate over XmlOptions list, find exact source/target matches
+ // NOTE(review): 'outputFactorOrder' below is fetched but never used — dead code.
+ const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+
+ for (std::vector<TranslationOption*>::const_iterator iterXMLOpts = m_xmlOptionsList.begin();
+  iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) {
+  if (startPos == (**iterXMLOpts).GetSourceWordsRange().GetStartPos() && endPos == (**iterXMLOpts).GetSourceWordsRange().GetEndPos()) {
+   list.push_back(*iterXMLOpts);
+  }
+ }
+}
+
+
+// Extracts the value of attributeName from an XML tag string (attr="value").
+// Returns "" if the attribute is absent or the closing quote is missing.
+std::string Sentence::ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName){
+ /*TODO deal with unescaping \"*/
+ string tagOpen = attributeName + "=\"";
+ size_t contentsStart = tag.find(tagOpen);
+ if (contentsStart == std::string::npos) return "";
+ contentsStart += tagOpen.size();
+ // NOTE(review): searching from contentsStart+1 skips the character at
+ // contentsStart, so an empty value (attr="") would miss its closing quote
+ // and mis-parse — TODO confirm whether empty attributes can occur here.
+ size_t contentsEnd = tag.find_first_of('"',contentsStart+1);
+ if (contentsEnd == std::string::npos) {
+  TRACE_ERR("Malformed XML attribute: "<< tag);
+  return "";
+ }
+ size_t possibleEnd;
+ // treat a quote preceded by a backslash as escaped: extend to the next quote
+ while (tag.at(contentsEnd-1) == '\\' && (possibleEnd = tag.find_first_of('"',contentsEnd+1)) != std::string::npos) {
+  contentsEnd = possibleEnd;
+ }
+ return tag.substr(contentsStart,contentsEnd-contentsStart);
+}
+
+// Thin forwarding wrapper around Phrase::CreateFromString (resolves the
+// name for callers holding a Sentence).
+void Sentence::CreateFromString(const std::vector<FactorType> &factorOrder
+       , const std::string &phraseString
+       , const std::string &factorDelimiter)
+{
+ Phrase::CreateFromString(factorOrder, phraseString, factorDelimiter);
+}
+
+
+
+}
+
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
new file mode 100644
index 000000000..b8ed35f74
--- /dev/null
+++ b/moses/src/Sentence.h
@@ -0,0 +1,104 @@
+// $Id: Sentence.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Sentence_h
+#define moses_Sentence_h
+
+#include <vector>
+#include <string>
+#include "Word.h"
+#include "Phrase.h"
+#include "InputType.h"
+#include "XmlOption.h"
+
+namespace Moses
+{
+
+class WordsRange;
+class PhraseDictionary;
+class TranslationOption;
+class TranslationOptionCollection;
+
+
+/***
+ * A Phrase class with an ID. Used specifically as source input so contains functionality to read
+ * from IODevice and create trans opt
+ */
+class Sentence : public Phrase, public InputType
+{
+
+ private:
+
+ /**
+  * Utility method that takes in a string representing an XML tag and the name of the attribute,
+  * and returns the value of that tag if present, empty string otherwise
+  */
+ static std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
+ // translation options forced by XML markup in the input; filled in Read(),
+ // handed out (still owned here) by GetXmlTranslationOptions()
+ std::vector <TranslationOption*> m_xmlOptionsList;
+ // per source position: true if an XML option covers that word (see XmlOverlap)
+ std::vector <bool> m_xmlCoverageMap;
+
+ public:
+ Sentence(FactorDirection direction) : Phrase(direction), InputType()
+ {
+ }
+
+ InputTypeEnum GetType() const
+ { return SentenceInput;}
+
+ //! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString()
+ Phrase GetSubString(const WordsRange& r) const
+ {
+  return Phrase::GetSubString(r);
+ }
+
+ //! Calls Phrase::GetWord(). Implements abstract InputType::GetWord()
+ const Word& GetWord(size_t pos) const
+ {
+  return Phrase::GetWord(pos);
+ }
+
+ //! Calls Phrase::GetSize(). Implements abstract InputType::GetSize()
+ size_t GetSize() const
+ {
+  return Phrase::GetSize();
+ }
+
+ //! Returns true if there were any XML tags parsed that at least partially covered the range passed
+ bool XmlOverlap(size_t startPos, size_t endPos) const;
+
+ //! populates vector argument with XML force translation options for the specific range passed
+ void GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const;
+
+ //! reads one sentence from 'in'; returns 0 on EOF, 1 otherwise
+ int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+ void Print(std::ostream& out) const;
+
+ //! factory for the matching translation option collection; caller owns result
+ TranslationOptionCollection* CreateTranslationOptionCollection() const;
+
+ void CreateFromString(const std::vector<FactorType> &factorOrder
+       , const std::string &phraseString
+       , const std::string &factorDelimiter);
+
+};
+
+
+}
+
+#endif
diff --git a/moses/src/SentenceStats.cpp b/moses/src/SentenceStats.cpp
new file mode 100644
index 000000000..ceb9bdd01
--- /dev/null
+++ b/moses/src/SentenceStats.cpp
@@ -0,0 +1,51 @@
+// $Id: SentenceStats.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <iostream>
+using std::cout;
+using std::endl;
+#include "SentenceStats.h"
+
+namespace Moses
+{
+/***
+ * to be called after decoding a sentence
+ * \param bestHypo final hypothesis of the best translation; its back-pointer
+ *        chain is walked to collect deleted (untranslated) source words
+ */
+void SentenceStats::CalcFinalStats(const Hypothesis& bestHypo)
+{
+ //deleted words
+ AddDeletedWords(bestHypo);
+ //inserted words--not implemented yet 8/1 TODO
+}
+
+// Recursively walks the hypothesis back-pointer chain (earliest first) and
+// records source phrases that produced no target words as "deleted".
+void SentenceStats::AddDeletedWords(const Hypothesis& hypo)
+{
+ //don't check either a null pointer or the empty initial hypothesis (if we were given the empty hypo, the null check will save us)
+ if(hypo.GetPrevHypo() != NULL && hypo.GetPrevHypo()->GetCurrSourceWordsRange().GetNumWordsCovered() > 0) AddDeletedWords(*hypo.GetPrevHypo());
+ if(hypo.GetCurrTargetWordsRange().GetNumWordsCovered() == 0)
+ {
+  m_deletedWords.push_back(hypo.GetSourcePhrase());
+ }
+}
+
+}
+
diff --git a/moses/src/SentenceStats.h b/moses/src/SentenceStats.h
new file mode 100644
index 000000000..44d47a69b
--- /dev/null
+++ b/moses/src/SentenceStats.h
@@ -0,0 +1,172 @@
+// $Id: SentenceStats.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_SentenceStats_h
+#define moses_SentenceStats_h
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <time.h>
+#include "Phrase.h"
+#include "Hypothesis.h"
+#include "TypeDef.h" //FactorArray
+#include "InputType.h"
+#include "Util.h" //Join()
+
+namespace Moses
+{
+
+struct RecombinationInfo
+{
+ RecombinationInfo() {} //for std::vector
+ RecombinationInfo(size_t srcWords, float gProb, float bProb)
+ : numSourceWords(srcWords), betterProb(gProb), worseProb(bProb) {}
+
+ size_t numSourceWords;
+ float betterProb, worseProb;
+};
+
+/***
+ * stats relating to decoder operation on a given sentence
+ */
+class SentenceStats
+{
+ public:
+
+ /***
+ * to be called before decoding a sentence
+ */
+ SentenceStats(const InputType& source) {Initialize(source);}
+ void Initialize(const InputType& source)
+ {
+ m_numHyposPruned = 0;
+ m_numHyposDiscarded = 0;
+ m_numHyposEarlyDiscarded = 0;
+ m_numHyposNotBuilt = 0;
+ m_timeCollectOpts = 0;
+ m_timeBuildHyp = 0;
+ m_timeEstimateScore = 0;
+ m_timeCalcLM = 0;
+ m_timeOtherScore = 0;
+ m_timeStack = 0;
+ m_totalSourceWords = source.GetSize();
+ m_recombinationInfos.clear();
+ m_deletedWords.clear();
+ m_insertedWords.clear();
+ }
+
+ /***
+ * to be called after decoding a sentence
+ */
+ void CalcFinalStats(const Hypothesis& bestHypo);
+
+ unsigned int GetTotalHypos() const {return Hypothesis::GetHypothesesCreated() + m_numHyposNotBuilt; }
+ size_t GetNumHyposRecombined() const {return m_recombinationInfos.size();}
+ unsigned int GetNumHyposPruned() const {return m_numHyposPruned;}
+ unsigned int GetNumHyposDiscarded() const {return m_numHyposDiscarded;}
+ unsigned int GetNumHyposEarlyDiscarded() const {return m_numHyposEarlyDiscarded;}
+ unsigned int GetNumHyposNotBuilt() const {return m_numHyposNotBuilt;}
+ float GetTimeCollectOpts() const { return m_timeCollectOpts/(float)CLOCKS_PER_SEC; }
+ float GetTimeBuildHyp() const { return m_timeBuildHyp/(float)CLOCKS_PER_SEC; }
+ float GetTimeCalcLM() const { return m_timeCalcLM/(float)CLOCKS_PER_SEC; }
+ float GetTimeEstimateScore() const { return m_timeEstimateScore/(float)CLOCKS_PER_SEC; }
+ float GetTimeOtherScore() const { return m_timeOtherScore/(float)CLOCKS_PER_SEC; }
+ float GetTimeStack() const { return m_timeStack/(float)CLOCKS_PER_SEC; }
+ float GetTimeTotal() const { return m_timeTotal/(float)CLOCKS_PER_SEC; }
+ size_t GetTotalSourceWords() const {return m_totalSourceWords;}
+ size_t GetNumWordsDeleted() const {return m_deletedWords.size();}
+ size_t GetNumWordsInserted() const {return m_insertedWords.size();}
+ const std::vector<const Phrase*>& GetDeletedWords() const {return m_deletedWords;}
+ const std::vector<std::string>& GetInsertedWords() const {return m_insertedWords;}
+
+ void AddRecombination(const Hypothesis& worseHypo, const Hypothesis& betterHypo)
+ {
+ m_recombinationInfos.push_back(RecombinationInfo(worseHypo.GetWordsBitmap().GetNumWordsCovered(),
+ betterHypo.GetTotalScore(), worseHypo.GetTotalScore()));
+ }
+ void AddPruning() {m_numHyposPruned++;}
+ void AddEarlyDiscarded() {m_numHyposEarlyDiscarded++;}
+ void AddNotBuilt() {m_numHyposNotBuilt++;}
+ void AddDiscarded() {m_numHyposDiscarded++;}
+
+ void AddTimeCollectOpts( clock_t t ) { m_timeCollectOpts += t; }
+ void AddTimeBuildHyp( clock_t t ) { m_timeBuildHyp += t; }
+ void AddTimeCalcLM( clock_t t ) { m_timeCalcLM += t; }
+ void AddTimeEstimateScore( clock_t t ) { m_timeEstimateScore += t; }
+ void AddTimeOtherScore( clock_t t ) { m_timeOtherScore += t; }
+ void AddTimeStack( clock_t t ) { m_timeStack += t; }
+ void SetTimeTotal( clock_t t ) { m_timeTotal = t; }
+
+ protected:
+
+ /***
+ * auxiliary to CalcFinalStats()
+ */
+ void AddDeletedWords(const Hypothesis& hypo);
+
+ //hypotheses
+ std::vector<RecombinationInfo> m_recombinationInfos;
+ unsigned int m_numHyposPruned;
+ unsigned int m_numHyposDiscarded;
+ unsigned int m_numHyposEarlyDiscarded;
+ unsigned int m_numHyposNotBuilt;
+ clock_t m_timeCollectOpts;
+ clock_t m_timeBuildHyp;
+ clock_t m_timeEstimateScore;
+ clock_t m_timeCalcLM;
+ clock_t m_timeOtherScore;
+ clock_t m_timeStack;
+ clock_t m_timeTotal;
+
+ //words
+ size_t m_totalSourceWords;
+ std::vector<const Phrase*> m_deletedWords; //count deleted words/phrases in the final hypothesis
+ std::vector<std::string> m_insertedWords; //count inserted words in the final hypothesis
+};
+
+inline std::ostream& operator<<(std::ostream& os, const SentenceStats& ss)
+{
+ float totalTime = ss.GetTimeTotal();
+ float otherTime = totalTime - (ss.GetTimeCollectOpts() + ss.GetTimeBuildHyp() + ss.GetTimeEstimateScore() + ss.GetTimeCalcLM() + ss.GetTimeOtherScore() + ss.GetTimeStack());
+
+ return os << "total hypotheses considered = " << ss.GetTotalHypos() << std::endl
+ << " number not built = " << ss.GetNumHyposNotBuilt() << std::endl
+ << " number discarded early = " << ss.GetNumHyposEarlyDiscarded() << std::endl
+ << " number discarded = " << ss.GetNumHyposDiscarded() << std::endl
+ << " number recombined = " << ss.GetNumHyposRecombined() << std::endl
+ << " number pruned = " << ss.GetNumHyposPruned() << std::endl
+
+ << "time to collect opts " << ss.GetTimeCollectOpts() << " (" << (int)(100 * ss.GetTimeCollectOpts()/totalTime) << "%)" << std::endl
+ << " create hyps " << ss.GetTimeBuildHyp() << " (" << (int)(100 * ss.GetTimeBuildHyp()/totalTime) << "%)" << std::endl
+ << " estimate score " << ss.GetTimeEstimateScore() << " (" << (int)(100 * ss.GetTimeEstimateScore()/totalTime) << "%)" << std::endl
+ << " calc lm " << ss.GetTimeCalcLM() << " (" << (int)(100 * ss.GetTimeCalcLM()/totalTime) << "%)" << std::endl
+ << " other hyp score " << ss.GetTimeOtherScore() << " (" << (int)(100 * ss.GetTimeOtherScore()/totalTime) << "%)" << std::endl
+ << " manage stacks " << ss.GetTimeStack() << " (" << (int)(100 * ss.GetTimeStack()/totalTime) << "%)" << std::endl
+ << " other " << otherTime << " (" << (int)(100 * otherTime/totalTime) << "%)" << std::endl
+
+ << "total source words = " << ss.GetTotalSourceWords() << std::endl
+ << " words deleted = " << ss.GetNumWordsDeleted() << " (" << Join(" ", ss.GetDeletedWords()) << ")" << std::endl
+ << " words inserted = " << ss.GetNumWordsInserted() << " (" << Join(" ", ss.GetInsertedWords()) << ")" << std::endl;
+}
+
+}
+#endif
diff --git a/moses/src/SquareMatrix.cpp b/moses/src/SquareMatrix.cpp
new file mode 100644
index 000000000..500dee36d
--- /dev/null
+++ b/moses/src/SquareMatrix.cpp
@@ -0,0 +1,128 @@
+// $Id: SquareMatrix.cpp 1964 2008-12-20 20:22:35Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <string>
+#include <iostream>
+#include "SquareMatrix.h"
+#include "TypeDef.h"
+#include "Util.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+/**
+ * Calculate future score estimate for a given coverage bitmap
+ *
+ * \param bitmap coverage bitmap
+ */
+
+float SquareMatrix::CalcFutureScore( WordsBitmap const &bitmap ) const
+{
+ const size_t notInGap= numeric_limits<size_t>::max();
+ size_t startGap = notInGap;
+ float futureScore = 0.0f;
+ for(size_t currPos = 0 ; currPos < bitmap.GetSize() ; currPos++)
+ {
+ // start of a new gap?
+ if(bitmap.GetValue(currPos) == false && startGap == notInGap)
+ {
+ startGap = currPos;
+ }
+ // end of a gap?
+ else if(bitmap.GetValue(currPos) == true && startGap != notInGap)
+ {
+ futureScore += GetScore(startGap, currPos - 1);
+ startGap = notInGap;
+ }
+ }
+ // coverage ending with gap?
+ if (startGap != notInGap)
+ {
+ futureScore += GetScore(startGap, bitmap.GetSize() - 1);
+ }
+
+ return futureScore;
+}
+
+/**
+ * Calculate future score estimate for a given coverage bitmap
+ * and an additional span that is also covered. This function is used
+ * to compute future score estimates for hypotheses that we may want
+ * build, but first want to check.
+ *
+ * Note: this function is implemented a bit more complex than
+ * the basic one (w/o additional phrase) for speed reasons,
+ * which is probably overkill.
+ *
+ * \param bitmap coverage bitmap
+ * \param startPos start of the span that is added to the coverage
+ * \param endPos end of the span that is added to the coverage
+ */
+
+float SquareMatrix::CalcFutureScore( WordsBitmap const &bitmap, size_t startPos, size_t endPos ) const
+{
+ const size_t notInGap= numeric_limits<size_t>::max();
+ float futureScore = 0.0f;
+ size_t startGap = bitmap.GetFirstGapPos();
+ if (startGap == NOT_FOUND) return futureScore; // everything filled
+
+ // start loop at first gap
+ size_t startLoop = startGap+1;
+ if (startPos == startGap) // unless covered by phrase
+ {
+ startGap = notInGap;
+ startLoop = endPos+1; // -> postpone start
+ }
+
+ size_t lastCovered = bitmap.GetLastPos();
+ if (endPos > lastCovered || lastCovered == NOT_FOUND) lastCovered = endPos;
+
+ for(size_t currPos = startLoop; currPos <= lastCovered ; currPos++)
+ {
+ // start of a new gap?
+ if(startGap == notInGap && bitmap.GetValue(currPos) == false && (currPos < startPos || currPos > endPos))
+ {
+ startGap = currPos;
+ }
+ // end of a gap?
+ else if(startGap != notInGap && (bitmap.GetValue(currPos) == true || (startPos <= currPos && currPos <= endPos)))
+ {
+ futureScore += GetScore(startGap, currPos - 1);
+ startGap = notInGap;
+ }
+ }
+ // coverage ending with gap?
+ if (lastCovered != bitmap.GetSize() - 1)
+ {
+ futureScore += GetScore(lastCovered+1, bitmap.GetSize() - 1);
+ }
+
+ return futureScore;
+}
+
+TO_STRING_BODY(SquareMatrix);
+
+}
+
+
diff --git a/moses/src/SquareMatrix.h b/moses/src/SquareMatrix.h
new file mode 100644
index 000000000..7b6cbd63b
--- /dev/null
+++ b/moses/src/SquareMatrix.h
@@ -0,0 +1,88 @@
+// $Id: SquareMatrix.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_SquareMatrix_h
+#define moses_SquareMatrix_h
+
+#include <iostream>
+#include "TypeDef.h"
+#include "Util.h"
+#include "WordsBitmap.h"
+
+namespace Moses
+{
+
+//! A square array of floats to store future costs
+class SquareMatrix
+{
+ friend std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix);
+protected:
+ const size_t m_size; /**< length of the square (sentence length) */
+ float *m_array; /**< two-dimensional array to store floats */
+
+ SquareMatrix(); // not implemented
+ SquareMatrix(const SquareMatrix &copy); // not implemented
+
+public:
+ SquareMatrix(size_t size)
+ :m_size(size)
+ {
+ m_array = (float*) malloc(sizeof(float) * size * size);
+ }
+ ~SquareMatrix()
+ {
+ free(m_array);
+ }
+ /** Returns length of the square: typically the sentence length */
+ inline size_t GetSize() const
+ {
+ return m_size;
+ }
+ /** Get a future cost score for a span */
+ inline float GetScore(size_t startPos, size_t endPos) const
+ {
+ return m_array[startPos * m_size + endPos];
+ }
+ /** Set a future cost score for a span */
+ inline void SetScore(size_t startPos, size_t endPos, float value)
+ {
+ m_array[startPos * m_size + endPos] = value;
+ }
+ float CalcFutureScore( WordsBitmap const& ) const;
+ float CalcFutureScore( WordsBitmap const&, size_t startPos, size_t endPos ) const;
+
+ TO_STRING();
+};
+
+inline std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix)
+{
+ for (size_t endPos = 0 ; endPos < matrix.GetSize() ; endPos++)
+ {
+ for (size_t startPos = 0 ; startPos < matrix.GetSize() ; startPos++)
+ out << matrix.GetScore(startPos, endPos) << " ";
+ out << std::endl;
+ }
+
+ return out;
+}
+
+}
+#endif
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
new file mode 100644
index 000000000..7f4b51515
--- /dev/null
+++ b/moses/src/StaticData.cpp
@@ -0,0 +1,1138 @@
+// $Id: StaticData.cpp 2977 2010-03-15 13:08:58Z hieuhoang1972 $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <string>
+#include <cassert>
+#include "PhraseDictionaryMemory.h"
+#include "DecodeStepTranslation.h"
+#include "DecodeStepGeneration.h"
+#include "GenerationDictionary.h"
+#include "DummyScoreProducers.h"
+#include "StaticData.h"
+#include "Util.h"
+#include "FactorCollection.h"
+#include "Timer.h"
+#include "LanguageModelSingleFactor.h"
+#include "LanguageModelMultiFactor.h"
+#include "LanguageModelFactory.h"
+#include "LexicalReordering.h"
+#include "GlobalLexicalModel.h"
+#include "SentenceStats.h"
+#include "PhraseDictionaryTreeAdaptor.h"
+#include "UserMessage.h"
+#include "TranslationOption.h"
+#include "DecodeGraph.h"
+#include "InputFileStream.h"
+/*******************************************************************************************/
+#include "DPR_reordering.h"
+/*******************************************************************************************/
+
+using namespace std;
+
+namespace Moses
+{
+static size_t CalcMax(size_t x, const vector<size_t>& y) {
+ size_t max = x;
+ for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i)
+ if (*i > max) max = *i;
+ return max;
+}
+
+static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z) {
+ size_t max = x;
+ for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i)
+ if (*i > max) max = *i;
+ for (vector<size_t>::const_iterator i=z.begin(); i != z.end(); ++i)
+ if (*i > max) max = *i;
+ return max;
+}
+
+StaticData StaticData::s_instance;
+
+StaticData::StaticData()
+:m_fLMsLoaded(false)
+,m_inputType(SentenceInput)
+,m_numInputScores(0)
+,m_distortionScoreProducer(0)
+,m_wpProducer(0)
+,m_isDetailedTranslationReportingEnabled(false)
+,m_onlyDistinctNBest(false)
+,m_computeLMBackoffStats(false)
+,m_factorDelimiter("|") // default delimiter between factors
+,m_isAlwaysCreateDirectTranslationOption(false)
+,m_sourceStartPosMattersForRecombination(false)
+,m_numLinkParams(1)
+{
+ m_maxFactorIdx[0] = 0; // source side
+ m_maxFactorIdx[1] = 0; // target side
+
+ // memory pools
+ Phrase::InitializeMemPool();
+}
+
+bool StaticData::LoadData(Parameter *parameter)
+{
+ ResetUserTime();
+ m_parameter = parameter;
+
+ // verbose level
+ m_verboseLevel = 1;
+ if (m_parameter->GetParam("verbose").size() == 1)
+ {
+ m_verboseLevel = Scan<size_t>( m_parameter->GetParam("verbose")[0]);
+ }
+
+ // input type has to be specified BEFORE loading the phrase tables!
+ if(m_parameter->GetParam("inputtype").size())
+ m_inputType= (InputTypeEnum) Scan<int>(m_parameter->GetParam("inputtype")[0]);
+ std::string s_it = "text input";
+ if (m_inputType == 1) { s_it = "confusion net"; }
+ if (m_inputType == 2) { s_it = "word lattice"; }
+ VERBOSE(2,"input type is: "<<s_it<<"\n");
+
+ if(m_parameter->GetParam("recover-input-path").size()) {
+ m_recoverPath = Scan<bool>(m_parameter->GetParam("recover-input-path")[0]);
+ if (m_recoverPath && m_inputType == SentenceInput) {
+ TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
+ m_recoverPath = false;
+ }
+ }
+
+
+ // factor delimiter
+ if (m_parameter->GetParam("factor-delimiter").size() > 0) {
+ m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0];
+ }
+
+ //word-to-word alignment
+ SetBooleanParameter( &m_UseAlignmentInfo, "use-alignment-info", false );
+ SetBooleanParameter( &m_PrintAlignmentInfo, "print-alignment-info", false );
+ SetBooleanParameter( &m_PrintAlignmentInfoNbest, "print-alignment-info-in-n-best", false );
+
+ if (!m_UseAlignmentInfo && m_PrintAlignmentInfo){
+ TRACE_ERR("--print-alignment-info should only be used together with \"--use-alignment-info true\". Continue forcing to false.\n");
+ m_PrintAlignmentInfo=false;
+ }
+ if (!m_UseAlignmentInfo && m_PrintAlignmentInfoNbest){
+ TRACE_ERR("--print-alignment-info-in-n-best should only be used together with \"--use-alignment-info true\". Continue forcing to false.\n");
+ m_PrintAlignmentInfoNbest=false;
+ }
+
+ // n-best
+ if (m_parameter->GetParam("n-best-list").size() >= 2)
+ {
+ m_nBestFilePath = m_parameter->GetParam("n-best-list")[0];
+ m_nBestSize = Scan<size_t>( m_parameter->GetParam("n-best-list")[1] );
+ m_onlyDistinctNBest=(m_parameter->GetParam("n-best-list").size()>2 && m_parameter->GetParam("n-best-list")[2]=="distinct");
+ }
+ else if (m_parameter->GetParam("n-best-list").size() == 1) {
+ UserMessage::Add(string("ERROR: wrong format for switch -n-best-list file size"));
+ return false;
+ }
+ else
+ {
+ m_nBestSize = 0;
+ }
+ if (m_parameter->GetParam("n-best-factor").size() > 0)
+ {
+ m_nBestFactor = Scan<size_t>( m_parameter->GetParam("n-best-factor")[0]);
+ }
+ else {
+ m_nBestFactor = 20;
+ }
+
+ // word graph
+ if (m_parameter->GetParam("output-word-graph").size() == 2)
+ m_outputWordGraph = true;
+ else
+ m_outputWordGraph = false;
+
+ // search graph
+ if (m_parameter->GetParam("output-search-graph").size() > 0)
+ {
+ if (m_parameter->GetParam("output-search-graph").size() != 1) {
+ UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph file"));
+ return false;
+ }
+ m_outputSearchGraph = true;
+ }
+ // ... in extended format
+ else if (m_parameter->GetParam("output-search-graph-extended").size() > 0)
+ {
+ if (m_parameter->GetParam("output-search-graph-extended").size() != 1) {
+ UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph-extended file"));
+ return false;
+ }
+ m_outputSearchGraph = true;
+ m_outputSearchGraphExtended = true;
+ }
+ else
+ m_outputSearchGraph = false;
+#ifdef HAVE_PROTOBUF
+ if (m_parameter->GetParam("output-search-graph-pb").size() > 0)
+ {
+ if (m_parameter->GetParam("output-search-graph-pb").size() != 1) {
+ UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph-pb path"));
+ return false;
+ }
+ m_outputSearchGraphPB = true;
+ }
+ else
+ m_outputSearchGraphPB = false;
+#endif
+
+ // include feature names in the n-best list
+ SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );
+
+ // include word alignment in the n-best list
+ SetBooleanParameter( &m_nBestIncludesAlignment, "include-alignment-in-n-best", false );
+
+ // printing source phrase spans
+ SetBooleanParameter( &m_reportSegmentation, "report-segmentation", false );
+
+ // print all factors of output translations
+ SetBooleanParameter( &m_reportAllFactors, "report-all-factors", false );
+
+ // print all factors of output translations in the n-best list
+ SetBooleanParameter( &m_reportAllFactorsNBest, "report-all-factors-in-n-best", false );
+
+ //
+ if (m_inputType == SentenceInput)
+ {
+ SetBooleanParameter( &m_useTransOptCache, "use-persistent-cache", true );
+ m_transOptCacheMaxSize = (m_parameter->GetParam("persistent-cache-size").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("persistent-cache-size")[0]) : DEFAULT_MAX_TRANS_OPT_CACHE_SIZE;
+ }
+ else
+ {
+ m_useTransOptCache = false;
+ }
+
+
+ //input factors
+ const vector<string> &inputFactorVector = m_parameter->GetParam("input-factors");
+ for(size_t i=0; i<inputFactorVector.size(); i++)
+ {
+ m_inputFactorOrder.push_back(Scan<FactorType>(inputFactorVector[i]));
+ }
+ if(m_inputFactorOrder.empty())
+ {
+ UserMessage::Add(string("no input factor specified in config file"));
+ return false;
+ }
+
+ //output factors
+ const vector<string> &outputFactorVector = m_parameter->GetParam("output-factors");
+ for(size_t i=0; i<outputFactorVector.size(); i++)
+ {
+ m_outputFactorOrder.push_back(Scan<FactorType>(outputFactorVector[i]));
+ }
+ if(m_outputFactorOrder.empty())
+ { // default. output factor 0
+ m_outputFactorOrder.push_back(0);
+ }
+
+ //source word deletion
+ SetBooleanParameter( &m_wordDeletionEnabled, "phrase-drop-allowed", false );
+
+ //Disable discarding
+ SetBooleanParameter(&m_disableDiscarding, "disable-discarding", false);
+
+ //Print All Derivations
+ SetBooleanParameter( &m_printAllDerivations , "print-all-derivations", false );
+
+ // additional output
+ SetBooleanParameter( &m_isDetailedTranslationReportingEnabled,
+ "translation-details", false );
+
+ SetBooleanParameter( &m_computeLMBackoffStats, "lmstats", false );
+ if (m_computeLMBackoffStats &&
+ ! m_isDetailedTranslationReportingEnabled) {
+ VERBOSE(1, "-lmstats implies -translation-details, enabling" << std::endl);
+ m_isDetailedTranslationReportingEnabled = true;
+ }
+
+ // score weights
+ const vector<string> distortionWeights = m_parameter->GetParam("weight-d");
+ m_weightDistortion = Scan<float>(distortionWeights[0]);
+ m_weightWordPenalty = Scan<float>( m_parameter->GetParam("weight-w")[0] );
+ m_weightUnknownWord = (m_parameter->GetParam("weight-u").size() > 0) ? Scan<float>(m_parameter->GetParam("weight-u")[0]) : 1;
+
+ m_wpProducer = new WordPenaltyProducer(m_scoreIndexManager);
+ m_allWeights.push_back(m_weightWordPenalty);
+
+ m_unknownWordPenaltyProducer = new UnknownWordPenaltyProducer(m_scoreIndexManager);
+ m_allWeights.push_back(m_weightUnknownWord);
+
+ m_distortionScoreProducer = new DistortionScoreProducer(m_scoreIndexManager);
+ m_allWeights.push_back(m_weightDistortion);
+
+ // reordering constraints
+ m_maxDistortion = (m_parameter->GetParam("distortion-limit").size() > 0) ?
+ Scan<int>(m_parameter->GetParam("distortion-limit")[0])
+ : -1;
+ SetBooleanParameter( &m_reorderingConstraint, "monotone-at-punctuation", false );
+
+ // settings for pruning
+ m_maxHypoStackSize = (m_parameter->GetParam("stack").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("stack")[0]) : DEFAULT_MAX_HYPOSTACK_SIZE;
+ m_minHypoStackDiversity = 0;
+ if (m_parameter->GetParam("stack-diversity").size() > 0) {
+ if (m_maxDistortion > 15) {
+ UserMessage::Add("stack diversity > 0 is not allowed for distortion limits larger than 15");
+ return false;
+ }
+ if (m_inputType == WordLatticeInput) {
+ UserMessage::Add("stack diversity > 0 is not allowed for lattice input");
+ return false;
+ }
+ m_minHypoStackDiversity = Scan<size_t>(m_parameter->GetParam("stack-diversity")[0]);
+ }
+
+ m_beamWidth = (m_parameter->GetParam("beam-threshold").size() > 0) ?
+ TransformScore(Scan<float>(m_parameter->GetParam("beam-threshold")[0]))
+ : TransformScore(DEFAULT_BEAM_WIDTH);
+ m_earlyDiscardingThreshold = (m_parameter->GetParam("early-discarding-threshold").size() > 0) ?
+ TransformScore(Scan<float>(m_parameter->GetParam("early-discarding-threshold")[0]))
+ : TransformScore(DEFAULT_EARLY_DISCARDING_THRESHOLD);
+ m_translationOptionThreshold = (m_parameter->GetParam("translation-option-threshold").size() > 0) ?
+ TransformScore(Scan<float>(m_parameter->GetParam("translation-option-threshold")[0]))
+ : TransformScore(DEFAULT_TRANSLATION_OPTION_THRESHOLD);
+
+ m_maxNoTransOptPerCoverage = (m_parameter->GetParam("max-trans-opt-per-coverage").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("max-trans-opt-per-coverage")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE;
+
+ m_maxNoPartTransOpt = (m_parameter->GetParam("max-partial-trans-opt").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("max-partial-trans-opt")[0]) : DEFAULT_MAX_PART_TRANS_OPT_SIZE;
+
+ m_maxPhraseLength = (m_parameter->GetParam("max-phrase-length").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("max-phrase-length")[0]) : DEFAULT_MAX_PHRASE_LENGTH;
+
+ m_cubePruningPopLimit = (m_parameter->GetParam("cube-pruning-pop-limit").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("cube-pruning-pop-limit")[0]) : DEFAULT_CUBE_PRUNING_POP_LIMIT;
+
+ m_cubePruningDiversity = (m_parameter->GetParam("cube-pruning-diversity").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("cube-pruning-diversity")[0]) : DEFAULT_CUBE_PRUNING_DIVERSITY;
+
+ // unknown word processing
+ SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
+
+ // minimum Bayes risk decoding
+ SetBooleanParameter( &m_mbr, "minimum-bayes-risk", false );
+ m_mbrSize = (m_parameter->GetParam("mbr-size").size() > 0) ?
+ Scan<size_t>(m_parameter->GetParam("mbr-size")[0]) : 200;
+ m_mbrScale = (m_parameter->GetParam("mbr-scale").size() > 0) ?
+ Scan<float>(m_parameter->GetParam("mbr-scale")[0]) : 1.0f;
+
+ //lattice mbr
+ SetBooleanParameter( &m_useLatticeMBR, "lminimum-bayes-risk", false );
+ if (m_useLatticeMBR)
+ m_mbr = m_useLatticeMBR;
+
+ m_lmbrPruning = (m_parameter->GetParam("lmbr-pruning-factor").size() > 0) ?
+ Scan<size_t>(m_parameter->GetParam("lmbr-pruning-factor")[0]) : 30;
+ m_lmbrThetas = Scan<float>(m_parameter->GetParam("lmbr-thetas"));
+ SetBooleanParameter( &m_useLatticeHypSetForLatticeMBR, "lattice-hypo-set", false );
+ m_lmbrPrecision = (m_parameter->GetParam("lmbr-p").size() > 0) ?
+ Scan<float>(m_parameter->GetParam("lmbr-p")[0]) : 0.8f;
+ m_lmbrPRatio = (m_parameter->GetParam("lmbr-r").size() > 0) ?
+ Scan<float>(m_parameter->GetParam("lmbr-r")[0]) : 0.6f;
+
+ m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
+ Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
+ m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
+
+ // Read in constraint decoding file, if provided
+ if(m_parameter->GetParam("constraint").size()) {
+ if (m_parameter->GetParam("search-algorithm").size() > 0
+ && Scan<size_t>(m_parameter->GetParam("search-algorithm")[0]) != 0) {
+ cerr << "Can use -constraint only with stack-based search (-search-algorithm 0)" << endl;
+ exit(1);
+ }
+ m_constraintFileName = m_parameter->GetParam("constraint")[0];
+
+ InputFileStream constraintFile(m_constraintFileName);
+
+ std::string line;
+
+ long sentenceID = -1;
+ while (getline(constraintFile, line))
+ {
+ vector<string> vecStr = Tokenize(line, "\t");
+
+ if (vecStr.size() == 1) {
+ sentenceID++;
+ Phrase phrase(Output);
+ phrase.CreateFromString(GetOutputFactorOrder(), vecStr[0], GetFactorDelimiter());
+ m_constraints.insert(make_pair(sentenceID,phrase));
+ }
+ else if (vecStr.size() == 2) {
+ sentenceID = Scan<long>(vecStr[0]);
+ Phrase phrase(Output);
+ phrase.CreateFromString(GetOutputFactorOrder(), vecStr[1], GetFactorDelimiter());
+ m_constraints.insert(make_pair(sentenceID,phrase));
+ }
+ else {
+ assert(false);
+ }
+ }
+ }
+
+ // to cube or not to cube
+ m_searchAlgorithm = (m_parameter->GetParam("search-algorithm").size() > 0) ?
+ (SearchAlgorithm) Scan<size_t>(m_parameter->GetParam("search-algorithm")[0]) : Normal;
+
+ // use of xml in input
+ if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
+ else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive;
+ else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive;
+ else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore;
+ else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough;
+ else {
+ UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, or ignore");
+ return false;
+ }
+
+ if (!LoadLexicalReorderingModel()) return false;
+ if (!LoadLanguageModels()) return false;
+ if (!LoadGenerationTables()) return false;
+ if (!LoadPhraseTables()) return false;
+ if (!LoadGlobalLexicalModel()) return false;
+ /*******************************************************************************************/
+ //Load the reordering probabilities
+ if (!LoadDPRReordering()) return false;
+ /*******************************************************************************************/
+
+ m_scoreIndexManager.InitFeatureNames();
+ if (m_parameter->GetParam("weight-file").size() > 0) {
+ UserMessage::Add("ERROR: weight-file option is broken\n");
+ abort();
+// if (m_parameter->GetParam("weight-file").size() != 1) {
+// UserMessage::Add(string("ERROR: weight-file takes a single parameter"));
+// return false;
+// }
+// string fnam = m_parameter->GetParam("weight-file")[0];
+// m_scoreIndexManager.InitWeightVectorFromFile(fnam, &m_allWeights);
+ }
+
+ return true;
+}
+
+void StaticData::SetBooleanParameter( bool *parameter, string parameterName, bool defaultValue )
+{
+ // default value if nothing is specified
+ *parameter = defaultValue;
+ if (! m_parameter->isParamSpecified( parameterName ) )
+ {
+ return;
+ }
+
+ // if parameter is just specified as, e.g. "-parameter" set it true
+ if (m_parameter->GetParam( parameterName ).size() == 0)
+ {
+ *parameter = true;
+ }
+
+ // if parameter is specified "-parameter true" or "-parameter false"
+ else if (m_parameter->GetParam( parameterName ).size() == 1)
+ {
+ *parameter = Scan<bool>( m_parameter->GetParam( parameterName )[0]);
+ }
+}
+
+/** Destructor: tears down everything LoadData() built — all owned
+ * feature/table objects, the persistent translation option cache
+ * (which owns its stored TranslationOptionLists), the small score
+ * producers, and the Phrase memory pool. */
+StaticData::~StaticData()
+{
+ RemoveAllInColl(m_phraseDictionary);
+ RemoveAllInColl(m_generationDictionary);
+ RemoveAllInColl(m_languageModel);
+ RemoveAllInColl(m_reorderModels);
+ RemoveAllInColl(m_globalLexicalModels);
+ /*******************************************************************************************/
+ //remove all items in DPR model
+ RemoveAllInColl(m_dprReorderModels);
+ /*******************************************************************************************/
+
+ // delete trans opt
+ // (each cached TranslationOptionList was copied into the cache by
+ // AddTransOptListToCache and is owned here)
+ map<std::pair<size_t, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
+ for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache)
+ {
+ TranslationOptionList *transOptList = iterCache->second.first;
+ delete transOptList;
+ }
+
+ // small score producers
+ delete m_distortionScoreProducer;
+ delete m_wpProducer;
+ delete m_unknownWordPenaltyProducer;
+
+ // memory pools
+ Phrase::FinalizeMemPool();
+
+}
+
+
+/*******************************Loading DPR model**********************************************/
+//load the reordering model
+//push it in m_dprReorderModels
+/** Loads the DPR (distance phrase reordering) model, if configured.
+ * Reads the sentence-option file path ([DPR-file]), the model weights
+ * ([wDPR], only the first is used) and the class setup ([class-DPR]),
+ * constructs a DPR_reordering feature (which registers itself with the
+ * score index manager) and appends it to m_dprReorderModels.
+ * Returns true when no DPR model is configured (nothing to load) or on
+ * success; false on an incomplete configuration. */
+bool StaticData::LoadDPRReordering()
+{
+ // nothing to do when no DPR model is configured; previously the
+ // unchecked [0] accesses below crashed on configs without DPR-file,
+ // even though this function is called unconditionally from LoadData()
+ if (m_parameter->GetParam("DPR-file").size() == 0)
+ {
+ return true;
+ }
+
+ std::cerr << "Loading DPR reordering models...\n";
+
+ // the remaining options are also indexed with [0]: validate them first
+ if (m_parameter->GetParam("class-DPR").size() == 0 || m_parameter->GetParam("wDPR").size() == 0)
+ {
+ UserMessage::Add("DPR-file is specified, but class-DPR and/or wDPR are missing");
+ return false;
+ }
+
+ //1. Read the start position of the sentence options file
+ string filePath = m_parameter->GetParam("DPR-file")[0]; //path of the sentence option
+
+ //2. Get the weight (only the first weight is used)
+ vector<float> weights = Scan<float>(m_parameter->GetParam("wDPR"));
+
+ //3. Get the class setup
+ string classString = m_parameter->GetParam("class-DPR")[0]; //the number of classes
+
+ DPR_reordering *model = new DPR_reordering(m_scoreIndexManager, filePath, classString, weights);
+
+ m_dprReorderModels.push_back(model);
+
+ return true;
+}
+/*******************************************************************************************/
+
+
+/** Loads all lexicalized distortion models listed under [distortion-file].
+ * Each entry is "factor-map name num-weights filePath"; 'name' encodes the
+ * model type (monotonicity / msd / orientation / directional), optionally a
+ * direction (forward/backward/unidirectional/bidirectional) and a condition
+ * (f / fe / fec). Weights are taken from [weight-d] starting at index 1,
+ * because index 0 belongs to the plain distortion feature.
+ * Returns false (after printing to stderr) on any malformed specification. */
+bool StaticData::LoadLexicalReorderingModel()
+{
+ std::cerr << "Loading lexical distortion models...\n";
+ const vector<string> fileStr = m_parameter->GetParam("distortion-file");
+ const vector<string> weightsStr = m_parameter->GetParam("weight-d");
+
+ std::vector<float> weights;
+ size_t w = 1; //cur weight (weight-d[0] is the basic distortion weight)
+ size_t f = 0; //cur file
+ //get weights values
+ std::cerr << "have " << fileStr.size() << " models\n";
+ for(size_t j = 0; j < weightsStr.size(); ++j){
+ weights.push_back(Scan<float>(weightsStr[j]));
+ }
+ //load all models
+ for(size_t i = 0; i < fileStr.size(); ++i)
+ {
+ vector<string> spec = Tokenize<string>(fileStr[f], " ");
+ ++f; //mark file as consumed (f always tracks i here)
+ if(4 != spec.size()){
+ //wrong file specification string...
+ std::cerr << "Wrong Lexical Reordering Model Specification for model " << i << "!\n";
+ return false;
+ }
+ //spec[0] = factor map
+ //spec[1] = name
+ //spec[2] = num weights
+ //spec[3] = fileName
+ //decode data into these
+ vector<FactorType> input,output;
+ LexicalReordering::Direction direction;
+ LexicalReordering::Condition condition;
+ size_t numWeights;
+ //decode factor map
+ vector<string> inputfactors = Tokenize(spec[0],"-");
+ if(inputfactors.size() == 2){
+ input = Tokenize<FactorType>(inputfactors[0],",");
+ output = Tokenize<FactorType>(inputfactors[1],",");
+ }
+ else if(inputfactors.size() == 1)
+ {
+ //if only one factor list is given, assume it is for the target (e) side
+ output = Tokenize<FactorType>(inputfactors[0],",");
+ }
+ else
+ {
+ //format error
+ return false;
+ }
+ //decode name
+ vector<string> params = Tokenize<string>(spec[1],"-");
+ std::string type(ToLower(params[0]));
+ std::string dir;
+ std::string cond;
+
+ if(3 == params.size())
+ {
+ //name format is 'type'-'direction'-'condition'
+ dir = ToLower(params[1]);
+ cond = ToLower(params[2]);
+ }
+ else if(2 == params.size())
+ {
+ //assume name format is 'type'-'condition' with implicit unidirectional
+ std::cerr << "Warning: Lexical model type underspecified...assuming unidirectional in model " << i << "\n";
+ dir = "unidirectional";
+ cond = ToLower(params[1]);
+ }
+ else
+ {
+ std::cerr << "Lexical model type underspecified for model " << i << "!\n";
+ return false;
+ }
+
+ if(dir == "forward"){
+ direction = LexicalReordering::Forward;
+ }
+ else if(dir == "backward" || dir == "unidirectional" || dir == "uni")
+ {
+ direction = LexicalReordering::Backward;
+ }
+ else if(dir == "bidirectional" || dir == "bi")
+ {
+ direction = LexicalReordering::Bidirectional;
+ }
+ else
+ {
+ std::cerr << "Unknown direction declaration '" << dir << "'for lexical reordering model " << i << "\n";
+ return false;
+ }
+
+ if(cond == "f"){
+ condition = LexicalReordering::F;
+ }
+ else if(cond == "fe")
+ {
+ condition = LexicalReordering::FE;
+ }
+ else if(cond == "fec")
+ {
+ condition = LexicalReordering::FEC;
+ }
+ else
+ {
+ std::cerr << "Unknown conditioning declaration '" << cond << "'for lexical reordering model " << i << "!\n";
+ return false;
+ }
+
+ //decode num weights (and fetch weight from array...)
+ std::vector<float> mweights;
+ numWeights = atoi(spec[2].c_str());
+ for(size_t k = 0; k < numWeights; ++k, ++w)
+ {
+ if(w >= weights.size()){
+ //error not enough weights...
+ std::cerr << "Lexicalized distortion model: Not enough weights, add to [weight-d]\n";
+ return false;
+ } else {
+ mweights.push_back(weights[w]);
+ }
+ }
+
+ //decode filename
+ string filePath = spec[3];
+
+ //already validated above -- construct the matching model type
+ //std::cerr << type;
+ if("monotonicity" == type){
+ m_reorderModels.push_back(new LexicalMonotonicReordering(filePath, mweights, direction, condition, input, output));
+ }
+ else if("orientation" == type || "msd" == type)
+ {
+ m_reorderModels.push_back(new LexicalOrientationReordering(filePath, mweights, direction, condition, input, output));
+ }
+ else if("directional" == type)
+ {
+ m_reorderModels.push_back(new LexicalDirectionalReordering(filePath, mweights, direction, condition, input, output));
+ }
+ else
+ {
+ //error unknown type!
+ std::cerr << " ...unknown type!\n";
+ return false;
+ }
+ //std::cerr << "\n";
+
+ }
+ return true;
+}
+
+/** Loads all global lexical models listed under [global-lexical-file].
+ * Each entry is "inputFactors-outputFactors filePath"; exactly one weight
+ * from [weight-lex] is consumed per model, and the counts must match.
+ * Returns false (after printing to stderr) on any mismatch. */
+bool StaticData::LoadGlobalLexicalModel()
+{
+ const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-lex"));
+ const vector<string> &file = m_parameter->GetParam("global-lexical-file");
+
+ if (weight.size() != file.size())
+ {
+ std::cerr << "number of weights and models for the global lexical model does not match ("
+ << weight.size() << " != " << file.size() << ")" << std::endl;
+ return false;
+ }
+
+ for (size_t i = 0; i < weight.size(); i++ )
+ {
+ vector<string> spec = Tokenize<string>(file[i], " ");
+ if ( spec.size() != 2 )
+ {
+ std::cerr << "wrong global lexical model specification: " << file[i] << endl;
+ return false;
+ }
+ // spec[0] = factor mapping "in-out", spec[1] = model file path
+ vector< string > factors = Tokenize(spec[0],"-");
+ if ( factors.size() != 2 )
+ {
+ std::cerr << "wrong factor definition for global lexical model: " << spec[0] << endl;
+ return false;
+ }
+ vector<FactorType> inputFactors = Tokenize<FactorType>(factors[0],",");
+ vector<FactorType> outputFactors = Tokenize<FactorType>(factors[1],",");
+ m_globalLexicalModels.push_back( new GlobalLexicalModel( spec[1], weight[i], inputFactors, outputFactors ) );
+ }
+ return true;
+}
+
+/** Loads all n-gram language models listed under [lmodel-file].
+ * Each entry is "LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath]";
+ * the optional 5th token is only valid for IRSTLM. One [weight-l] weight
+ * is consumed per model, and IRST models may carry a dictionary
+ * upper-bound via [lmodel-dub]. Sets m_fLMsLoaded on success, which
+ * LoadPhraseTables() asserts on. Returns false with a user message on
+ * any malformed specification. */
+bool StaticData::LoadLanguageModels()
+{
+ if (m_parameter->GetParam("lmodel-file").size() > 0)
+ {
+ // weights
+ vector<float> weightAll = Scan<float>(m_parameter->GetParam("weight-l"));
+
+ for (size_t i = 0 ; i < weightAll.size() ; i++)
+ {
+ m_allWeights.push_back(weightAll[i]);
+ }
+
+ // initialize n-gram order for each factor. populated only by factored lm
+ const vector<string> &lmVector = m_parameter->GetParam("lmodel-file");
+
+ // weightAll[i] is indexed per model below; verify the counts line up
+ // instead of reading out of bounds on a bad config
+ if (weightAll.size() != lmVector.size())
+ {
+ UserMessage::Add("Number of [weight-l] entries must match number of [lmodel-file] entries");
+ return false;
+ }
+
+ // dictionary upper-bounds for all IRST LMs
+ vector<int> LMdub = Scan<int>(m_parameter->GetParam("lmodel-dub"));
+ if (m_parameter->GetParam("lmodel-dub").size() == 0){
+ for(size_t i=0; i<m_parameter->GetParam("lmodel-file").size(); i++)
+ LMdub.push_back(0);
+ }
+ else if (LMdub.size() != lmVector.size())
+ {
+ // LMdub[i] is also indexed per model below
+ UserMessage::Add("Number of [lmodel-dub] entries must match number of [lmodel-file] entries");
+ return false;
+ }
+
+ for(size_t i=0; i<lmVector.size(); i++)
+ {
+ vector<string> token = Tokenize(lmVector[i]);
+ if (token.size() != 4 && token.size() != 5 )
+ {
+ UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
+ return false;
+ }
+ // type = implementation, SRI, IRST etc
+ LMImplementation lmImplementation = static_cast<LMImplementation>(Scan<int>(token[0]));
+
+ // factorType = 0 = Surface, 1 = POS, 2 = Stem, 3 = Morphology, etc
+ vector<FactorType> factorTypes = Tokenize<FactorType>(token[1], ",");
+
+ // nGramOrder = 2 = bigram, 3 = trigram, etc
+ size_t nGramOrder = Scan<int>(token[2]);
+
+ string &languageModelFile = token[3];
+ if (token.size() == 5){
+ if (lmImplementation==IRST)
+ languageModelFile += " " + token[4]; // IRSTLM takes "filePath mapFilePath"
+ else {
+ UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
+ return false;
+ }
+ }
+ IFVERBOSE(1)
+ PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
+
+ LanguageModel *lm = LanguageModelFactory::CreateLanguageModel(
+ lmImplementation
+ , factorTypes
+ , nGramOrder
+ , languageModelFile
+ , weightAll[i]
+ , m_scoreIndexManager
+ , LMdub[i]);
+ if (lm == NULL)
+ {
+ UserMessage::Add("no LM created. We probably don't have it compiled");
+ return false;
+ }
+
+ m_languageModel.push_back(lm);
+ }
+ }
+ // flag indicating that language models were loaded,
+ // since phrase table loading requires their presence
+ m_fLMsLoaded = true;
+ IFVERBOSE(1)
+ PrintUserTime("Finished loading LanguageModels");
+ return true;
+}
+
+/** Loads all generation tables listed under [generation-file].
+ * Each entry is "INPUT-FACTORS OUTPUT-FACTORS NUM-FEATURES FILE-PATH";
+ * one [weight-generation] weight is consumed per feature. A ".gz"
+ * fallback is tried when the plain file does not exist.
+ * Returns false with a user message on a malformed specification or a
+ * failed dictionary load. */
+bool StaticData::LoadGenerationTables()
+{
+ if (m_parameter->GetParam("generation-file").size() > 0)
+ {
+ const vector<string> &generationVector = m_parameter->GetParam("generation-file");
+ const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-generation"));
+
+ IFVERBOSE(1)
+ {
+ TRACE_ERR( "weight-generation: ");
+ for (size_t i = 0 ; i < weight.size() ; i++)
+ {
+ TRACE_ERR( weight[i] << "\t");
+ }
+ TRACE_ERR(endl);
+ }
+ size_t currWeightNum = 0;
+
+ for(size_t currDict = 0 ; currDict < generationVector.size(); currDict++)
+ {
+ vector<string> token = Tokenize(generationVector[currDict]);
+ // token[0..3] are read below; reject short specifications instead of
+ // crashing on an out-of-range access
+ if (token.size() < 4)
+ {
+ UserMessage::Add("Expected format 'INPUT-FACTORS OUTPUT-FACTORS NUM-FEATURES FILE-PATH' in [generation-file]");
+ return false;
+ }
+ vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
+ ,output = Tokenize<FactorType>(token[1], ",");
+ m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output);
+ string filePath;
+ size_t numFeatures;
+
+ numFeatures = Scan<size_t>(token[2]);
+ filePath = token[3];
+
+ // transparently fall back to a gzipped table
+ if (!FileExists(filePath) && FileExists(filePath + ".gz")) {
+ filePath += ".gz";
+ }
+
+ VERBOSE(1, filePath << endl);
+
+ m_generationDictionary.push_back(new GenerationDictionary(numFeatures, m_scoreIndexManager));
+ assert(m_generationDictionary.back() && "could not create GenerationDictionary");
+ if (!m_generationDictionary.back()->Load(input
+ , output
+ , filePath
+ , Output))
+ {
+ delete m_generationDictionary.back();
+ // also drop the now-dangling pointer from the vector, otherwise
+ // the destructor's RemoveAllInColl would delete it a second time
+ m_generationDictionary.pop_back();
+ return false;
+ }
+ for(size_t i = 0; i < numFeatures; i++) {
+ assert(currWeightNum < weight.size());
+ m_allWeights.push_back(weight[currWeightNum++]);
+ }
+ }
+ if (currWeightNum != weight.size()) {
+ TRACE_ERR( " [WARNING] config file has " << weight.size() << " generation weights listed, but the configuration for generation files indicates there should be " << currWeightNum << "!\n");
+ }
+ }
+
+ return true;
+}
+
+/** Loads all phrase translation tables listed under [ttable-file].
+ * Must run after LoadLanguageModels() (asserted via m_fLMsLoaded).
+ * For confusion-net/lattice input, table 0 additionally carries the
+ * [weight-i] input scores and the [link-param-count] link parameters.
+ * Per-table weights come from [weight-t] in order. */
+bool StaticData::LoadPhraseTables()
+{
+ VERBOSE(2,"About to LoadPhraseTables" << endl);
+
+ // language models must be loaded prior to loading phrase tables
+ assert(m_fLMsLoaded);
+ // load phrase translation tables
+ if (m_parameter->GetParam("ttable-file").size() > 0)
+ {
+ // weights
+ vector<float> weightAll = Scan<float>(m_parameter->GetParam("weight-t"));
+
+ const vector<string> &translationVector = m_parameter->GetParam("ttable-file");
+ vector<size_t> maxTargetPhrase = Scan<size_t>(m_parameter->GetParam("ttable-limit"));
+
+ size_t index = 0;
+ size_t weightAllOffset = 0;
+ for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++)
+ {
+ // NOTE(review): token[0..3] are read below without a size check —
+ // confirm [ttable-file] entries are validated before this point
+ vector<string> token = Tokenize(translationVector[currDict]);
+ //characteristics of the phrase table
+ vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
+ ,output = Tokenize<FactorType>(token[1], ",");
+ m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input);
+ m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output);
+ m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1;
+ string filePath= token[3];
+ size_t numScoreComponent = Scan<size_t>(token[2]);
+
+ assert(weightAll.size() >= weightAllOffset + numScoreComponent);
+
+ // weights for this phrase dictionary
+ // first InputScores (if any), then translation scores
+ vector<float> weight;
+
+ if(currDict==0 && m_inputType)
+ { // TODO. find what the assumptions made by confusion network about phrase table output which makes
+ // it only work with binary file. This is a hack
+
+ m_numInputScores=m_parameter->GetParam("weight-i").size();
+ for(unsigned k=0;k<m_numInputScores;++k)
+ weight.push_back(Scan<float>(m_parameter->GetParam("weight-i")[k]));
+
+ if(m_parameter->GetParam("link-param-count").size())
+ m_numLinkParams = Scan<size_t>(m_parameter->GetParam("link-param-count")[0]);
+
+ //print some info about this interaction:
+ if (m_numLinkParams == m_numInputScores) {
+ VERBOSE(1,"specified equal numbers of link parameters and insertion weights, not using non-epsilon 'real' word link count.\n");
+ } else if ((m_numLinkParams + 1) == m_numInputScores) {
+ VERBOSE(1,"WARN: "<< m_numInputScores << " insertion weights found and only "<< m_numLinkParams << " link parameters specified, applying non-epsilon 'real' word link count for last feature weight.\n");
+ } else {
+ stringstream strme;
+ strme << "You specified " << m_numInputScores
+ << " input weights (weight-i), but you specified " << m_numLinkParams << " link parameters (link-param-count)!";
+ UserMessage::Add(strme.str());
+ return false;
+ }
+
+ }
+ if (!m_inputType){
+ m_numInputScores=0;
+ }
+ //this number changes depending on what phrase table we're talking about: only 0 has the weights on it
+ size_t tableInputScores = (currDict == 0 ? m_numInputScores : 0);
+
+ for (size_t currScore = 0 ; currScore < numScoreComponent; currScore++)
+ weight.push_back(weightAll[weightAllOffset + currScore]);
+
+
+ if(weight.size() - tableInputScores != numScoreComponent)
+ {
+ stringstream strme;
+ strme << "Your phrase table has " << numScoreComponent
+ << " scores, but you specified " << (weight.size() - tableInputScores) << " weights!";
+ UserMessage::Add(strme.str());
+ return false;
+ }
+
+ weightAllOffset += numScoreComponent;
+ numScoreComponent += tableInputScores;
+
+ assert(numScoreComponent==weight.size());
+
+ std::copy(weight.begin(),weight.end(),std::back_inserter(m_allWeights));
+
+ IFVERBOSE(1)
+ PrintUserTime(string("Start loading PhraseTable ") + filePath);
+ VERBOSE(1,"filePath: " << filePath << endl);
+
+ // NOTE(review): maxTargetPhrase[index] assumes [ttable-limit] has an
+ // entry for every table — confirm upstream validation
+ PhraseDictionaryFeature* pdf = new PhraseDictionaryFeature(
+ numScoreComponent
+ , (currDict==0 ? m_numInputScores : 0)
+ , input
+ , output
+ , filePath
+ , weight
+ , maxTargetPhrase[index]);
+
+ m_phraseDictionary.push_back(pdf);
+
+
+
+
+
+ index++;
+ }
+ }
+
+ IFVERBOSE(1)
+ PrintUserTime("Finished loading phrase tables");
+ return true;
+}
+
+/** Builds the decode graphs from the [mapping] section.
+ * Two-token lines ("T 0" / "G 0") go into graph 0; three-token lines add a
+ * leading graph index, which may only repeat or grow by one per line
+ * (asserted). 'T' selects a translation step, anything else a generation
+ * step; the trailing number indexes the corresponding dictionary vector.
+ * Ownership of the returned DecodeGraph pointers passes to the caller. */
+vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
+{
+ vector<DecodeGraph*> decodeStepVL;
+ // mapping
+ const vector<string> &mappingVector = m_parameter->GetParam("mapping");
+ DecodeStep *prev = 0;
+ size_t previousVectorList = 0;
+ for(size_t i=0; i<mappingVector.size(); i++)
+ {
+ vector<string> token = Tokenize(mappingVector[i]);
+ size_t vectorList;
+ DecodeType decodeType;
+ size_t index;
+ if (token.size() == 2)
+ {
+ vectorList = 0;
+ decodeType = token[0] == "T" ? Translate : Generate;
+ index = Scan<size_t>(token[1]);
+ }
+ //Smoothing
+ else if (token.size() == 3)
+ {
+ vectorList = Scan<size_t>(token[0]);
+ //the vectorList index can only increment by one
+ assert(vectorList == previousVectorList || vectorList == previousVectorList + 1);
+ if (vectorList > previousVectorList)
+ {
+ // a new graph starts a fresh decode-step chain
+ prev = NULL;
+ }
+ decodeType = token[1] == "T" ? Translate : Generate;
+ index = Scan<size_t>(token[2]);
+ }
+ else
+ {
+ UserMessage::Add("Malformed mapping!");
+ // NOTE(review): with NDEBUG this assert vanishes and vectorList/index
+ // stay uninitialized below — confirm malformed mappings abort earlier
+ assert(false);
+ }
+
+ DecodeStep* decodeStep = 0;
+ switch (decodeType) {
+ case Translate:
+ if(index>=m_phraseDictionary.size())
+ {
+ stringstream strme;
+ strme << "No phrase dictionary with index "
+ << index << " available!";
+ UserMessage::Add(strme.str());
+ assert(false);
+ }
+ decodeStep = new DecodeStepTranslation(m_phraseDictionary[index]->GetDictionary(source), prev);
+ break;
+ case Generate:
+ if(index>=m_generationDictionary.size())
+ {
+ stringstream strme;
+ strme << "No generation dictionary with index "
+ << index << " available!";
+ UserMessage::Add(strme.str());
+ assert(false);
+ }
+ decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
+ break;
+ case InsertNullFertilityWord:
+ assert(!"Please implement NullFertilityInsertion.");
+ break;
+ }
+ assert(decodeStep);
+ if (decodeStepVL.size() < vectorList + 1)
+ {
+ decodeStepVL.push_back(new DecodeGraph(decodeStepVL.size()));
+ }
+ decodeStepVL[vectorList]->Add(decodeStep);
+ prev = decodeStep;
+ previousVectorList = vectorList;
+ }
+
+ return decodeStepVL;
+}
+
+/** Releases per-sentence state after a sentence has been decoded:
+ * asks every generation dictionary and every language model to clean up. */
+void StaticData::CleanUpAfterSentenceProcessing() const
+{
+ // generation dictionaries first
+ for (size_t idx = 0; idx < m_generationDictionary.size(); ++idx)
+ {
+ m_generationDictionary[idx]->CleanUp();
+ }
+
+ // then give each language model its per-sentence cleanup hook
+ for (LMList::const_iterator lmIter = m_languageModel.begin();
+ lmIter != m_languageModel.end(); ++lmIter)
+ {
+ (**lmIter).CleanUpAfterSentenceProcessing();
+ }
+}
+
+/** initialize the translation and language models for this sentence
+ (includes loading of translation table entries on demand, if
+ binary format is used) */
+void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
+{
+ for(size_t i=0;i<m_reorderModels.size();++i) {
+ m_reorderModels[i]->InitializeForInput(in);
+ }
+ for(size_t i=0;i<m_globalLexicalModels.size();++i) {
+ // NOTE(review): this cast assumes the input is a Sentence whenever
+ // global lexical models are configured — confirm for confusion-net input
+ m_globalLexicalModels[i]->InitializeForInput((Sentence const&)in);
+ }
+ //something LMs could do before translating a sentence
+ LMList::const_iterator iterLM;
+ for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
+ {
+ LanguageModel &languageModel = **iterLM;
+ languageModel.InitializeBeforeSentenceProcessing();
+ }
+}
+
+/** Overwrites the slice of m_allWeights belonging to the given score
+ * producer. weights.size() must equal the producer's score-component
+ * count (asserted); m_allWeights is grown if the slice extends past
+ * its current end. */
+void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector<float>& weights)
+{
+ const size_t id = sp->GetScoreBookkeepingID();
+ const size_t begin = m_scoreIndexManager.GetBeginIndex(id);
+ const size_t end = m_scoreIndexManager.GetEndIndex(id);
+ assert(end - begin == weights.size());
+ if (m_allWeights.size() < end)
+ m_allWeights.resize(end);
+ std::vector<float>::const_iterator weightIter = weights.begin();
+ for (size_t i = begin; i < end; i++)
+ m_allWeights[i] = *weightIter++;
+}
+
+/** Looks up a cached translation option list keyed by (decode graph
+ * position, source phrase). Returns NULL on a miss; on a hit refreshes
+ * the entry's last-used timestamp (cache and timestamps are mutable,
+ * so this works from a const method). Thread-safe only when compiled
+ * WITH_THREADS, via m_transOptCacheMutex. */
+const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const
+{
+ std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_transOptCacheMutex);
+#endif
+ std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
+ = m_transOptCache.find(key);
+ if (iter == m_transOptCache.end())
+ return NULL;
+ iter->second.second = clock(); // update last used time
+ return iter->second.first;
+}
+
+/** Evicts least-recently-used entries from the persistent translation
+ * option cache once it exceeds m_transOptCacheMaxSize. Collects all
+ * last-used timestamps into a max-heap, pops off the most recent ones,
+ * and erases every entry older than the resulting cutoff. Under
+ * WITH_THREADS the caller (AddTransOptListToCache) already holds the
+ * cache mutex. */
+void StaticData::ReduceTransOptCache() const
+{
+ if (m_transOptCache.size() <= m_transOptCacheMaxSize) return; // not full
+ clock_t t = clock();
+
+ // find cutoff for last used time
+ priority_queue< clock_t > lastUsedTimes;
+ std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter;
+ iter = m_transOptCache.begin();
+ while( iter != m_transOptCache.end() )
+ {
+ lastUsedTimes.push( iter->second.second );
+ iter++;
+ }
+ // NOTE(review): lastUsedTimes.size() shrinks with every pop() while i
+ // grows, so this loop re-evaluates its bound each iteration and pops
+ // roughly half of (size - maxSize/2) — the eviction is less aggressive
+ // than the expression suggests; confirm intent before changing it
+ for( size_t i=0; i < lastUsedTimes.size()-m_transOptCacheMaxSize/2; i++ )
+ lastUsedTimes.pop();
+ clock_t cutoffLastUsedTime = lastUsedTimes.top();
+
+ // remove all old entries
+ iter = m_transOptCache.begin();
+ while( iter != m_transOptCache.end() )
+ {
+ if (iter->second.second < cutoffLastUsedTime)
+ {
+ // post-increment keeps 'iter' valid while the old node is erased
+ std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iterRemove = iter++;
+ delete iterRemove->second.first;
+ m_transOptCache.erase(iterRemove);
+ }
+ else iter++;
+ }
+ VERBOSE(2,"Reduced persistent translation option cache in " << ((clock()-t)/(float)CLOCKS_PER_SEC) << " seconds." << std::endl);
+}
+
+/** Stores a deep copy of transOptList in the persistent cache, keyed by
+ * (decode graph position, source phrase) and stamped with the current
+ * clock, then triggers LRU eviction if the cache grew past its limit.
+ * The cache owns the stored copy (freed on eviction or destruction).
+ * Note: an existing entry for the same key is overwritten without
+ * freeing the old list pointer. */
+void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const
+{
+ std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
+ TranslationOptionList* storedTransOptList = new TranslationOptionList(transOptList);
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_transOptCacheMutex);
+#endif
+ m_transOptCache[key] = make_pair( storedTransOptList, clock() );
+ ReduceTransOptCache();
+}
+
+}
+
+
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
new file mode 100644
index 000000000..8669ae57a
--- /dev/null
+++ b/moses/src/StaticData.h
@@ -0,0 +1,535 @@
+// $Id: StaticData.h 2977 2010-03-15 13:08:58Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_StaticData_h
+#define moses_StaticData_h
+
+#include <list>
+#include <vector>
+#include <map>
+#include <memory>
+
+#ifdef WITH_THREADS
+#include <boost/thread/mutex.hpp>
+#endif
+
+#include "TypeDef.h"
+#include "ScoreIndexManager.h"
+#include "FactorCollection.h"
+#include "Parameter.h"
+#include "LanguageModel.h"
+#include "LMList.h"
+#include "SentenceStats.h"
+#include "DecodeGraph.h"
+#include "TranslationOptionList.h"
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+//#include "UnknownWordHandler.h"
+
+namespace Moses
+{
+
+class InputType;
+class LexicalReordering;
+class GlobalLexicalModel;
+class PhraseDictionaryFeature;
+class GenerationDictionary;
+class DistortionScoreProducer;
+class WordPenaltyProducer;
+class DecodeStep;
+class UnknownWordPenaltyProducer;
+/*******************************************************************************************/
+class DPR_reordering;
+/*******************************************************************************************/
+
+/** Contains global variables and contants */
+class StaticData
+{
+private:
+ static StaticData s_instance;
+protected:
+
+ std::map<long,Phrase> m_constraints;
+ std::vector<PhraseDictionaryFeature*> m_phraseDictionary;
+ std::vector<GenerationDictionary*> m_generationDictionary;
+ Parameter *m_parameter;
+ std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
+ LMList m_languageModel;
+ ScoreIndexManager m_scoreIndexManager;
+ std::vector<float> m_allWeights;
+ std::vector<LexicalReordering*> m_reorderModels;
+ std::vector<GlobalLexicalModel*> m_globalLexicalModels;
+
+ /*******************************************************************************************/
+ //define the DPR model vector<pointer>
+ std::vector<DPR_reordering*> m_dprReorderModels;
+ /*******************************************************************************************/
+
+ // Initial = 0 = can be used when creating poss trans
+ // Other = 1 = used to calculate LM score once all steps have been processed
+ float
+ m_beamWidth,
+ m_earlyDiscardingThreshold,
+ m_translationOptionThreshold,
+ m_weightDistortion,
+ m_weightWordPenalty,
+ m_wordDeletionWeight,
+ m_weightUnknownWord;
+ // PhraseTrans, Generation & LanguageModelScore has multiple weights.
+ int m_maxDistortion;
+ // do it differently from old pharaoh
+ // -ve = no limit on distortion
+ // 0 = no disortion (monotone in old pharaoh)
+ bool m_reorderingConstraint; // use additional reordering constraints
+ size_t
+ m_maxHypoStackSize //hypothesis-stack size that triggers pruning
+ , m_minHypoStackDiversity // minimum number of hypothesis in stack for each source word coverage
+ , m_nBestSize
+ , m_nBestFactor
+ , m_maxNoTransOptPerCoverage
+ , m_maxNoPartTransOpt
+ , m_maxPhraseLength
+ , m_numLinkParams;
+
+ std::string
+ m_constraintFileName;
+
+ std::string m_nBestFilePath;
+ bool m_fLMsLoaded, m_labeledNBestList,m_nBestIncludesAlignment;
+ /***
+ * false = treat unknown words as unknowns, and translate them as themselves;
+ * true = drop (ignore) them
+ */
+ bool m_dropUnknown;
+ bool m_wordDeletionEnabled;
+ bool m_disableDiscarding;
+ bool m_printAllDerivations;
+
+ bool m_sourceStartPosMattersForRecombination;
+ bool m_recoverPath;
+
+ SearchAlgorithm m_searchAlgorithm;
+ InputTypeEnum m_inputType;
+ size_t m_numInputScores;
+
+ mutable size_t m_verboseLevel;
+ DistortionScoreProducer *m_distortionScoreProducer;
+ WordPenaltyProducer *m_wpProducer;
+ UnknownWordPenaltyProducer *m_unknownWordPenaltyProducer;
+ bool m_reportSegmentation;
+ bool m_reportAllFactors;
+ bool m_reportAllFactorsNBest;
+ bool m_isDetailedTranslationReportingEnabled;
+ bool m_onlyDistinctNBest;
+ bool m_computeLMBackoffStats;
+ bool m_UseAlignmentInfo;
+ bool m_PrintAlignmentInfo;
+ bool m_PrintAlignmentInfoNbest;
+
+
+ std::string m_factorDelimiter; //! by default, |, but it can be changed
+ size_t m_maxFactorIdx[2]; //! number of factors on source and target side
+ size_t m_maxNumFactors; //! max number of factors on both source and target sides
+
+ XmlInputType m_xmlInputType; //! method for handling sentence XML input
+
+ bool m_mbr; //! use MBR decoder
+ bool m_useLatticeMBR; //! use MBR decoder
+ size_t m_mbrSize; //! number of translation candidates considered
+ float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
+ size_t m_lmbrPruning; //! average number of nodes per word wanted in pruned lattice
+ std::vector<float> m_lmbrThetas; //! theta(s) for lattice mbr calculation
+ bool m_useLatticeHypSetForLatticeMBR; //! to use nbest as hypothesis set during lattice MBR
+ float m_lmbrPrecision; //! unigram precision theta - see Tromble et al 08 for more details
+ float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
+
+
+ bool m_timeout; //! use timeout
+ size_t m_timeout_threshold; //! seconds after which time out is activated
+
+ bool m_useTransOptCache; //! flag indicating, if the persistent translation option cache should be used
+ mutable std::map<std::pair<size_t, Phrase>, pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
+ size_t m_transOptCacheMaxSize; //! maximum size for persistent translation option cache
+ //FIXME: Single lock for cache not most efficient. However using a
+ //reader-writer for LRU cache is tricky - how to record last used time?
+#ifdef WITH_THREADS
+ mutable boost::mutex m_transOptCacheMutex;
+#endif
+ bool m_isAlwaysCreateDirectTranslationOption;
+ //! constructor. only the 1 static variable can be created
+
+ bool m_outputWordGraph; //! whether to output word graph
+ bool m_outputSearchGraph; //! whether to output search graph
+ bool m_outputSearchGraphExtended; //! ... in extended format
+#ifdef HAVE_PROTOBUF
+ bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
+#endif
+
+ size_t m_cubePruningPopLimit;
+ size_t m_cubePruningDiversity;
+ StaticData();
+
+ //! helper fn to set bool param from ini file/command line
+ void SetBooleanParameter(bool *paramter, string parameterName, bool defaultValue);
+
+ /***
+ * load all language models as specified in ini file
+ */
+ bool LoadLanguageModels();
+ /***
+ * load not only the main phrase table but also any auxiliary tables that depend on which features are being used
+ * (eg word-deletion, word-insertion tables)
+ */
+ bool LoadPhraseTables();
+ //! load all generation tables as specified in ini file
+ bool LoadGenerationTables();
+ //! load decoding steps
+ bool LoadLexicalReorderingModel();
+ bool LoadGlobalLexicalModel();
+ /*******************************************************************************************/
+ //Declare the DPR function (loads the reordering-probability model)
+ bool LoadDPRReordering();
+ /*******************************************************************************************/
+ void ReduceTransOptCache() const;
+
+public:
+
+ bool IsAlwaysCreateDirectTranslationOption() const {
+ return m_isAlwaysCreateDirectTranslationOption;
+ }
+ //! destructor
+ ~StaticData();
+ //! return static instance for use like global variable
+ static const StaticData& Instance() { return s_instance; }
+
+ /** delete current static instance and replace with another.
+ * Used by gui front end
+ */
+ #ifdef WIN32
+ static void Reset() { s_instance = StaticData(); }
+ #endif
+
+ /** load data into static instance. This function is required
+ * as LoadData() is not const
+ */
+ static bool LoadDataStatic(Parameter *parameter)
+ {
+ return s_instance.LoadData(parameter);
+ }
+
+ /** Main function to load everything.
+ * Also initialize the Parameter object
+ */
+ bool LoadData(Parameter *parameter);
+
+ const PARAM_VEC &GetParam(const std::string &paramName) const
+ {
+ return m_parameter->GetParam(paramName);
+ }
+
+ bool IsComputeLMBackoffStats() const
+ {
+ return m_computeLMBackoffStats;
+ }
+ const std::vector<FactorType> &GetInputFactorOrder() const
+ {
+ return m_inputFactorOrder;
+ }
+ const std::vector<FactorType> &GetOutputFactorOrder() const
+ {
+ return m_outputFactorOrder;
+ }
+
+ std::vector<DecodeGraph*> GetDecodeStepVL(const InputType& source) const;
+
+ inline bool GetSourceStartPosMattersForRecombination() const
+ {
+ return m_sourceStartPosMattersForRecombination;
+ }
+ inline bool GetDropUnknown() const
+ {
+ return m_dropUnknown;
+ }
+ inline bool GetDisableDiscarding() const
+ {
+ return m_disableDiscarding;
+ }
+ inline size_t GetMaxNoTransOptPerCoverage() const
+ {
+ return m_maxNoTransOptPerCoverage;
+ }
+ inline size_t GetMaxNoPartTransOpt() const
+ {
+ return m_maxNoPartTransOpt;
+ }
+ /** Returns the forced-translation phrase registered for the given
+  * sentence id, or NULL when no constraint exists in m_constraints. */
+ inline const Phrase* GetConstrainingPhrase(long sentenceID) const
+ {
+ std::map<long,Phrase>::const_iterator iter = m_constraints.find(sentenceID);
+ if (iter != m_constraints.end())
+ {
+ const Phrase& phrase = iter->second;
+ return &phrase;
+ }
+ else
+ {
+ return NULL;
+ }
+ }
+ inline size_t GetMaxPhraseLength() const
+ {
+ return m_maxPhraseLength;
+ }
+ const std::vector<LexicalReordering*> &GetReorderModels() const
+ {
+ return m_reorderModels;
+ }
+ float GetWeightDistortion() const
+ {
+ return m_weightDistortion;
+ }
+ float GetWeightWordPenalty() const
+ {
+ return m_weightWordPenalty;
+ }
+ float GetWeightUnknownWord() const
+ {
+ return m_weightUnknownWord;
+ }
+ bool IsWordDeletionEnabled() const
+ {
+ return m_wordDeletionEnabled;
+ }
+ size_t GetMaxHypoStackSize() const
+ {
+ return m_maxHypoStackSize;
+ }
+ size_t GetMinHypoStackDiversity() const
+ {
+ return m_minHypoStackDiversity;
+ }
+ size_t GetCubePruningPopLimit() const
+ {
+ return m_cubePruningPopLimit;
+ }
+ size_t GetCubePruningDiversity() const
+ {
+ return m_cubePruningDiversity;
+ }
+ size_t IsPathRecoveryEnabled() const
+ {
+ return m_recoverPath;
+ }
+ int GetMaxDistortion() const
+ {
+ return m_maxDistortion;
+ }
+ bool UseReorderingConstraint() const
+ {
+ return m_reorderingConstraint;
+ }
+ float GetBeamWidth() const
+ {
+ return m_beamWidth;
+ }
+ float GetEarlyDiscardingThreshold() const
+ {
+ return m_earlyDiscardingThreshold;
+ }
+ bool UseEarlyDiscarding() const
+ {
+ return m_earlyDiscardingThreshold != -numeric_limits<float>::infinity();
+ }
+ float GetTranslationOptionThreshold() const
+ {
+ return m_translationOptionThreshold;
+ }
+ //! returns the total number of score components across all types, all factors
+ size_t GetTotalScoreComponents() const
+ {
+ return m_scoreIndexManager.GetTotalNumberOfScores();
+ }
+ const ScoreIndexManager& GetScoreIndexManager() const
+ {
+ return m_scoreIndexManager;
+ }
+
+ size_t GetLMSize() const
+ {
+ return m_languageModel.size();
+ }
+ const LMList &GetAllLM() const
+ {
+ return m_languageModel;
+ }
+ size_t GetPhraseDictionarySize() const
+ {
+ return m_phraseDictionary.size();
+ }
+ const std::vector<PhraseDictionaryFeature*> &GetPhraseDictionaries() const
+ {
+ return m_phraseDictionary;
+ }
+ const std::vector<GenerationDictionary*> &GetGenerationDictionaries() const
+ {
+ return m_generationDictionary;
+ }
+ size_t GetGenerationDictionarySize() const
+ {
+ return m_generationDictionary.size();
+ }
+ size_t GetVerboseLevel() const
+ {
+ return m_verboseLevel;
+ }
+ void SetVerboseLevel(int x) const { m_verboseLevel = x; }
+ bool GetReportSegmentation() const
+ {
+ return m_reportSegmentation;
+ }
+ bool GetReportAllFactors() const
+ {
+ return m_reportAllFactors;
+ }
+ bool GetReportAllFactorsNBest() const
+ {
+ return m_reportAllFactorsNBest;
+ }
+ bool IsDetailedTranslationReportingEnabled() const
+ {
+ return m_isDetailedTranslationReportingEnabled;
+ }
+
+ bool IsLabeledNBestList() const
+ {
+ return m_labeledNBestList;
+ }
+ bool NBestIncludesAlignment() const
+ {
+ return m_nBestIncludesAlignment;
+ }
+ size_t GetNumLinkParams() const
+ {
+ return m_numLinkParams;
+ }
+ const std::vector<std::string> &GetDescription() const
+ {
+ return m_parameter->GetParam("description");
+ }
+
+ // for mert
+ size_t GetNBestSize() const
+ {
+ return m_nBestSize;
+ }
+ const std::string &GetNBestFilePath() const
+ {
+ return m_nBestFilePath;
+ }
+ bool IsNBestEnabled() const {
+ return (!m_nBestFilePath.empty()) || m_mbr || m_useLatticeMBR || m_outputSearchGraph
+#ifdef HAVE_PROTOBUF
+ || m_outputSearchGraphPB
+#endif
+ ;
+ }
+ size_t GetNBestFactor() const
+ {
+ return m_nBestFactor;
+ }
+ bool GetOutputWordGraph() const
+ { return m_outputWordGraph; }
+
+ //! Sets the global score vector weights for a given ScoreProducer.
+ void SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector<float>& weights);
+ InputTypeEnum GetInputType() const {return m_inputType;}
+ SearchAlgorithm GetSearchAlgorithm() const {return m_searchAlgorithm;}
+ size_t GetNumInputScores() const {return m_numInputScores;}
+ void InitializeBeforeSentenceProcessing(InputType const&) const;
+ void CleanUpAfterSentenceProcessing() const;
+
+ const std::vector<float>& GetAllWeights() const
+ {
+ return m_allWeights;
+ }
+ const DistortionScoreProducer *GetDistortionScoreProducer() const { return m_distortionScoreProducer; }
+ const WordPenaltyProducer *GetWordPenaltyProducer() const { return m_wpProducer; }
+ const UnknownWordPenaltyProducer *GetUnknownWordPenaltyProducer() const { return m_unknownWordPenaltyProducer; }
+
+ bool UseAlignmentInfo() const { return m_UseAlignmentInfo;}
+ void UseAlignmentInfo(bool a){ m_UseAlignmentInfo=a; };
+ bool PrintAlignmentInfo() const { return m_PrintAlignmentInfo; }
+ bool PrintAlignmentInfoInNbest() const {return m_PrintAlignmentInfoNbest;}
+ bool GetDistinctNBest() const {return m_onlyDistinctNBest;}
+ const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
+ size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
+ size_t GetMaxNumFactors() const { return m_maxNumFactors; }
+ bool UseMBR() const { return m_mbr; }
+ bool UseLatticeMBR() const { return m_useLatticeMBR ;}
+ void SetUseLatticeMBR(bool flag) {m_useLatticeMBR = flag; }
+ size_t GetMBRSize() const { return m_mbrSize; }
+ float GetMBRScale() const { return m_mbrScale; }
+ void SetMBRScale(float scale) {
+ m_mbrScale = scale;
+ }
+ size_t GetLatticeMBRPruningFactor() const { return m_lmbrPruning; }
+ void SetLatticeMBRPruningFactor(size_t prune) {
+ m_lmbrPruning = prune;
+ }
+ const std::vector<float>& GetLatticeMBRThetas() const {return m_lmbrThetas;}
+ bool UseLatticeHypSetForLatticeMBR() const { return m_useLatticeHypSetForLatticeMBR;}
+ float GetLatticeMBRPrecision() const {
+ return m_lmbrPrecision;
+ }
+ void SetLatticeMBRPrecision(float p) {
+ m_lmbrPrecision = p;
+ }
+ float GetLatticeMBRPRatio() const {
+ return m_lmbrPRatio;
+ }
+ void SetLatticeMBRPRatio(float r) {
+ m_lmbrPRatio = r;
+ }
+
+ bool UseTimeout() const { return m_timeout; }
+ size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
+
+ bool GetOutputSearchGraph() const { return m_outputSearchGraph; }
+ bool GetOutputSearchGraphExtended() const { return m_outputSearchGraphExtended; }
+#ifdef HAVE_PROTOBUF
+ bool GetOutputSearchGraphPB() const { return m_outputSearchGraphPB; }
+#endif
+
+ XmlInputType GetXmlInputType() const { return m_xmlInputType; }
+
+ bool GetUseTransOptCache() const { return m_useTransOptCache; }
+
+ void AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const;
+
+
+ const TranslationOptionList* FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const;
+
+ bool PrintAllDerivations() const { return m_printAllDerivations;}
+};
+
+}
+#endif
diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp
new file mode 100644
index 000000000..222de9bca
--- /dev/null
+++ b/moses/src/TargetPhrase.cpp
@@ -0,0 +1,224 @@
+// $Id: TargetPhrase.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cassert>
+#include <algorithm>
+#include "TargetPhrase.h"
+#include "PhraseDictionaryMemory.h"
+#include "GenerationDictionary.h"
+#include "LanguageModel.h"
+#include "StaticData.h"
+#include "ScoreIndexManager.h"
+#include "LMList.h"
+#include "ScoreComponentCollection.h"
+#include "Util.h"
+
+using namespace std;
+
+namespace Moses
+{
+bool TargetPhrase::wordalignflag=StaticData::Instance().UseAlignmentInfo();
+bool TargetPhrase::printalign=StaticData::Instance().PrintAlignmentInfo();
+
+//bool TargetPhrase::wordalignflag;
+//bool TargetPhrase::printalign;
+
+TargetPhrase::TargetPhrase(FactorDirection direction)
+ :Phrase(direction),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
+{
+ wordalignflag=StaticData::Instance().UseAlignmentInfo();
+ printalign=StaticData::Instance().PrintAlignmentInfo();
+}
+
+void TargetPhrase::SetScore()
+{ // used when creating translations of unknown words:
+ m_transScore = m_ngramScore = 0;
+ m_fullScore = - StaticData::Instance().GetWeightWordPenalty();
+}
+
+#ifdef HAVE_PROTOBUF
+void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const {
+ pb->add_trg_words("[X,1]");
+ for (size_t pos = 0 ; pos < GetSize() ; pos++)
+ pb->add_trg_words(GetWord(pos)[0]->GetString());
+}
+#endif
+
+
+
+void TargetPhrase::SetScore(float score)
+{
+ //we use an existing score producer to figure out information for score setting (number of scores and weights)
+ //TODO: is this a good idea?
+ ScoreProducer* prod = StaticData::Instance().GetPhraseDictionaries()[0];
+
+ //get the weight list
+ unsigned int id = prod->GetScoreBookkeepingID();
+
+ const vector<float> &allWeights = StaticData::Instance().GetAllWeights();
+
+ size_t beginIndex = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(id);
+ size_t endIndex = StaticData::Instance().GetScoreIndexManager().GetEndIndex(id);
+
+ vector<float> weights;
+
+ std::copy(allWeights.begin() +beginIndex, allWeights.begin() + endIndex,std::back_inserter(weights));
+
+ //find out how many items are in the score vector for this producer
+ size_t numScores = prod->GetNumScoreComponents();
+
+ //divide up the score among all of the score vectors
+ vector <float> scoreVector(numScores,score/numScores);
+
+ //Now we have what we need to call the full SetScore method
+ SetScore(prod,scoreVector,weights,StaticData::Instance().GetWeightWordPenalty(),StaticData::Instance().GetAllLM());
+}
+
+/**
+ * used for setting scores for unknown words with input link features (lattice/conf. nets)
+ * \param scoreVector input scores
+ */
+void TargetPhrase::SetScore(const Scores &scoreVector)
+{
+ //we use an existing score producer to figure out information for score setting (number of scores and weights)
+ ScoreProducer* prod = StaticData::Instance().GetPhraseDictionaries()[0];
+
+ //get the weight list
+ unsigned int id = prod->GetScoreBookkeepingID();
+ const vector<float> &allWeights = StaticData::Instance().GetAllWeights();
+ size_t beginIndex = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(id);
+ size_t endIndex = StaticData::Instance().GetScoreIndexManager().GetEndIndex(id);
+ vector<float> weights;
+ std::copy(allWeights.begin() +beginIndex, allWeights.begin() + endIndex,std::back_inserter(weights));
+
+ //expand the input weight vector
+ assert(scoreVector.size() <= prod->GetNumScoreComponents());
+ Scores sizedScoreVector = scoreVector;
+ sizedScoreVector.resize(prod->GetNumScoreComponents(),0.0f);
+
+ SetScore(prod,sizedScoreVector,weights,StaticData::Instance().GetWeightWordPenalty(),StaticData::Instance().GetAllLM());
+}
+
+void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
+ const Scores &scoreVector,
+ const vector<float> &weightT,
+ float weightWP, const LMList &languageModels)
+{
+ assert(weightT.size() == scoreVector.size());
+ // calc average score if non-best
+
+ m_transScore = std::inner_product(scoreVector.begin(), scoreVector.end(), weightT.begin(), 0.0f);
+ m_scoreBreakdown.PlusEquals(translationScoreProducer, scoreVector);
+
+ // Replicated from TranslationOptions.cpp
+ float totalFutureScore = 0;
+ float totalNgramScore = 0;
+ float totalFullScore = 0;
+
+ LMList::const_iterator lmIter;
+ for (lmIter = languageModels.begin(); lmIter != languageModels.end(); ++lmIter)
+ {
+ const LanguageModel &lm = **lmIter;
+
+ if (lm.Useable(*this))
+ { // contains factors used by this LM
+ const float weightLM = lm.GetWeight();
+ float fullScore, nGramScore;
+
+ lm.CalcScore(*this, fullScore, nGramScore);
+ m_scoreBreakdown.Assign(&lm, nGramScore);
+
+ // total LM score so far
+ totalNgramScore += nGramScore * weightLM;
+ totalFullScore += fullScore * weightLM;
+
+ }
+ }
+ m_ngramScore = totalNgramScore;
+
+ m_fullScore = m_transScore + totalFutureScore + totalFullScore
+ - (this->GetSize() * weightWP); // word penalty
+}
+
+void TargetPhrase::SetWeights(const ScoreProducer* translationScoreProducer, const vector<float> &weightT)
+{
+ // calling this function in case of confusion net input is undefined
+ assert(StaticData::Instance().GetInputType()==SentenceInput);
+
+ /* one way to fix this, you have to make sure the weightT contains (in
+ addition to the usual phrase translation scaling factors) the input
+ weight factor as last element
+ */
+
+ m_transScore = m_scoreBreakdown.PartialInnerProduct(translationScoreProducer, weightT);
+}
+
+void TargetPhrase::ResetScore()
+{
+ m_fullScore = m_ngramScore = 0;
+ m_scoreBreakdown.ZeroAll();
+}
+
+TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
+{
+ if (! IsCompatible(inputPhrase))
+ {
+ return NULL;
+ }
+
+ // ok, merge
+ TargetPhrase *clone = new TargetPhrase(*this);
+ clone->m_sourcePhrase = m_sourcePhrase;
+ int currWord = 0;
+ const size_t len = GetSize();
+ for (size_t currPos = 0 ; currPos < len ; currPos++)
+ {
+ const Word &inputWord = inputPhrase.GetWord(currPos);
+ Word &cloneWord = clone->GetWord(currPos);
+ cloneWord.Merge(inputWord);
+
+ currWord++;
+ }
+
+ return clone;
+}
+
+
+
+
+
+
+
+
+
+
+TO_STRING_BODY(TargetPhrase);
+
+std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
+{
+ os << static_cast<const Phrase&>(tp);
+ os << ", pC=" << tp.m_transScore << ", c=" << tp.m_fullScore;
+
+ return os;
+}
+
+}
+
diff --git a/moses/src/TargetPhrase.h b/moses/src/TargetPhrase.h
new file mode 100644
index 000000000..691e7a562
--- /dev/null
+++ b/moses/src/TargetPhrase.h
@@ -0,0 +1,161 @@
+// $Id: TargetPhrase.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TargetPhrase_h
+#define moses_TargetPhrase_h
+
+#include <vector>
+#include "TypeDef.h"
+#include "Phrase.h"
+#include "ScoreComponentCollection.h"
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef HAVE_PROTOBUF
+#include "rule.pb.h"
+#endif
+
+namespace Moses
+{
+
+class LMList;
+class PhraseDictionary;
+class GenerationDictionary;
+class ScoreProducer;
+
+/** represents an entry on the target side of a phrase table (scores, translation, alignment)
+ */
+class TargetPhrase: public Phrase
+{
+ friend std::ostream& operator<<(std::ostream&, const TargetPhrase&);
+protected:
+ float m_transScore, m_ngramScore, m_fullScore;
+ //float m_ngramScore, m_fullScore;
+ ScoreComponentCollection m_scoreBreakdown;
+
+ // in case of confusion net, ptr to source phrase
+ Phrase const* m_sourcePhrase;
+
+ static bool wordalignflag;
+ static bool printalign;
+
+public:
+ TargetPhrase(FactorDirection direction=Output);
+ ~TargetPhrase(){};
+
+ /** used by the unknown word handler.
+ * Set alignment to 0
+ */
+ void SetAlignment();
+
+ //! used by the unknown word handler- these targets
+ //! don't have a translation score, so wp is the only thing used
+ void SetScore();
+
+ //!Set score for Sentence XML target options
+ void SetScore(float score);
+
+ //! Set score for unknown words with input weights
+ void SetScore(const Scores &scoreVector);
+
+
+ /*** Called immediately after creation to initialize scores.
+ *
+ * @param translationScoreProducer The PhraseDictionaryMemory that this TargetPhrase is contained by.
+ * Used to identify where the scores for this phrase belong in the list of all scores.
+ * @param scoreVector the vector of scores (log probs) associated with this translation
+ * @param weighT the weights for the individual scores (t-weights in the .ini file)
+ * @param languageModels all the LanguageModels that should be used to compute the LM scores
+ * @param weightWP the weight of the word penalty
+ *
+ * @TODO should this be part of the constructor? If not, add explanation why not.
+ */
+ void SetScore(const ScoreProducer* translationScoreProducer,
+ const Scores &scoreVector,
+ const std::vector<float> &weightT,
+ float weightWP,
+ const LMList &languageModels);
+
+
+ // used when creating translations of unknown words:
+ void ResetScore();
+ void SetWeights(const ScoreProducer*, const std::vector<float> &weightT);
+
+ TargetPhrase *MergeNext(const TargetPhrase &targetPhrase) const;
+ // used for translation step
+
+#ifdef HAVE_PROTOBUF
+ void WriteToRulePB(hgmert::Rule* pb) const;
+#endif
+
+/* inline float GetTranslationScore() const
+ {
+ return m_transScore;
+ }*/
+ /***
+ * return the estimated score resulting from our being added to a sentence
+ * (it's an estimate because we don't have full n-gram info for the language model
+ * without using the (unknown) full sentence)
+ *
+ */
+ inline float GetFutureScore() const
+ {
+ return m_fullScore;
+ }
+ inline const ScoreComponentCollection &GetScoreBreakdown() const
+ {
+ return m_scoreBreakdown;
+ }
+
+ //! TODO - why is this needed and is it set correctly by every phrase dictionary class ? should be set in constructor
+ void SetSourcePhrase(Phrase const* p)
+ {
+ m_sourcePhrase=p;
+ }
+ Phrase const* GetSourcePhrase() const
+ {
+ return m_sourcePhrase;
+ }
+
+
+
+
+ void UseWordAlignment(bool a){
+ wordalignflag=a;
+ };
+ bool UseWordAlignment() const {
+ return wordalignflag;
+ };
+ void PrintAlignmentInfo(bool a) {
+ printalign=a;
+ }
+ bool PrintAlignmentInfo() const {
+ return printalign;
+ }
+
+ TO_STRING();
+};
+
+std::ostream& operator<<(std::ostream&, const TargetPhrase&);
+
+}
+
+#endif
diff --git a/moses/src/TargetPhraseCollection.cpp b/moses/src/TargetPhraseCollection.cpp
new file mode 100644
index 000000000..7d6cf1c39
--- /dev/null
+++ b/moses/src/TargetPhraseCollection.cpp
@@ -0,0 +1,49 @@
+// $Id: TargetPhraseCollection.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <algorithm>
+#include "TargetPhraseCollection.h"
+
+using namespace std;
+
+namespace Moses
+{
+// helper for sort
+struct CompareTargetPhrase
+{
+ bool operator() (const TargetPhrase *a, const TargetPhrase *b)
+ {
+ return a->GetFutureScore() > b->GetFutureScore();
+ }
+};
+
+void TargetPhraseCollection::NthElement(size_t tableLimit)
+{
+ vector<TargetPhrase*>::iterator
+ iterMiddle = (tableLimit == 0 || m_collection.size() < tableLimit) ?m_collection.end() : m_collection.begin() + tableLimit;
+
+ //std::sort(m_collection.begin(), m_collection.end(), CompareTargetPhrase());
+ std::nth_element(m_collection.begin(), iterMiddle, m_collection.end(), CompareTargetPhrase());
+}
+
+}
+
+
diff --git a/moses/src/TargetPhraseCollection.h b/moses/src/TargetPhraseCollection.h
new file mode 100644
index 000000000..5b1a7fd10
--- /dev/null
+++ b/moses/src/TargetPhraseCollection.h
@@ -0,0 +1,76 @@
+// $Id: TargetPhraseCollection.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TargetPhraseCollection_h
+#define moses_TargetPhraseCollection_h
+
+#include <vector>
+#include "TargetPhrase.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+//! a list of target phrases that is translated from the same source phrase
+class TargetPhraseCollection
+{
+protected:
+ std::vector<TargetPhrase*> m_collection;
+
+public:
+ // iters
+ typedef std::vector<TargetPhrase*>::iterator iterator;
+ typedef std::vector<TargetPhrase*>::const_iterator const_iterator;
+
+ iterator begin() { return m_collection.begin(); }
+ iterator end() { return m_collection.end(); }
+ const_iterator begin() const { return m_collection.begin(); }
+ const_iterator end() const { return m_collection.end(); }
+
+ ~TargetPhraseCollection()
+ {
+ RemoveAllInColl(m_collection);
+ }
+
+ //! divide collection into 2 buckets using std::nth_element, the top & bottom according to table limit
+ void NthElement(size_t tableLimit);
+
+ //! number of target phrases in this collection
+ size_t GetSize() const
+ {
+ return m_collection.size();
+ }
+ //! wether collection has any phrases
+ bool IsEmpty() const
+ {
+ return m_collection.empty();
+ }
+ //! add a new entry into collection
+ void Add(TargetPhrase *targetPhrase)
+ {
+ m_collection.push_back(targetPhrase);
+ }
+
+};
+
+}
+
+#endif
diff --git a/moses/src/Timer.cpp b/moses/src/Timer.cpp
new file mode 100644
index 000000000..ba2f41faa
--- /dev/null
+++ b/moses/src/Timer.cpp
@@ -0,0 +1,113 @@
+#include <ctime>
+#include <iostream>
+#include <iomanip>
+#include "Util.h"
+#include "Timer.h"
+
+namespace Moses
+{
+
+/***
+ * Return the total time that the timer has been in the "running"
+ * state since it was first "started" or last "restarted". For
+ * "short" time periods (less than an hour), the actual cpu time
+ * used is reported instead of the elapsed time.
+ */
+double Timer::elapsed_time()
+{
+ time_t now;
+ time(&now);
+ return difftime(now, start_time);
+}
+
+/***
+ * Return the total time that the timer has been in the "running"
+ * state since it was first "started" or last "restarted". For
+ * "short" time periods (less than an hour), the actual cpu time
+ * used is reported instead of the elapsed time.
+ * This function is the public version of elapsed_time()
+ */
+double Timer::get_elapsed_time()
+{
+ return elapsed_time();
+}
+
+/***
+ * Start a timer. If it is already running, let it continue running.
+ * Print an optional message.
+ */
+void Timer::start(const char* msg)
+{
+ // Print an optional message, something like "Starting timer t";
+ if (msg) TRACE_ERR( msg << std::endl);
+
+ // Return immediately if the timer is already running
+ if (running) return;
+
+ // Change timer status to running
+ running = true;
+
+ // Set the start time;
+ time(&start_time);
+}
+
+/***
+ * Turn the timer off and start it again from 0. Print an optional message.
+ */
+/*
+inline void Timer::restart(const char* msg)
+{
+ // Print an optional message, something like "Restarting timer t";
+ if (msg) TRACE_ERR( msg << std::endl;
+
+ // Set the timer status to running
+ running = true;
+
+ // Set the accumulated time to 0 and the start time to now
+ acc_time = 0;
+ start_clock = clock();
+ start_time = time(0);
+}
+*/
+
+/***
+ * Stop the timer and print an optional message.
+ */
+/*
+inline void Timer::stop(const char* msg)
+{
+ // Print an optional message, something like "Stopping timer t";
+ check(msg);
+
+ // Recalculate and store the total accumulated time up until now
+ if (running) acc_time += elapsed_time();
+
+ running = false;
+}
+*/
+/***
+ * Print out an optional message followed by the current timer timing.
+ */
+void Timer::check(const char* msg)
+{
+ // Print an optional message, something like "Checking timer t";
+ if (msg) TRACE_ERR( msg << " : ");
+
+// TRACE_ERR( "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
+ TRACE_ERR( "[" << (running ? elapsed_time() : 0) << "] seconds\n");
+}
+
+/***
+ * Allow timers to be printed to ostreams using the syntax 'os << t'
+ * for an ostream 'os' and a timer 't'. For example, "cout << t" will
+ * print out the total amount of time 't' has been "running".
+ */
+std::ostream& operator<<(std::ostream& os, Timer& t)
+{
+ //os << std::setprecision(2) << std::setiosflags(std::ios::fixed) << (t.running ? t.elapsed_time() : 0);
+ os << (t.running ? t.elapsed_time() : 0);
+ return os;
+}
+
+}
+
diff --git a/moses/src/Timer.h b/moses/src/Timer.h
new file mode 100644
index 000000000..91749b885
--- /dev/null
+++ b/moses/src/Timer.h
@@ -0,0 +1,40 @@
+#ifndef moses_Time_H
+#define moses_Time_H
+
+#include <ctime>
+#include <iostream>
+#include <iomanip>
+#include "Util.h"
+
+namespace Moses
+{
+
+class Timer
+{
+ friend std::ostream& operator<<(std::ostream& os, Timer& t);
+
+ private:
+ bool running;
+ time_t start_time;
+
+ //TODO in seconds?
+ double elapsed_time();
+
+ public:
+ /***
+ * 'running' is initially false. A timer needs to be explicitly started
+ * using 'start' or 'restart'
+ */
+ Timer() : running(false), start_time(0) { }
+
+ void start(const char* msg = 0);
+// void restart(const char* msg = 0);
+// void stop(const char* msg = 0);
+ void check(const char* msg = 0);
+ double get_elapsed_time();
+
+};
+
+}
+
+#endif
diff --git a/moses/src/TranslationOption.cpp b/moses/src/TranslationOption.cpp
new file mode 100644
index 000000000..ec7f89582
--- /dev/null
+++ b/moses/src/TranslationOption.cpp
@@ -0,0 +1,175 @@
+// $Id: TranslationOption.cpp 2081 2009-02-05 17:37:09Z jdschroeder $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "TranslationOption.h"
+#include "WordsBitmap.h"
+#include "PhraseDictionaryMemory.h"
+#include "GenerationDictionary.h"
+#include "LMList.h"
+#include "StaticData.h"
+#include "InputType.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+//TODO this should be a factory function!
+TranslationOption::TranslationOption(const WordsRange &wordsRange
+ , const TargetPhrase &targetPhrase
+ , const InputType &inputType)
+: m_targetPhrase(targetPhrase)
+, m_sourceWordsRange(wordsRange)
+{
+ // set score
+ m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
+
+ if (inputType.GetType() == SentenceInput)
+ {
+ Phrase phrase = inputType.GetSubString(wordsRange);
+ m_sourcePhrase = new Phrase(phrase);
+ }
+ else
+ { // TODO lex reordering with confusion network
+ m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase());
+ }
+}
+
+//TODO this should be a factory function!
+TranslationOption::TranslationOption(const WordsRange &wordsRange
+ , const TargetPhrase &targetPhrase
+ , const InputType &inputType
+ , int /*whatever*/)
+: m_targetPhrase(targetPhrase)
+, m_sourceWordsRange (wordsRange)
+, m_futureScore(0)
+{
+ const UnknownWordPenaltyProducer *up = StaticData::Instance().GetUnknownWordPenaltyProducer();
+ if (up) {
+ const ScoreProducer *scoreProducer = (const ScoreProducer *)up; // not sure why none of the c++ cast works
+ vector<float> score(1);
+ score[0] = FloorScore(-numeric_limits<float>::infinity());
+ m_scoreBreakdown.Assign(scoreProducer, score);
+ }
+
+ if (inputType.GetType() == SentenceInput)
+ {
+ Phrase phrase = inputType.GetSubString(wordsRange);
+ m_sourcePhrase = new Phrase(phrase);
+ }
+ else
+ { // TODO lex reordering with confusion network
+ m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase());
+ //the target phrase from a confusion network/lattice has input scores that we want to keep
+ m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
+
+ }
+}
+
+TranslationOption::TranslationOption(const TranslationOption &copy)
+: m_targetPhrase(copy.m_targetPhrase)
+//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
+, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
+, m_sourceWordsRange(copy.m_sourceWordsRange)
+, m_futureScore(copy.m_futureScore)
+, m_scoreBreakdown(copy.m_scoreBreakdown)
+, m_reordering(copy.m_reordering)
+{}
+
+TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
+: m_targetPhrase(copy.m_targetPhrase)
+//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
+, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
+, m_sourceWordsRange(sourceWordsRange)
+, m_futureScore(copy.m_futureScore)
+, m_scoreBreakdown(copy.m_scoreBreakdown)
+, m_reordering(copy.m_reordering)
+{}
+
+void TranslationOption::MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector<FactorType>& featuresToAdd)
+{
+ assert(phrase.GetSize() == m_targetPhrase.GetSize());
+ if (featuresToAdd.size() == 1) {
+ m_targetPhrase.MergeFactors(phrase, featuresToAdd[0]);
+ } else if (featuresToAdd.empty()) {
+ /* features already there, just update score */
+ } else {
+ m_targetPhrase.MergeFactors(phrase, featuresToAdd);
+ }
+ m_scoreBreakdown.PlusEquals(score);
+}
+
+bool TranslationOption::IsCompatible(const Phrase& phrase, const std::vector<FactorType>& featuresToCheck) const
+{
+ if (featuresToCheck.size() == 1) {
+ return m_targetPhrase.IsCompatible(phrase, featuresToCheck[0]);
+ } else if (featuresToCheck.empty()) {
+ return true;
+ /* features already there, just update score */
+ } else {
+ return m_targetPhrase.IsCompatible(phrase, featuresToCheck);
+ }
+}
+
+bool TranslationOption::Overlap(const Hypothesis &hypothesis) const
+{
+ const WordsBitmap &bitmap = hypothesis.GetWordsBitmap();
+ return bitmap.Overlap(GetSourceWordsRange());
+}
+
+void TranslationOption::CalcScore()
+{
+ // LM scores
+ float ngramScore = 0;
+ float retFullScore = 0;
+
+ const LMList &allLM = StaticData::Instance().GetAllLM();
+
+ allLM.CalcScore(GetTargetPhrase(), retFullScore, ngramScore, &m_scoreBreakdown);
+
+ size_t phraseSize = GetTargetPhrase().GetSize();
+ // future score
+ m_futureScore = retFullScore - ngramScore
+ + m_scoreBreakdown.InnerProduct(StaticData::Instance().GetAllWeights()) - phraseSize * StaticData::Instance().GetWeightWordPenalty();
+}
+
+TO_STRING_BODY(TranslationOption);
+
+// friend
+ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
+{
+ out << possibleTranslation.GetTargetPhrase()
+ << "c=" << possibleTranslation.GetFutureScore()
+ << " [" << possibleTranslation.GetSourceWordsRange() << "]"
+ << possibleTranslation.GetScoreBreakdown();
+ return out;
+}
+
+void TranslationOption::CacheReorderingProb(const LexicalReordering &lexreordering
+ , const Score &score)
+{
+ m_reordering.Assign(&lexreordering, score);
+}
+
+}
+
+
diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h
new file mode 100644
index 000000000..701cafe26
--- /dev/null
+++ b/moses/src/TranslationOption.h
@@ -0,0 +1,190 @@
+// $Id: TranslationOption.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TranslationOption_h
+#define moses_TranslationOption_h
+
+#include <vector>
+#include "WordsBitmap.h"
+#include "WordsRange.h"
+#include "Phrase.h"
+#include "TargetPhrase.h"
+#include "Hypothesis.h"
+#include "Util.h"
+#include "TypeDef.h"
+#include "ScoreComponentCollection.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+
+class PhraseDictionary;
+class GenerationDictionary;
+
+/** Available phrase translation for a particular sentence pair.
+ * In a multi-factor model, this is expanded from the entries in the
+ * translation tables and generation tables (and pruned to the maximum
+ * number allowed). By pre-computing the allowable phrase translations,
+ * efficient beam search in Manager is possible when expanding instances
+ * of the class Hypothesis - the states in the search.
+ *
+ * A translation option contains source and target phrase, aggregate
+ * and details scores (in m_scoreBreakdown), including an estimate
+ * how expensive this option will be in search (used to build the
+ * future cost matrix.)
+ *
+ * m_targetPhrase points to a phrase-table entry.
+ * The source word range is zero-indexed, so it can't refer to an empty range. The target phrase may be empty.
+ */
+class TranslationOption
+{
+ friend std::ostream& operator<<(std::ostream& out, const TranslationOption& possibleTranslation);
+
+protected:
+
+ TargetPhrase m_targetPhrase; /*< output phrase when using this translation option */
+ Phrase *m_sourcePhrase; /*< input phrase translated by this */
+ const WordsRange m_sourceWordsRange; /*< word position in the input that are covered by this translation option */
+ float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */
+ std::vector<TranslationOption*> m_linkedTransOpts; /* list of linked TOs which must be included with this in any hypothesis */
+
+ //! in TranslationOption, m_scoreBreakdown is not complete. It cannot,
+ //! for example, know the full n-gram score since the length of the
+ //! TargetPhrase may be shorter than the n-gram order. But, if it is
+ //! possible to estimate, it is included here.
+ ScoreComponentCollection m_scoreBreakdown;
+ ScoreComponentCollection m_reordering;
+
+public:
+ /** constructor. Used by initial translation step */
+ TranslationOption(const WordsRange &wordsRange
+ , const TargetPhrase &targetPhrase
+ , const InputType &inputType);
+ /** constructor. Used to create trans opt from unknown word */
+ TranslationOption(const WordsRange &wordsRange
+ , const TargetPhrase &targetPhrase
+ , const InputType &inputType
+ , int);
+ /** copy constructor */
+ TranslationOption(const TranslationOption &copy);
+
+ /** copy constructor, but change words range. used by caching */
+ TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange);
+
+ ~TranslationOption()
+ {
+ delete m_sourcePhrase;
+ }
+
+ /** returns true if all feature types in featuresToCheck are compatible between the two phrases */
+ bool IsCompatible(const Phrase& phrase, const std::vector<FactorType>& featuresToCheck) const;
+
+ /** used when precomputing (composing) translation options */
+ void MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector<FactorType>& featuresToMerge);
+
+ /** returns target phrase */
+ inline const TargetPhrase &GetTargetPhrase() const
+ {
+ return m_targetPhrase;
+ }
+
+ /** returns source word range */
+ inline const WordsRange &GetSourceWordsRange() const
+ {
+ return m_sourceWordsRange;
+ }
+
+ /** returns source phrase */
+ const Phrase *GetSourcePhrase() const
+ {
+ return m_sourcePhrase;
+ }
+
+ /** returns linked TOs */
+ inline const std::vector<TranslationOption*> &GetLinkedTransOpts() const
+ {
+ return m_linkedTransOpts;
+ }
+
+ /** add link to another TO */
+ inline void AddLinkedTransOpt(TranslationOption* to)
+ {
+ m_linkedTransOpts.push_back(to);
+ }
+
+ /** whether source span overlaps with those of a hypothesis */
+ bool Overlap(const Hypothesis &hypothesis) const;
+
+ /** return start index of source phrase */
+ inline size_t GetStartPos() const
+ {
+ return m_sourceWordsRange.GetStartPos();
+ }
+
+ /** return end index of source phrase */
+ inline size_t GetEndPos() const
+ {
+ return m_sourceWordsRange.GetEndPos();
+ }
+
+ /** return length of source phrase */
+ inline size_t GetSize() const
+ {
+ return m_sourceWordsRange.GetEndPos() - m_sourceWordsRange.GetStartPos() + 1;
+ }
+
+ /** return estimate of total cost of this option */
+ inline float GetFutureScore() const
+ {
+ return m_futureScore;
+ }
+
+ /** return true if the source phrase translates into nothing */
+ inline bool IsDeletionOption() const
+ {
+ return m_targetPhrase.GetSize() == 0;
+ }
+
+ /** returns detailed component scores */
+ inline const ScoreComponentCollection &GetScoreBreakdown() const
+ {
+ return m_scoreBreakdown;
+ }
+ /** returns detailed component scores */
+ inline const ScoreComponentCollection &GetReorderingScore() const
+ {
+ return m_reordering;
+ }
+
+ /** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
+ void CalcScore();
+
+ void CacheReorderingProb(const LexicalReordering &lexreordering
+ , const Score &score);
+
+ TO_STRING();
+};
+
+}
+
+#endif
+
+
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
new file mode 100644
index 000000000..7ee51466f
--- /dev/null
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -0,0 +1,655 @@
+// $Id: TranslationOptionCollection.cpp 2477 2009-08-07 16:47:54Z bhaddow $
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <algorithm>
+#include "TranslationOptionCollection.h"
+#include "Sentence.h"
+#include "DecodeStep.h"
+#include "LanguageModel.h"
+#include "PhraseDictionaryMemory.h"
+#include "FactorCollection.h"
+#include "InputType.h"
+#include "Util.h"
+#include "StaticData.h"
+#include "DecodeStepTranslation.h"
+#include "DecodeGraph.h"
+
+using namespace std;
+
+namespace Moses
+{
+/** helper for pruning */
+bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
+{
+ return a->GetFutureScore() > b->GetFutureScore();
+}
+
+/** constructor; since translation options are indexed by coverage span, the corresponding data structure is initialized here
+ * This fn should be called by inherited classes
+*/
+TranslationOptionCollection::TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
+ : m_source(src)
+ ,m_futureScore(src.GetSize())
+ ,m_maxNoTransOptPerCoverage(maxNoTransOptPerCoverage)
+ ,m_translationOptionThreshold(translationOptionThreshold)
+{
+ // create 2-d vector
+ size_t size = src.GetSize();
+ for (size_t startPos = 0 ; startPos < size ; ++startPos)
+ {
+ m_collection.push_back( vector< TranslationOptionList >() );
+
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = 0 ; endPos < maxSize ; ++endPos)
+ {
+ m_collection[startPos].push_back( TranslationOptionList() );
+ }
+ }
+}
+
+/** destructor, clears out data structures */
+TranslationOptionCollection::~TranslationOptionCollection()
+{
+ RemoveAllInColl(m_unksrcs);
+}
+
+void TranslationOptionCollection::Prune()
+{
+	// quit if no pruning was requested (no size limit and no score threshold)
+ if (m_maxNoTransOptPerCoverage == 0 && m_translationOptionThreshold == -std::numeric_limits<float>::infinity())
+ return;
+
+ // bookkeeping for how many options used, pruned
+ size_t total = 0;
+ size_t totalPruned = 0;
+
+ // loop through all spans
+ size_t size = m_source.GetSize();
+ for (size_t startPos = 0 ; startPos < size; ++startPos)
+ {
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+ {
+ // consider list for a span
+ TranslationOptionList &fullList = GetTranslationOptionList(startPos, endPos);
+ total += fullList.size();
+
+ // size pruning
+ if (m_maxNoTransOptPerCoverage > 0 &&
+ fullList.size() > m_maxNoTransOptPerCoverage)
+ {
+ // sort in vector
+ nth_element(fullList.begin(), fullList.begin() + m_maxNoTransOptPerCoverage, fullList.end(), CompareTranslationOption);
+ totalPruned += fullList.size() - m_maxNoTransOptPerCoverage;
+
+ // delete the rest
+ for (size_t i = m_maxNoTransOptPerCoverage ; i < fullList.size() ; ++i)
+ {
+ delete fullList.Get(i);
+ }
+ fullList.resize(m_maxNoTransOptPerCoverage);
+ }
+
+ // threshold pruning
+ if (fullList.size() > 1 && m_translationOptionThreshold != -std::numeric_limits<float>::infinity())
+ {
+ // first, find the best score
+ float bestScore = -std::numeric_limits<float>::infinity();
+ for (size_t i=0; i < fullList.size() ; ++i)
+ {
+ if (fullList.Get(i)->GetFutureScore() > bestScore)
+ bestScore = fullList.Get(i)->GetFutureScore();
+ }
+ //std::cerr << "best score for span " << startPos << "-" << endPos << " is " << bestScore << "\n";
+ // then, remove items that are worse than best score + threshold
+ for (size_t i=0; i < fullList.size() ; ++i)
+ {
+ if (fullList.Get(i)->GetFutureScore() < bestScore + m_translationOptionThreshold)
+ {
+ //std::cerr << "\tremoving item " << i << ", score " << fullList.Get(i)->GetFutureScore() << ": " << fullList.Get(i)->GetTargetPhrase() << "\n";
+ delete fullList.Get(i);
+ fullList.Remove(i);
+ total--;
+ totalPruned++;
+ i--;
+ }
+ //else
+ //{
+ // std::cerr << "\tkeeping item " << i << ", score " << fullList.Get(i)->GetFutureScore() << ": " << fullList.Get(i)->GetTargetPhrase() << "\n";
+ //}
+ }
+ } // end of threshold pruning
+ }
+ } // end of loop through all spans
+
+ VERBOSE(2," Total translation options: " << total << std::endl
+ << "Total translation options pruned: " << totalPruned << std::endl);
+}
+
+/** Force a creation of a translation option where there are none for a particular source position.
+* ie. where a source word has not been translated, create a translation option by
+* 1. not observing the table limits on phrase/generation tables
+* 2. using the handler ProcessUnknownWord()
+* Call this function once translation option collection has been filled with translation options
+*
+* The handling of unknown words is complicated by the fact that it must support different input types.
+* The call stack is
+* Base::ProcessUnknownWord()
+* Inherited::ProcessUnknownWord(position)
+* Base::ProcessOneUnknownWord()
+*
+* \param decodeStepList list of decoding steps
+* \param factorCollection input sentence with all factors
+*/
+
+void TranslationOptionCollection::ProcessUnknownWord(const std::vector <DecodeGraph*> &decodeStepVL)
+{
+ size_t size = m_source.GetSize();
+	// try to create translation options for spans with no options by expanding the table limit
+ for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++)
+ {
+ const DecodeGraph &decodeStepList = *decodeStepVL[startVL];
+ for (size_t pos = 0 ; pos < size ; ++pos)
+ {
+ TranslationOptionList &fullList = GetTranslationOptionList(pos, pos);
+ size_t numTransOpt = fullList.size();
+ if (numTransOpt == 0)
+ {
+ CreateTranslationOptionsForRange(decodeStepList
+ , pos, pos, false);
+ }
+ }
+ }
+
+ bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
+ // create unknown words for 1 word coverage where we don't have any trans options
+ for (size_t pos = 0 ; pos < size ; ++pos)
+ {
+ TranslationOptionList &fullList = GetTranslationOptionList(pos, pos);
+ if (fullList.size() == 0 || alwaysCreateDirectTranslationOption)
+ ProcessUnknownWord(pos);
+ }
+}
+
+/** special handling of ONE unknown word: either temporarily add the word to the translation table,
+ * or drop the translation.
+ * This function should be called by the ProcessOneUnknownWord() in the inherited class
+ * At the moment, this unknown word handler is a bit of a hack: it copies over each factor from source
+ * to target word, or uses the 'UNK' factor.
+ * Ideally, this function should be in a class which can be expanded upon, for example,
+ * to create a morphologically aware handler.
+ *
+ * \param sourceWord the unknown word
+ * \param sourcePos
+ * \param length length covered by this word (may be > 1 for lattice input)
+ * \param inputScores a set of scores associated with unknown word (input scores from lattices/CNs)
+ */
+void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,size_t sourcePos, size_t length, const Scores *inputScores)
+
+{
+ // unknown word, add as trans opt
+ FactorCollection &factorCollection = FactorCollection::Instance();
+
+ size_t isDigit = 0;
+
+ const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
+ const string &s = f->GetString();
+ bool isEpsilon = (s=="" || s==EPSILON);
+ if (StaticData::Instance().GetDropUnknown())
+ {
+
+
+ isDigit = s.find_first_of("0123456789");
+ if (isDigit == string::npos)
+ isDigit = 0;
+ else
+ isDigit = 1;
+ // modify the starting bitmap
+ }
+
+ Phrase* m_unksrc = new Phrase(Input); m_unksrc->AddWord() = sourceWord;
+ m_unksrcs.push_back(m_unksrc);
+
+ TranslationOption *transOpt;
+ TargetPhrase targetPhrase(Output);
+ targetPhrase.SetSourcePhrase(m_unksrc);
+ if (inputScores != NULL) {
+ targetPhrase.SetScore(*inputScores);
+ } else {
+ targetPhrase.SetScore();
+ }
+
+ if (!(StaticData::Instance().GetDropUnknown() || isEpsilon) || isDigit)
+ {
+ // add to dictionary
+
+ Word &targetWord = targetPhrase.AddWord();
+
+ for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ {
+ FactorType factorType = static_cast<FactorType>(currFactor);
+
+ const Factor *sourceFactor = sourceWord[currFactor];
+ if (sourceFactor == NULL)
+ targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
+ else
+ targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
+ }
+		//create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation
+
+
+
+
+
+ }
+ else
+ {
+ // drop source word. create blank trans opt
+
+ //targetPhrase.SetAlignment();
+
+ }
+ transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);
+ transOpt->CalcScore();
+ Add(transOpt);
+}
+
+/** compute future score matrix in a dynamic programming fashion.
+ * This matrix used in search.
+ * Call this function once translation option collection has been filled with translation options
+*/
+void TranslationOptionCollection::CalcFutureScore()
+{
+	// setup the matrix (ignore lower triangle, set upper triangle to -inf)
+ size_t size = m_source.GetSize(); // the width of the matrix
+
+ for(size_t row=0; row<size; row++) {
+ for(size_t col=row; col<size; col++) {
+ m_futureScore.SetScore(row, col, -numeric_limits<float>::infinity());
+ }
+ }
+
+ // walk all the translation options and record the cheapest option for each span
+ for (size_t startPos = 0 ; startPos < size ; ++startPos)
+ {
+ size_t maxSize = m_source.GetSize() - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+ {
+ TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
+
+ TranslationOptionList::const_iterator iterTransOpt;
+ for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt)
+ {
+ const TranslationOption &transOpt = **iterTransOpt;
+ float score = transOpt.GetFutureScore();
+ if (score > m_futureScore.GetScore(startPos, endPos))
+ m_futureScore.SetScore(startPos, endPos, score);
+ }
+ }
+ }
+
+ // now fill all the cells in the strictly upper triangle
+ // there is no way to modify the diagonal now, in the case
+ // where no translation option covers a single-word span,
+	// we leave the -inf in the matrix
+ // like in chart parsing we want each cell to contain the highest score
+ // of the full-span trOpt or the sum of scores of joining two smaller spans
+
+ for(size_t colstart = 1; colstart < size ; colstart++) {
+ for(size_t diagshift = 0; diagshift < size-colstart ; diagshift++) {
+ size_t startPos = diagshift;
+ size_t endPos = colstart+diagshift;
+ for(size_t joinAt = startPos; joinAt < endPos ; joinAt++) {
+ float joinedScore = m_futureScore.GetScore(startPos, joinAt)
+ + m_futureScore.GetScore(joinAt+1, endPos);
+ /* // uncomment to see the cell filling scheme
+ TRACE_ERR( "[" <<startPos<<","<<endPos<<"] <-? ["<<startPos<<","<<joinAt<<"]+["<<joinAt+1<<","<<endPos
+ << "] (colstart: "<<colstart<<", diagshift: "<<diagshift<<")"<<endl);
+ */
+ if (joinedScore > m_futureScore.GetScore(startPos, endPos))
+ m_futureScore.SetScore(startPos, endPos, joinedScore);
+ }
+ }
+ }
+
+ IFVERBOSE(3)
+ {
+ int total = 0;
+ for(size_t row=0; row<size; row++)
+ {
+ size_t maxSize = size - row;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for(size_t col=row; col<row+maxSize; col++)
+ {
+ int count = GetTranslationOptionList(row, col).size();
+ TRACE_ERR( "translation options spanning from "
+ << row <<" to "<< col <<" is "
+ << count <<endl);
+ total += count;
+ }
+ }
+ TRACE_ERR( "translation options generated in total: "<< total << endl);
+
+ for(size_t row=0; row<size; row++)
+ for(size_t col=row; col<size; col++)
+ TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "<< m_futureScore.GetScore(row, col) <<endl);
+ }
+}
+
+
+
+/** Create all possible translations from the phrase tables
+ * for a particular input sentence. This implies applying all
+ * translation and generation steps. Also computes future cost matrix.
+ * \param decodeStepList list of decoding steps
+ * \param factorCollection input sentence with all factors
+ */
+void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeGraph*> &decodeStepVL)
+{
+ // loop over all substrings of the source sentence, look them up
+	// in the phraseDictionary (which is the -- possibly filtered -- phrase
+ // table loaded on initialization), generate TranslationOption objects
+ // for all phrases
+
+ size_t size = m_source.GetSize();
+ for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++)
+ {
+ const DecodeGraph &decodeStepList = *decodeStepVL[startVL];
+ for (size_t startPos = 0 ; startPos < size; startPos++)
+ {
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++)
+ {
+ CreateTranslationOptionsForRange( decodeStepList, startPos, endPos, true);
+ }
+ }
+ }
+
+ VERBOSE(3,"Translation Option Collection\n " << *this << endl);
+
+ ProcessUnknownWord(decodeStepVL);
+
+ // Prune
+ Prune();
+
+ Sort();
+
+ // future score matrix
+ CalcFutureScore();
+
+	// Cache lexical reordering costs
+ CacheLexReordering();
+}
+
+void TranslationOptionCollection::Sort()
+{
+ size_t size = m_source.GetSize();
+ for (size_t startPos = 0 ; startPos < size; ++startPos)
+ {
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize; ++endPos)
+ {
+ TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
+ std::sort(transOptList.begin(), transOptList.end(), CompareTranslationOption);
+ }
+ }
+}
+
+
+/** create translation options that exactly cover a specific input span.
+ * Called by CreateTranslationOptions() and ProcessUnknownWord()
+ * \param decodeStepList list of decoding steps
+ * \param factorCollection input sentence with all factors
+ * \param startPos first position in input sentence
+ * \param lastPos last position in input sentence
+ * \param adhereTableLimit whether phrase & generation table limits are adhered to
+ */
+void TranslationOptionCollection::CreateTranslationOptionsForRange(
+ const DecodeGraph &decodeGraph
+ , size_t startPos
+ , size_t endPos
+ , bool adhereTableLimit)
+{
+ if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
+ {
+ Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network
+
+ // consult persistent (cross-sentence) cache for stored translation options
+ bool skipTransOptCreation = false
+ , useCache = StaticData::Instance().GetUseTransOptCache();
+ if (useCache)
+ {
+ const WordsRange wordsRange(startPos, endPos);
+ sourcePhrase = new Phrase(m_source.GetSubString(wordsRange));
+
+ const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(decodeGraph, *sourcePhrase);
+ // is phrase in cache?
+ if (transOptList != NULL) {
+ skipTransOptCreation = true;
+ TranslationOptionList::const_iterator iterTransOpt;
+ for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt)
+ {
+ TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange);
+ Add(transOpt);
+ }
+ }
+ } // useCache
+
+ if (!skipTransOptCreation)
+ {
+ // partial trans opt stored in here
+ PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
+ size_t totalEarlyPruned = 0;
+
+ // initial translation step
+ list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
+ const DecodeStep &decodeStep = **iterStep;
+
+ static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
+ (m_source, *oldPtoc
+ , startPos, endPos, adhereTableLimit );
+
+ // do rest of decode steps
+ int indexStep = 0;
+ for (++iterStep ; iterStep != decodeGraph.end() ; ++iterStep)
+ {
+ const DecodeStep &decodeStep = **iterStep;
+ PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
+
+ // go thru each intermediate trans opt just created
+ const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
+ vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
+ for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt)
+ {
+ TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
+ decodeStep.Process(inputPartialTranslOpt
+ , decodeStep
+ , *newPtoc
+ , this
+ , adhereTableLimit);
+ }
+ // last but 1 partial trans not required anymore
+ totalEarlyPruned += newPtoc->GetPrunedCount();
+ delete oldPtoc;
+ oldPtoc = newPtoc;
+ indexStep++;
+ } // for (++iterStep
+
+ // add to fully formed translation option list
+ PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
+ const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
+ vector<TranslationOption*>::const_iterator iterColl;
+ for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl)
+ {
+ TranslationOption *transOpt = *iterColl;
+ transOpt->CalcScore();
+ Add(transOpt);
+ }
+
+ // storing translation options in persistent cache (kept across sentences)
+ if (useCache)
+ {
+ if (partTransOptList.size() > 0)
+ {
+ TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
+ StaticData::Instance().AddTransOptListToCache(decodeGraph, *sourcePhrase, transOptList);
+ }
+ }
+
+ lastPartialTranslOptColl.DetachAll();
+ totalEarlyPruned += oldPtoc->GetPrunedCount();
+ delete oldPtoc;
+ // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
+ } // if (!skipTransOptCreation)
+
+ if (useCache)
+ delete sourcePhrase;
+ } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
+
+ if ((StaticData::Instance().GetXmlInputType() != XmlPassThrough) && HasXmlOptionsOverlappingRange(startPos,endPos))
+ {
+ CreateXmlOptionsForRange(startPos, endPos);
+ }
+}
+
+ /** Check if this range overlaps with any XML options. This doesn't need to be an exact match, only an overlap.
+ * by default, we don't support XML options. subclasses need to override this function.
+ * called by CreateTranslationOptionsForRange()
+ * \param startPos first position in input sentence
+ * \param lastPos last position in input sentence
+	* \return true if the span overlaps any XML option (always false in this base class)
+ */
+ bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const {
+ return false;
+ //not implemented for base class
+ }
+
+ /** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.
+ * called by CreateTranslationOptionsForRange()
+ * \param startPos first position in input sentence
+ * \param lastPos last position in input sentence
+ */
+ void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t) {
+ //not implemented for base class
+ };
+
+
+
+
+/** add translation option to the list
+ * \param translationOption translation option to be added */
+void TranslationOptionCollection::Add(TranslationOption *translationOption)
+{
+ const WordsRange &coverage = translationOption->GetSourceWordsRange();
+ m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].Add(translationOption);
+}
+
+TO_STRING_BODY(TranslationOptionCollection);
+
+inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll)
+{
+ size_t size = coll.GetSize();
+ for (size_t startPos = 0 ; startPos < size ; ++startPos)
+ {
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+ {
+ TranslationOptionList fullList = coll.GetTranslationOptionList(startPos, endPos);
+ size_t sizeFull = fullList.size();
+ for (size_t i = 0; i < sizeFull; i++)
+ {
+ out << *fullList.Get(i) << std::endl;
+ }
+ }
+ }
+
+ //std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin();
+ //size_t j = 0;
+ //for (; i!=coll.m_collection.end(); ++i) {
+ //out << "s[" << j++ << "].size=" << i->size() << std::endl;
+ //}
+
+ return out;
+}
+
+void TranslationOptionCollection::CacheLexReordering()
+{
+ const std::vector<LexicalReordering*> &lexReorderingModels = StaticData::Instance().GetReorderModels();
+
+ std::vector<LexicalReordering*>::const_iterator iterLexreordering;
+
+ size_t size = m_source.GetSize();
+ for (iterLexreordering = lexReorderingModels.begin() ; iterLexreordering != lexReorderingModels.end() ; ++iterLexreordering)
+ {
+ LexicalReordering &lexreordering = **iterLexreordering;
+
+ for (size_t startPos = 0 ; startPos < size ; startPos++)
+ {
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize; endPos++)
+ {
+ TranslationOptionList &transOptList = GetTranslationOptionList( startPos, endPos);
+ TranslationOptionList::iterator iterTransOpt;
+ for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt)
+ {
+ TranslationOption &transOpt = **iterTransOpt;
+ //Phrase sourcePhrase = m_source.GetSubString(WordsRange(startPos,endPos));
+ const Phrase *sourcePhrase = transOpt.GetSourcePhrase();
+ if (sourcePhrase)
+ {
+ Score score = lexreordering.GetProb(*sourcePhrase
+ , transOpt.GetTargetPhrase());
+ // TODO should have better handling of unknown reordering entries
+ if (!score.empty())
+ transOpt.CacheReorderingProb(lexreordering, score);
+ }
+ }
+ }
+ }
+ }
+}
+
+}
+
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
new file mode 100644
index 000000000..6e19f7eac
--- /dev/null
+++ b/moses/src/TranslationOptionCollection.h
@@ -0,0 +1,154 @@
+// $Id: TranslationOptionCollection.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TranslationOptionCollection_h
+#define moses_TranslationOptionCollection_h
+
+#include <list>
+#include "TypeDef.h"
+#include "TranslationOption.h"
+#include "SquareMatrix.h"
+#include "WordsBitmap.h"
+#include "PartialTranslOptColl.h"
+#include "DecodeStep.h"
+
+namespace Moses
+{
+
+class LanguageModel;
+class FactorCollection;
+class PhraseDictionaryMemory;
+class GenerationDictionary;
+class InputType;
+class LMList;
+class FactorMask;
+class Word;
+
+/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
+ * A key insight into efficient decoding is that various input
+ * conditions (trelliss, factored input, normal text, xml markup)
+ * all lead to the same decoding algorithm: hypotheses are expanded
+ * by applying phrase translations, which can be precomputed.
+ *
+ * The precomputation of a collection of instances of such TranslationOption
+ * depends on the input condition, but they all are presented to
+ * decoding algorithm in the same form, using this class.
+ *
+ * This class cannot, and should not be instantiated directly. Instantiate 1 of the inherited
+ * classes instead, for a particular input type
+ **/
+
+class DecodeGraph;
+
+class TranslationOptionCollection
+{
+ friend std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll);
+ TranslationOptionCollection(const TranslationOptionCollection&); /*< no copy constructor */
+protected:
+ std::vector< std::vector< TranslationOptionList > > m_collection; /*< contains translation options */
+ InputType const &m_source; /*< reference to the input */
+ SquareMatrix m_futureScore; /*< matrix of future costs for contiguous parts (span) of the input */
+ const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */
+ const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */
+ std::vector<Phrase*> m_unksrcs;
+
+ TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
+
+ void CalcFutureScore();
+
+ //! Force a creation of a translation option where there are none for a particular source position.
+ void ProcessUnknownWord(const std::vector <DecodeGraph*> &decodeStepVL);
+ //! special handling of ONE unknown words.
+ virtual void ProcessOneUnknownWord(const Word &sourceWord, size_t sourcePos, size_t length = 1, const Scores *inputScores = NULL);
+ //! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
+ void Prune();
+
+ //! sort all trans opt in each list for cube pruning */
+ void Sort();
+
+ //! list of trans opt for a particular span
+ TranslationOptionList &GetTranslationOptionList(size_t startPos, size_t endPos)
+ {
+ size_t maxSize = endPos - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ assert(maxSize < m_collection[startPos].size());
+ return m_collection[startPos][maxSize];
+ }
+ const TranslationOptionList &GetTranslationOptionList(size_t startPos, size_t endPos) const
+ {
+ size_t maxSize = endPos - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ assert(maxSize < m_collection[startPos].size());
+ return m_collection[startPos][maxSize];
+ }
+ void Add(TranslationOption *translationOption);
+
+ //! implemented by inherited class, called by this class
+ virtual void ProcessUnknownWord(size_t sourcePos)=0;
+ void CacheLexReordering();
+
+public:
+ virtual ~TranslationOptionCollection();
+
+ //! input sentence/confusion network
+ const InputType& GetSource() const { return m_source; }
+
+ //! get length/size of source input
+ size_t GetSize() const { return m_source.GetSize(); };
+
+ //! Create all possible translations from the phrase tables
+ virtual void CreateTranslationOptions(const std::vector <DecodeGraph*> &decodeStepVL);
+ //! Create translation options that exactly cover a specific input span.
+ virtual void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
+ , size_t startPosition
+ , size_t endPosition
+ , bool adhereTableLimit);
+
+ //!Check if this range has XML options
+ virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
+
+ //! Create xml-based translation options for the specific input span
+ virtual void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
+
+
+ //! returns future cost matrix for sentence
+ inline virtual const SquareMatrix &GetFutureScore() const
+ {
+ return m_futureScore;
+ }
+
+ //! list of trans opt for a particular span
+ const TranslationOptionList &GetTranslationOptionList(const WordsRange &coverage) const
+ {
+ return GetTranslationOptionList(coverage.GetStartPos(), coverage.GetEndPos());
+ }
+
+ TO_STRING();
+};
+
+}
+
+#endif
+
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.cpp b/moses/src/TranslationOptionCollectionConfusionNet.cpp
new file mode 100644
index 000000000..da9a043aa
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionConfusionNet.cpp
@@ -0,0 +1,38 @@
+// $Id: TranslationOptionCollectionConfusionNet.cpp 2081 2009-02-05 17:37:09Z jdschroeder $
+
+#include "TranslationOptionCollectionConfusionNet.h"
+#include "ConfusionNet.h"
+#include "DecodeStep.h"
+#include "LanguageModel.h"
+#include "PhraseDictionaryMemory.h"
+#include "FactorCollection.h"
+#include "LMList.h"
+
+namespace Moses
+{
+/** constructor; just initialize the base class */
+TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(
+ const ConfusionNet &input
+ , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
+: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold) {}
+
+/* forcibly create translation option for a particular source word.
+ * call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
+ * at a particular source position
+*/
+void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(
+ size_t sourcePos)
+{
+ ConfusionNet const& source=dynamic_cast<ConfusionNet const&>(m_source);
+
+ ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
+ size_t j=0;
+ for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i) {
+ ProcessOneUnknownWord(i->first ,sourcePos, source.GetColumnIncrement(sourcePos, j++),&(i->second));
+ }
+
+}
+
+}
+
+
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.h b/moses/src/TranslationOptionCollectionConfusionNet.h
new file mode 100644
index 000000000..03c4d01f6
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionConfusionNet.h
@@ -0,0 +1,21 @@
+// $Id: TranslationOptionCollectionConfusionNet.h 2939 2010-02-24 11:15:44Z jfouet $
+#ifndef moses_TranslationOptionCollectionConfusionNet_h
+#define moses_TranslationOptionCollectionConfusionNet_h
+
+#include "TranslationOptionCollection.h"
+
+namespace Moses
+{
+
+class ConfusionNet;
+
+class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection {
+ public:
+ TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
+
+ void ProcessUnknownWord( size_t sourcePos);
+
+};
+
+}
+#endif
diff --git a/moses/src/TranslationOptionCollectionText.cpp b/moses/src/TranslationOptionCollectionText.cpp
new file mode 100644
index 000000000..37f0e668b
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionText.cpp
@@ -0,0 +1,77 @@
+// $Id: TranslationOptionCollectionText.cpp 2343 2009-05-26 19:30:35Z phkoehn $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "TranslationOptionCollectionText.h"
+#include "Sentence.h"
+#include "DecodeStep.h"
+#include "LanguageModel.h"
+#include "PhraseDictionaryMemory.h"
+#include "FactorCollection.h"
+#include "WordsRange.h"
+#include "LMList.h"
+
+using namespace std;
+
+namespace Moses
+{
+/** constructor; just initialize the base class */
+TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
+ : TranslationOptionCollection(inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
+
+/* forcibly create translation option for a particular source word.
+ * For text, this function is easy, just call the base class' ProcessOneUnknownWord()
+*/
+void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos)
+{
+ const Word &sourceWord = m_source.GetWord(sourcePos);
+ ProcessOneUnknownWord(sourceWord,sourcePos);
+}
+
+/**
+ * Check the source sentence for coverage data
+ */
+bool TranslationOptionCollectionText::HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const {
+ Sentence const& source=dynamic_cast<Sentence const&>(m_source);
+ return source.XmlOverlap(startPosition,endPosition);
+}
+
+/**
+ * Create xml-based translation options for the specific input span
+ */
+void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosition, size_t endPosition) {
+ Sentence const& source=dynamic_cast<Sentence const&>(m_source);
+
+ vector <TranslationOption*> xmlOptions;
+
+ source.GetXmlTranslationOptions(xmlOptions,startPosition,endPosition);
+
+ //get vector of TranslationOptions from Sentence
+ for(size_t i=0;i<xmlOptions.size();i++) {
+ xmlOptions[i]->CalcScore();
+ Add(xmlOptions[i]);
+ }
+
+};
+
+}
+
+
+
diff --git a/moses/src/TranslationOptionCollectionText.h b/moses/src/TranslationOptionCollectionText.h
new file mode 100644
index 000000000..6ac2fc529
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionText.h
@@ -0,0 +1,48 @@
+// $Id: TranslationOptionCollectionText.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TranslationOptionCollectionText_h
+#define moses_TranslationOptionCollectionText_h
+
+#include "TranslationOptionCollection.h"
+
+namespace Moses
+{
+
+class Sentence;
+class LMList;
+
+class TranslationOptionCollectionText : public TranslationOptionCollection {
+ public:
+ void ProcessUnknownWord( size_t sourcePos);
+
+ TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
+
+ bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
+
+ void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
+
+
+};
+
+}
+
+#endif
diff --git a/moses/src/TranslationOptionList.cpp b/moses/src/TranslationOptionList.cpp
new file mode 100644
index 000000000..191c83cf4
--- /dev/null
+++ b/moses/src/TranslationOptionList.cpp
@@ -0,0 +1,26 @@
+
+#include "TranslationOptionList.h"
+#include "Util.h"
+#include "TranslationOption.h"
+
+namespace Moses
+{
+
+TranslationOptionList::TranslationOptionList(const TranslationOptionList &copy)
+{
+ const_iterator iter;
+ for (iter = copy.begin(); iter != copy.end(); ++iter)
+ {
+ const TranslationOption &origTransOpt = **iter;
+ TranslationOption *newTransOpt = new TranslationOption(origTransOpt);
+ Add(newTransOpt);
+ }
+}
+
+TranslationOptionList::~TranslationOptionList()
+{
+ RemoveAllInColl(m_coll);
+}
+
+}
+
diff --git a/moses/src/TranslationOptionList.h b/moses/src/TranslationOptionList.h
new file mode 100644
index 000000000..769118dc4
--- /dev/null
+++ b/moses/src/TranslationOptionList.h
@@ -0,0 +1,56 @@
+#ifndef moses_TranslationOptionList_h
+#define moses_TranslationOptionList_h
+
+#include <vector>
+#include <cassert>
+
+namespace Moses
+{
+
+class TranslationOption;
+
+class TranslationOptionList
+{
+protected:
+ typedef std::vector<TranslationOption*> CollType;
+ CollType m_coll;
+
+ public:
+ typedef CollType::iterator iterator;
+ typedef CollType::const_iterator const_iterator;
+ const_iterator begin() const { return m_coll.begin(); }
+ const_iterator end() const { return m_coll.end(); }
+ iterator begin() { return m_coll.begin(); }
+ iterator end() { return m_coll.end(); }
+
+ TranslationOptionList()
+ {
+ }
+ TranslationOptionList(const TranslationOptionList &copy);
+ ~TranslationOptionList();
+
+ void resize(size_t newSize)
+ { m_coll.resize(newSize); }
+ size_t size() const
+ { return m_coll.size(); }
+
+ const TranslationOption *Get(size_t ind) const
+ {
+ assert(ind < m_coll.size());
+ return m_coll[ind];
+ }
+ void Remove( size_t ind )
+ {
+ assert(ind < m_coll.size());
+ m_coll.erase( m_coll.begin()+ind );
+ }
+ void Add(TranslationOption *transOpt)
+ {
+ m_coll.push_back(transOpt);
+ }
+
+};
+
+}
+
+#endif
diff --git a/moses/src/TrellisPath.cpp b/moses/src/TrellisPath.cpp
new file mode 100644
index 000000000..93bf53a6e
--- /dev/null
+++ b/moses/src/TrellisPath.cpp
@@ -0,0 +1,231 @@
+// $Id: TrellisPath.cpp 2953 2010-03-07 07:57:48Z abarun $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "TrellisPath.h"
+#include "TrellisPathList.h"
+#include "TrellisPathCollection.h"
+#include "StaticData.h"
+
+using namespace std;
+
+namespace Moses
+{
+TrellisPath::TrellisPath(const Hypothesis *hypo)
+: m_prevEdgeChanged(NOT_FOUND)
+{
+ m_scoreBreakdown = hypo->GetScoreBreakdown();
+ m_totalScore = hypo->GetTotalScore();
+
+ // enumerate path using prevHypo
+ while (hypo != NULL)
+ {
+ m_path.push_back(hypo);
+ hypo = hypo->GetPrevHypo();
+ }
+}
+
+TrellisPath::TrellisPath(const TrellisPath &copy, size_t edgeIndex, const Hypothesis *arc)
+:m_prevEdgeChanged(edgeIndex)
+{
+ m_path.reserve(copy.m_path.size());
+ for (size_t currEdge = 0 ; currEdge < edgeIndex ; currEdge++)
+ { // copy path from parent
+ m_path.push_back(copy.m_path[currEdge]);
+ }
+
+ // 1 deviation
+ m_path.push_back(arc);
+
+ // rest of path comes from following best path backwards
+ const Hypothesis *prevHypo = arc->GetPrevHypo();
+ while (prevHypo != NULL)
+ {
+ m_path.push_back(prevHypo);
+ prevHypo = prevHypo->GetPrevHypo();
+ }
+
+ // Calc score
+ m_totalScore = m_path[0]->GetWinningHypo()->GetTotalScore();
+ m_scoreBreakdown= m_path[0]->GetWinningHypo()->GetScoreBreakdown();
+
+ size_t sizePath = m_path.size();
+ for (size_t pos = 0 ; pos < sizePath ; pos++)
+ {
+ const Hypothesis *hypo = m_path[pos];
+ const Hypothesis *winningHypo = hypo->GetWinningHypo();
+ if (hypo != winningHypo)
+ {
+ m_totalScore = m_totalScore - winningHypo->GetTotalScore() + hypo->GetTotalScore();
+ m_scoreBreakdown.MinusEquals(winningHypo->GetScoreBreakdown());
+ m_scoreBreakdown.PlusEquals(hypo->GetScoreBreakdown());
+ }
+ }
+}
+
+void TrellisPath::CreateDeviantPaths(TrellisPathCollection &pathColl) const
+{
+ const size_t sizePath = m_path.size();
+
+ if (m_prevEdgeChanged == NOT_FOUND)
+ { // initial enumration from a pure hypo
+ for (size_t currEdge = 0 ; currEdge < sizePath ; currEdge++)
+ {
+ const Hypothesis *hypo = static_cast<const Hypothesis*>(m_path[currEdge]);
+ const ArcList *pAL = hypo->GetArcList();
+ if (!pAL) continue;
+ const ArcList &arcList = *pAL;
+
+ // every possible Arc to replace this edge
+ ArcList::const_iterator iterArc;
+ for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
+ {
+ const Hypothesis *arc = *iterArc;
+ TrellisPath *deviantPath = new TrellisPath(*this, currEdge, arc);
+ pathColl.Add(deviantPath);
+ }
+ }
+ }
+ else
+ { // wiggle 1 of the edges only
+ for (size_t currEdge = m_prevEdgeChanged + 1 ; currEdge < sizePath ; currEdge++)
+ {
+ const ArcList *pAL = m_path[currEdge]->GetArcList();
+ if (!pAL) continue;
+ const ArcList &arcList = *pAL;
+ ArcList::const_iterator iterArc;
+
+ for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
+ { // copy this Path & change 1 edge
+ const Hypothesis *arcReplace = *iterArc;
+
+ TrellisPath *deviantPath = new TrellisPath(*this, currEdge, arcReplace);
+ pathColl.Add(deviantPath);
+ } // for (iterArc...
+ } // for (currEdge = 0 ...
+ }
+}
+
+void TrellisPath::CreateDeviantPaths(TrellisPathList &pathColl) const
+{
+ const size_t sizePath = m_path.size();
+
+ if (m_prevEdgeChanged == NOT_FOUND)
+ { // initial enumration from a pure hypo
+ for (size_t currEdge = 0 ; currEdge < sizePath ; currEdge++)
+ {
+ const Hypothesis *hypo = static_cast<const Hypothesis*>(m_path[currEdge]);
+ const ArcList *pAL = hypo->GetArcList();
+ if (!pAL) continue;
+ const ArcList &arcList = *pAL;
+
+ // every possible Arc to replace this edge
+ ArcList::const_iterator iterArc;
+ for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
+ {
+ const Hypothesis *arc = *iterArc;
+ TrellisPath *deviantPath = new TrellisPath(*this, currEdge, arc);
+ pathColl.Add(deviantPath);
+ }
+ }
+ }
+ else
+ { // wiggle 1 of the edges only
+ for (size_t currEdge = m_prevEdgeChanged + 1 ; currEdge < sizePath ; currEdge++)
+ {
+ const ArcList *pAL = m_path[currEdge]->GetArcList();
+ if (!pAL) continue;
+ const ArcList &arcList = *pAL;
+ ArcList::const_iterator iterArc;
+
+ for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc)
+ { // copy this Path & change 1 edge
+ const Hypothesis *arcReplace = *iterArc;
+
+ TrellisPath *deviantPath = new TrellisPath(*this, currEdge, arcReplace);
+ pathColl.Add(deviantPath);
+ } // for (iterArc...
+ } // for (currEdge = 0 ...
+ }
+}
+
+Phrase TrellisPath::GetTargetPhrase() const
+{
+ Phrase targetPhrase(Output);
+
+ int numHypo = (int) m_path.size();
+ for (int node = numHypo - 2 ; node >= 0 ; --node)
+ { // don't do the empty hypo - waste of time and decode step id is invalid
+ const Hypothesis &hypo = *m_path[node];
+ const Phrase &currTargetPhrase = hypo.GetCurrTargetPhrase();
+
+ targetPhrase.Append(currTargetPhrase);
+ }
+
+ return targetPhrase;
+}
+
+Phrase TrellisPath::GetSurfacePhrase() const
+{
+ const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
+ Phrase targetPhrase = GetTargetPhrase()
+ ,ret(Output);
+
+ for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos)
+ {
+ Word &newWord = ret.AddWord();
+ for (size_t i = 0 ; i < outputFactor.size() ; i++)
+ {
+ FactorType factorType = outputFactor[i];
+ const Factor *factor = targetPhrase.GetFactor(pos, factorType);
+ assert(factor);
+ newWord[factorType] = factor;
+ }
+ }
+
+ return ret;
+}
+
+WordsRange TrellisPath::GetTargetWordsRange(const Hypothesis &hypo) const
+{
+ size_t startPos = 0;
+
+ for (int indEdge = (int) m_path.size() - 1 ; indEdge >= 0 ; --indEdge)
+ {
+ const Hypothesis *currHypo = m_path[indEdge];
+ size_t endPos = startPos + currHypo->GetCurrTargetLength() - 1;
+
+ if (currHypo == &hypo)
+ {
+ return WordsRange(startPos, endPos);
+ }
+ startPos = endPos + 1;
+ }
+
+ // have to give a hypo in the trellis path, but u didn't.
+ assert(false);
+ return WordsRange(NOT_FOUND, NOT_FOUND);
+}
+
+TO_STRING_BODY(TrellisPath);
+
+
+}
+
diff --git a/moses/src/TrellisPath.h b/moses/src/TrellisPath.h
new file mode 100644
index 000000000..ddde74113
--- /dev/null
+++ b/moses/src/TrellisPath.h
@@ -0,0 +1,116 @@
+// $Id: TrellisPath.h 2953 2010-03-07 07:57:48Z abarun $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TrellisPath_h
+#define moses_TrellisPath_h
+
+#include <iostream>
+#include <vector>
+#include <limits>
+#include "Hypothesis.h"
+#include "TypeDef.h"
+
+namespace Moses
+{
+
+class TrellisPathCollection;
+class TrellisPathList;
+
+/** Encapsulate the set of hypotheses/arcs that goes from decoding 1 phrase to all the source phrases
+ * to reach a final translation. For the best translation, this consist of all hypotheses, for the other
+ * n-best paths, the node on the path can consist of hypotheses or arcs
+ */
+class TrellisPath
+{
+ friend std::ostream& operator<<(std::ostream&, const TrellisPath&);
+
+protected:
+ std::vector<const Hypothesis *> m_path; //< list of hypotheses/arcs
+ size_t m_prevEdgeChanged; /**< the last node that was wiggled to create this path
+ , or NOT_FOUND if this path is the best trans so consist of only hypos
+ */
+
+ ScoreComponentCollection m_scoreBreakdown;
+ float m_totalScore;
+
+public:
+ TrellisPath(); // not implemented
+
+ //! create path OF pure hypo
+ TrellisPath(const Hypothesis *hypo);
+
+ /** create path from another path, deviate at edgeIndex by using arc instead,
+ * which may change other hypo back from there
+ */
+ TrellisPath(const TrellisPath &copy, size_t edgeIndex, const Hypothesis *arc);
+
+ //! get score for this path throught trellis
+ inline float GetTotalScore() const { return m_totalScore; }
+
+ /** list of each hypo/arcs in path. For anything other than the best hypo, it is not possible just to follow the
+ * m_prevHypo variable in the hypothesis object
+ */
+ inline const std::vector<const Hypothesis *> &GetEdges() const
+ {
+ return m_path;
+ }
+
+ //! create a set of next best paths by wiggling 1 of the node at a time.
+ void CreateDeviantPaths(TrellisPathCollection &pathColl) const;
+
+ //! create a list of next best paths by wiggling 1 of the node at a time.
+ void CreateDeviantPaths(TrellisPathList &pathColl) const;
+
+ inline const ScoreComponentCollection &GetScoreBreakdown() const
+ {
+ return m_scoreBreakdown;
+ }
+
+ //! get target words range of the hypo within n-best trellis. not necessarily the same as hypo.GetCurrTargetWordsRange()
+ WordsRange GetTargetWordsRange(const Hypothesis &hypo) const;
+
+ Phrase GetTargetPhrase() const;
+ Phrase GetSurfacePhrase() const;
+
+ TO_STRING();
+
+};
+
+// friend
+inline std::ostream& operator<<(std::ostream& out, const TrellisPath& path)
+{
+ const size_t sizePath = path.m_path.size();
+ for (int pos = (int) sizePath - 1 ; pos >= 0 ; pos--)
+ {
+ const Hypothesis *edge = path.m_path[pos];
+ const WordsRange &sourceRange = edge->GetCurrSourceWordsRange();
+ out << edge->GetId() << " " << sourceRange.GetStartPos() << "-" << sourceRange.GetEndPos() << ", ";
+ }
+ // scores
+ out << " total=" << path.GetTotalScore()
+ << " " << path.GetScoreBreakdown()
+ << std::endl;
+
+ return out;
+}
+
+}
+#endif
diff --git a/moses/src/TrellisPathCollection.cpp b/moses/src/TrellisPathCollection.cpp
new file mode 100644
index 000000000..cf8deb1d5
--- /dev/null
+++ b/moses/src/TrellisPathCollection.cpp
@@ -0,0 +1,34 @@
+#include "TrellisPathCollection.h"
+
+namespace Moses
+{
+
+void TrellisPathCollection::Prune(size_t newSize)
+{
+ size_t currSize = m_collection.size();
+
+ if (currSize <= newSize)
+ return; // don't need to prune
+
+ CollectionType::reverse_iterator iterRev;
+ for (iterRev = m_collection.rbegin() ; iterRev != m_collection.rend() ; ++iterRev)
+ {
+ TrellisPath *trellisPath = *iterRev;
+ delete trellisPath;
+
+ currSize--;
+ if (currSize == newSize)
+ break;
+ }
+
+ // delete path in m_collection
+ CollectionType::iterator iter = m_collection.begin();
+ for (size_t i = 0 ; i < newSize ; ++i)
+ iter++;
+
+ m_collection.erase(iter, m_collection.end());
+}
+
+}
+
+
diff --git a/moses/src/TrellisPathCollection.h b/moses/src/TrellisPathCollection.h
new file mode 100644
index 000000000..5b3d79ff6
--- /dev/null
+++ b/moses/src/TrellisPathCollection.h
@@ -0,0 +1,96 @@
+// $Id: TrellisPathCollection.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TrellisPathCollection_h
+#define moses_TrellisPathCollection_h
+
+#include <set>
+#include <iostream>
+#include "TrellisPath.h"
+
+namespace Moses
+{
+
+struct CompareTrellisPathCollection
+{
+ bool operator()(const TrellisPath* pathA, const TrellisPath* pathB) const
+ {
+ return (pathA->GetTotalScore() > pathB->GetTotalScore());
+ }
+};
+
+/** priority queue used in Manager to store list of contenders for N-Best list.
+ * Stored in order of total score so that the best path can just be popped from the top
+ */
+class TrellisPathCollection
+{
+ friend std::ostream& operator<<(std::ostream&, const TrellisPathCollection&);
+
+protected:
+ typedef std::multiset<TrellisPath*, CompareTrellisPathCollection> CollectionType;
+ CollectionType m_collection;
+
+public:
+ //iterator begin() { return m_collection.begin(); }
+ TrellisPath *pop()
+ {
+ TrellisPath *top = *m_collection.begin();
+
+ // Detach
+ m_collection.erase(m_collection.begin());
+ return top;
+ }
+
+ ~TrellisPathCollection()
+ {
+ // clean up
+ RemoveAllInColl(m_collection);
+ }
+
+ //! add a new entry into collection
+ void Add(TrellisPath *trellisPath)
+ {
+ m_collection.insert(trellisPath);
+ }
+
+ size_t GetSize() const
+ {
+ return m_collection.size();
+ }
+
+ void Prune(size_t newSize);
+};
+
+inline std::ostream& operator<<(std::ostream& out, const TrellisPathCollection& pathColl)
+{
+ TrellisPathCollection::CollectionType::const_iterator iter;
+
+ for (iter = pathColl.m_collection.begin() ; iter != pathColl.m_collection.end() ; ++iter)
+ {
+ const TrellisPath &path = **iter;
+ out << path << std::endl;
+ }
+ return out;
+}
+
+}
+
+#endif
diff --git a/moses/src/TrellisPathList.h b/moses/src/TrellisPathList.h
new file mode 100644
index 000000000..751bb4e58
--- /dev/null
+++ b/moses/src/TrellisPathList.h
@@ -0,0 +1,76 @@
+// $Id: TrellisPathList.h 2953 2010-03-07 07:57:48Z abarun $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TrellisPathList_h
+#define moses_TrellisPathList_h
+
+#include <list>
+#include <set>
+#include "TrellisPath.h"
+
+namespace Moses
+{
+
+/** used to return n-best list of Trellis Paths from the Manager to the caller.
+ * FIFO container: Add() appends, pop() removes from the front. */
+class TrellisPathList
+{
+protected:
+ // the list owns its elements (deleted in the destructor)
+ std::list<const TrellisPath*> m_collection;
+public:
+ // iters
+ typedef std::list<const TrellisPath*>::iterator iterator;
+ typedef std::list<const TrellisPath*>::const_iterator const_iterator;
+
+ iterator begin() { return m_collection.begin(); }
+ iterator end() { return m_collection.end(); }
+ const_iterator begin() const { return m_collection.begin(); }
+ const_iterator end() const { return m_collection.end(); }
+
+ ~TrellisPathList()
+ {
+  // clean up: delete any paths not detached via pop()
+  RemoveAllInColl(m_collection);
+ }
+
+ //! add a new entry into collection; takes ownership of trellisPath
+ void Add(TrellisPath *trellisPath)
+ {
+  m_collection.push_back(trellisPath);
+ }
+
+ //! remove and return the oldest (first added) path.
+ //! NOTE(review): ownership passes to the caller -- popped paths are no
+ //! longer freed by the destructor; confirm callers delete them.
+ //! Assumes the list is non-empty: front()/pop_front() on an empty list is undefined.
+ const TrellisPath *pop()
+ {
+  const TrellisPath *top = m_collection.front();
+
+  // Detach
+  m_collection.pop_front();
+  return top;
+ }
+
+ //! number of paths currently held
+ size_t GetSize() const
+ {
+  return m_collection.size();
+ }
+};
+
+}
+
+#endif
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
new file mode 100644
index 000000000..8070fa65c
--- /dev/null
+++ b/moses/src/TypeDef.h
@@ -0,0 +1,194 @@
+// $Id: TypeDef.h 2938 2010-02-24 10:37:49Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TypeDef_h
+#define moses_TypeDef_h
+
+#include <list>
+#include <limits>
+#include <vector>
+#include <string>
+
+#ifdef WIN32
+#include <BaseTsd.h>
+#else
+#include <stdint.h>
+typedef uint32_t UINT32;
+#endif
+
+namespace Moses
+{
+
+#define PROJECT_NAME "moses"
+
+#ifndef BOS_
+#define BOS_ "<s>" //Beginning of sentence symbol
+#endif
+#ifndef EOS_
+#define EOS_ "</s>" //End of sentence symbol
+#endif
+
+#define UNKNOWN_FACTOR "UNK"
+#define EPSILON "*EPS*"
+
+#define NOT_FOUND std::numeric_limits<size_t>::max()
+#define MAX_NGRAM_SIZE 20
+
+const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
+const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
+const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
+const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
+const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 50;
+const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
+const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+const size_t ARRAY_SIZE_INCR = 10; //amount by which a phrase gets resized when necessary
+const float LOWEST_SCORE = -100.0f;
+const float DEFAULT_BEAM_WIDTH = 0.00001f;
+const float DEFAULT_EARLY_DISCARDING_THRESHOLD = 0.0f;
+const float DEFAULT_TRANSLATION_OPTION_THRESHOLD = 0.0f;
+const size_t DEFAULT_VERBOSE_LEVEL = 1;
+
+/////////////////////////////////////////////////
+// for those using autoconf/automake
+#if HAVE_CONFIG_H
+#include "config.h"
+
+//#define TRACE_ENABLE 1 // REMOVE after we figure this out
+
+#define LM_INTERNAL 1
+#define LM_REMOTE 1
+
+# ifdef HAVE_SRILM
+# define LM_SRI 1
+# else
+# undef LM_SRI
+# endif
+
+# ifdef HAVE_IRSTLM
+# define LM_IRST 1
+# endif
+
+# ifdef HAVE_RANDLM
+# define LM_RAND 1
+# endif
+
+#endif
+/////////////////////////////////////////////////
+
+// enums.
+// must be 0, 1, 2, ..., unless otherwise stated
+
+// can only be 2 at the moment
+const int NUM_LANGUAGES = 2;
+
+const size_t MAX_NUM_FACTORS = 4;
+
+enum FactorDirection
+{
+ Input, //! Source factors
+ Output //! Target factors
+};
+
+enum DecodeType
+{
+ Translate
+ ,Generate
+ ,InsertNullFertilityWord //! an optional step that attempts to insert a few closed-class words to improve LM scores
+};
+
+namespace LexReorderType
+{
+ enum LexReorderType //TODO explain values
+ {
+ Backward
+ ,Forward
+ ,Bidirectional
+ ,Fe
+ ,F
+ };
+}
+
+namespace DistortionOrientationType
+{
+ enum DistortionOrientationOptions
+ {
+ Monotone, //distinguish only between monotone and non-monotone as possible orientations
+ Msd //further separate non-monotone into swapped and discontinuous
+ };
+}
+
+enum LMType
+{
+ SingleFactor
+ ,MultiFactor
+};
+enum LMImplementation
+{
+ SRI = 0
+ ,IRST = 1
+ ,Skip = 2
+ ,Joint = 3
+ ,Internal = 4
+ ,RandLM = 5
+ ,Remote = 6
+
+};
+
+
+enum InputTypeEnum
+{
+ SentenceInput = 0
+ ,ConfusionNetworkInput = 1
+ ,WordLatticeInput = 2
+};
+
+enum XmlInputType
+{
+ XmlPassThrough = 0,
+ XmlIgnore = 1,
+ XmlExclusive = 2,
+ XmlInclusive = 3
+};
+
+enum DictionaryFind
+{
+ Best = 0
+ ,All = 1
+};
+
+enum SearchAlgorithm
+{
+ Normal = 0
+ ,CubePruning = 1
+ ,CubeGrowing = 2
+};
+
+// typedef
+typedef size_t FactorType;
+
+typedef std::vector<float> Scores;
+typedef std::vector<std::string> WordAlignments;
+
+typedef std::pair<std::vector<std::string const*>,Scores > StringTgtCand;
+typedef std::pair<std::vector<std::string const*>,WordAlignments > StringWordAlignmentCand;
+
+}
+#endif
diff --git a/moses/src/UniqueObject.h b/moses/src/UniqueObject.h
new file mode 100644
index 000000000..a38d425a8
--- /dev/null
+++ b/moses/src/UniqueObject.h
@@ -0,0 +1,55 @@
+/* ---------------------------------------------------------------- */
+/* Copyright 2004 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
+/* Richard Zens */
+/* ---------------------------------------------------------------- */
+#ifndef moses_UniqueObject_h
+#define moses_UniqueObject_h
+
+#include <iostream>
+#include <set>
+
+/** Intern x in a function-static pool and return a pointer to the pooled
+ * copy; equal objects share one address (std::set elements never move).
+ * mode!=0 acts as a reset switch: clears the pool and returns NULL,
+ * invalidating all previously returned pointers.
+ * NOTE(review): function-static state -- not thread-safe.
+ */
+template<class T> T const* uniqueObject(const T& x,int mode=0) {
+ typedef std::set<T> Pool;
+
+ static Pool pool;
+ static size_t Size=0;
+
+ if(mode==0) {
+  std::pair<typename Pool::iterator,bool> p=pool.insert(x);
+  // progress trace: report every 100000th distinct object interned
+  if(p.second && (++Size%100000==0))
+   std::cerr<<"uniqueObjects -- size: "<<Size<<" object size: "<<sizeof(T)<<"\n";
+
+  return &(*(p.first));
+ }
+ else {
+  pool.clear();Size=0;return 0;
+ }
+}
+
+/** Object-interning pool with instance (rather than function-static) state.
+ * operator() returns a pointer to the pooled copy of x; pointers stay valid
+ * until clear(), since std::set never relocates its elements.
+ */
+template<class T> class UniqueObjectManager {
+public:
+ typedef T Object;
+private:
+ typedef std::set<T> Pool;
+ Pool pool;
+public:
+ UniqueObjectManager() {}
+
+ //! drop all pooled objects (invalidates previously returned pointers)
+ void clear() {pool.clear();}
+ //! number of distinct objects interned so far
+ size_t size() const {return pool.size();}
+
+ //! intern x and return the address of the pooled copy
+ Object const * operator()(const Object& x) {
+#ifdef DEBUG
+  std::pair<typename Pool::iterator,bool> p=pool.insert(x);
+  // debug builds trace every 100000th distinct insertion
+  if(p.second && (size()%100000==0))
+   std::cerr<<"uniqueObjects -- size: "<<size()<<" object size: "<<sizeof(Object)<<"\n";
+  return &(*(p.first));
+#else
+  return &(*(pool.insert(x).first));
+#endif
+ }
+};
+
+
+
+#endif
diff --git a/moses/src/UserMessage.cpp b/moses/src/UserMessage.cpp
new file mode 100644
index 000000000..b4cc66f8d
--- /dev/null
+++ b/moses/src/UserMessage.cpp
@@ -0,0 +1,65 @@
+// $Id: UserMessage.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <sstream>
+#include <iostream>
+#include "UserMessage.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+const size_t MAX_MSG_QUEUE = 5;
+
+bool UserMessage::m_toStderr = true;
+bool UserMessage::m_toQueue = false;
+queue<string> UserMessage::m_msgQueue;
+
+//! Record a user-facing error message: echo to stderr and/or buffer it,
+//! depending on the flags set via SetOutput().
+void UserMessage::Add(const string &msg)
+{
+ if (m_toStderr)
+ {
+  cerr << "ERROR:" << msg << endl;
+ }
+ if (m_toQueue)
+ {
+  // bounded queue: drop the oldest message once MAX_MSG_QUEUE is reached
+  if (m_msgQueue.size() >= MAX_MSG_QUEUE)
+   m_msgQueue.pop();
+  m_msgQueue.push(msg);
+ }
+}
+
+//! Drain the buffered messages into a single string, one message per line.
+//! The queue is empty afterwards.
+string UserMessage::GetQueue()
+{
+ stringstream strme("");
+ while (!m_msgQueue.empty())
+ {
+  strme << m_msgQueue.front() << endl;
+  m_msgQueue.pop();
+ }
+ return strme.str();
+}
+
+}
+
+
+
diff --git a/moses/src/UserMessage.h b/moses/src/UserMessage.h
new file mode 100644
index 000000000..0930e92fc
--- /dev/null
+++ b/moses/src/UserMessage.h
@@ -0,0 +1,55 @@
+// $Id: UserMessage.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_UserMessage_h
+#define moses_UserMessage_h
+
+#include <string>
+#include <queue>
+
+namespace Moses
+{
+
+/** User warnings/error messages.
+ * Not the same as tracing messages; this should be usable even if the Moses
+ * front-end is a GUI. All state and methods are static (process-wide).
+*/
+class UserMessage
+{
+protected:
+ static bool m_toStderr, m_toQueue;
+ // bounded FIFO of buffered messages (capacity enforced in Add())
+ static std::queue<std::string> m_msgQueue;
+
+public:
+ //! whether messages to go to stderr, a queue to later display, or both
+ static void SetOutput(bool toStderr, bool toQueue)
+ {
+  m_toStderr = toStderr;
+  m_toQueue = toQueue;
+ }
+ //! add a message to be displayed
+ static void Add(const std::string &msg);
+ //! get all messages in queue. Each is on a separate line. Clear queue afterwards
+ static std::string GetQueue();
+};
+
+}
+
+#endif
diff --git a/moses/src/Util.cpp b/moses/src/Util.cpp
new file mode 100644
index 000000000..26ea2e750
--- /dev/null
+++ b/moses/src/Util.cpp
@@ -0,0 +1,184 @@
+// $Id: Util.cpp 2399 2009-07-23 10:29:30Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <sys/times.h>
+#include <sys/resource.h>
+#endif
+
+#include <cstring>
+#include <cctype>
+#include <algorithm>
+#include <stdio.h>
+#include <iostream>
+#include <iomanip>
+#include "TypeDef.h"
+#include "Util.h"
+#include "Timer.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+//global variable
+Timer g_timer;
+
+//! x-platform path of the system temp folder, always ending in a separator.
+string GetTempFolder()
+{
+#ifdef _WIN32
+ // NOTE(review): getenv("TMP") may return NULL, which makes string(tmpPath)
+ // undefined behavior -- confirm TMP is always set on supported Windows
+ // targets. Also note the guard here is _WIN32 while TypeDef.h uses WIN32.
+ char *tmpPath = getenv("TMP");
+ string str(tmpPath);
+ if (str.substr(str.size() - 1, 1) != "\\")
+  str += "\\";
+ return str;
+#else
+ return "/tmp/";
+#endif
+}
+
+//! Create a uniquely named temp file; the opened stream and the file's full
+//! path are returned through the out-parameters.
+void CreateTempFile(ofstream &fileStream, string &filePath)
+{
+#ifdef _WIN32
+ char buffer[BUFSIZ];
+ ::GetTempFileNameA(GetTempFolder().c_str(), "", 0, buffer);
+ filePath = buffer;
+#else
+ // name pattern: <tmp>/moses--XXXXXX (XXXXXX filled in by mkstemp)
+ char buffer[L_tmpnam];
+ strcpy(buffer, GetTempFolder().c_str());
+ strcat(buffer, PROJECT_NAME);
+ strcat(buffer, "--XXXXXX");
+ // NOTE(review): the file descriptor returned by mkstemp is discarded
+ // (leaked) and the file is re-opened by name below, which reintroduces
+ // the race mkstemp exists to avoid -- confirm this is acceptable here.
+ mkstemp(buffer);
+ filePath = buffer;
+#endif
+ // append mode so the file mkstemp already created is not truncated
+ fileStream.open(filePath.c_str(), ofstream::out | ofstream::app);
+}
+
+
+//! lowercase copy of str.
+//! NOTE(review): std::tolower(int) is undefined for negative char values
+//! (non-ASCII bytes where char is signed) -- callers appear ASCII-only; confirm.
+const std::string ToLower(const std::string& str)
+{
+ std::string lc(str);
+ std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower);
+ return lc;
+}
+
+//! Specialisation of Scan for bool: accepts yes/y/true/1 and no/n/false/0
+//! (case-insensitive); anything else traces a warning and returns false.
+template<>
+bool Scan<bool>(const std::string &input)
+{
+ std::string lc = ToLower(input);
+ if (lc == "yes" || lc == "y" || lc == "true" || lc == "1")
+  return true;
+ if (lc == "no" || lc == "n" || lc =="false" || lc == "0")
+  return false;
+ TRACE_ERR( "Scan<bool>: didn't understand '" << lc << "', returning false" << std::endl);
+ return false;
+}
+
+//! true iff filePath can be opened for reading
+bool FileExists(const std::string& filePath)
+{
+ ifstream ifs(filePath.c_str());
+ return !ifs.fail();
+}
+
+//! copy of str with leading and trailing dropChars removed
+const std::string Trim(const std::string& str, const std::string dropChars)
+{
+ std::string res = str;
+ // trim the tail first; positions computed on str apply to res since the
+ // two strings are still identical at this point
+ res.erase(str.find_last_not_of(dropChars)+1);
+ return res.erase(0, res.find_first_not_of(dropChars));
+}
+
+//! restart the global decoding timer g_timer
+void ResetUserTime()
+{
+ g_timer.start();
+};
+
+//! print elapsed time on the global timer, prefixed with message
+void PrintUserTime(const std::string &message)
+{
+ g_timer.check(message.c_str());
+}
+
+//! elapsed time on the global timer
+double GetUserTime()
+{
+ return g_timer.get_elapsed_time();
+}
+
+/** Parse a leading <seg ...> tag: strip the tag pair from line (leaving the
+ * trimmed segment text in line) and return the tag's attributes as a
+ * name->value map. Returns an empty map and leaves line untouched when no
+ * tag is present. NOTE: attributes are parsed from the lowercased copy of
+ * the line, so names and values come back lowercased.
+ */
+std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
+{
+ std::map<std::string, std::string> meta;
+ std::string lline = ToLower(line);
+ if (lline.find("<seg")!=0) return meta;
+ size_t close = lline.find(">");
+ if (close == std::string::npos) return meta; // error
+ size_t end = lline.find("</seg>");
+ std::string seg = Trim(lline.substr(4, close-4));
+ std::string text = line.substr(close+1, end - close - 1);
+ // normalise "attr = val" to "attr=val" by deleting spaces around '='.
+ // Each edit restarts the scan from the beginning (i = 0; continue).
+ for (size_t i = 1; i < seg.size(); i++) {
+  if (seg[i] == '=' && seg[i-1] == ' ') {
+   std::string less = seg.substr(0, i-1) + seg.substr(i);
+   seg = less; i = 0; continue;
+  }
+  // NOTE(review): seg[i+1] indexes one past the last character when '=' is
+  // the final char; str[size()] is only guaranteed ('\0') from C++11 -- confirm.
+  if (seg[i] == '=' && seg[i+1] == ' ') {
+   std::string less = seg.substr(0, i+1);
+   if (i+2 < seg.size()) less += seg.substr(i+2);
+   seg = less; i = 0; continue;
+  }
+ }
+ line = Trim(text);
+ if (seg == "") return meta;
+ // pull off attr=val pairs one at a time; seg shrinks as pairs are consumed
+ for (size_t i = 1; i < seg.size(); i++) {
+  if (seg[i] == '=') {
+   std::string label = seg.substr(0, i);
+   std::string val = seg.substr(i+1);
+   if (val[0] == '"') {
+    // quoted value: runs to the closing quote
+    val = val.substr(1);
+    size_t close = val.find('"');
+    if (close == std::string::npos) {
+     TRACE_ERR("SGML parse error: missing \"\n");
+     seg = "";
+     i = 0;
+    } else {
+     seg = val.substr(close+1);
+     val = val.substr(0, close);
+     i = 0;
+    }
+   } else {
+    // unquoted value: runs to the next space (or to the end of the tag)
+    size_t close = val.find(' ');
+    if (close == std::string::npos) {
+     seg = "";
+     i = 0;
+    } else {
+     seg = val.substr(close+1);
+     val = val.substr(0, close);
+    }
+   }
+   label = Trim(label);
+   seg = Trim(seg);
+   meta[label] = val;
+  }
+ }
+ return meta;
+}
+
+}
+
+
diff --git a/moses/src/Util.h b/moses/src/Util.h
new file mode 100644
index 000000000..487352d27
--- /dev/null
+++ b/moses/src/Util.h
@@ -0,0 +1,300 @@
+// $Id: Util.h 2938 2010-02-24 10:37:49Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Util_h
+#define moses_Util_h
+
+#include <iostream>
+#include <cassert>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <cmath>
+#include <limits>
+#include <map>
+#include "TypeDef.h"
+#include <cstdlib>
+#include <cstring>
+
+namespace Moses
+{
+
+/** Outputting debugging/verbose information to stderr.
+ * Use TRACE_ENABLE flag to redirect tracing output into oblivion
+ * so that you can output your own ad-hoc debugging info.
+ * However, if you use stderr diretly, please delete calls to it once
+ * you finished debugging so that it won't clutter up.
+ * Also use TRACE_ENABLE to turn off output of any debugging info
+ * when compiling for a gui front-end so that running gui won't generate
+ * output on command line
+ * */
+#ifdef TRACE_ENABLE
+#define TRACE_ERR(str) do { std::cerr << str; } while (false)
+#else
+#define TRACE_ERR(str) do {} while (false)
+#endif
+
+/** verbose macros
+ * */
+#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
+#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
+
+//! get string representation of any object/variable, as long as it can pipe to a stream
+template<typename T>
+inline std::string SPrint(const T &input)
+{
+ std::stringstream stream("");
+ stream << input;
+ return stream.str();
+}
+
+//! convert string to variable of type T. Used for reading floats, ints etc from files.
+//! NOTE(review): on parse failure the result of the failed extraction is
+//! returned (value-initialised from C++11, indeterminate before) -- confirm
+//! callers only pass well-formed input.
+template<typename T>
+inline T Scan(const std::string &input)
+{
+ std::stringstream stream(input);
+ T ret;
+ stream >> ret;
+ return ret;
+}
+
+//! specialisation for std::string: just return input unchanged
+template<>
+inline std::string Scan<std::string>(const std::string &input)
+{
+ return input;
+}
+
+//! Specialisation to understand yes/no y/n true/false 0/1
+template<>
+bool Scan<bool>(const std::string &input);
+
+//! convert vector of strings to vector of type T (each element through Scan<T>)
+template<typename T>
+inline std::vector<T> Scan(const std::vector< std::string > &input)
+{
+ std::vector<T> output(input.size());
+ for (size_t i = 0 ; i < input.size() ; i++)
+ {
+  output[i] = Scan<T>( input[i] );
+ }
+ return output;
+}
+
+/** replace all occurrences of todelStr in str with the string toaddStr.
+ * The scan resumes one character after each replacement start, so the tail
+ * of the inserted text is rescanned. NOTE(review): if toaddStr contains
+ * todelStr anywhere past its first character this loops forever -- confirm
+ * callers never pass such arguments.
+ */
+inline std::string Replace(const std::string& str,
+      const std::string& todelStr,
+      const std::string& toaddStr)
+{
+ size_t pos=0;
+ std::string newStr=str;
+ while ((pos=newStr.find(todelStr,pos))!=std::string::npos){ newStr.replace(pos++,todelStr.size(),toaddStr); }
+ return newStr;
+}
+
+/** tokenise input string to vector of string. each element has been separated by a character in the delimiters argument.
+ Each separator can only be 1 character long. The default delimiters are space or tab.
+ Runs of adjacent delimiters are collapsed: no empty tokens are produced.
+*/
+inline std::vector<std::string> Tokenize(const std::string& str,
+      const std::string& delimiters = " \t")
+{
+ std::vector<std::string> tokens;
+ // Skip delimiters at beginning.
+ std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
+ // Find first "non-delimiter".
+ std::string::size_type pos = str.find_first_of(delimiters, lastPos);
+
+ while (std::string::npos != pos || std::string::npos != lastPos)
+ {
+  // Found a token, add it to the vector.
+  tokens.push_back(str.substr(lastPos, pos - lastPos));
+  // Skip delimiters. Note the "not_of"
+  lastPos = str.find_first_not_of(delimiters, pos);
+  // Find next "non-delimiter"
+  pos = str.find_first_of(delimiters, lastPos);
+ }
+
+ return tokens;
+}
+
+//! tokenise input string to vector of type T (each token run through Scan<T>)
+template<typename T>
+inline std::vector<T> Tokenize( const std::string &input
+       , const std::string& delimiters = " \t")
+{
+ std::vector<std::string> stringVector = Tokenize(input, delimiters);
+ return Scan<T>( stringVector );
+}
+
+/** split str on every occurrence of the (multi-character) separator.
+ * Unlike Tokenize above, adjacent separators DO produce empty tokens, and
+ * the result always has (number of separators + 1) elements.
+ */
+inline std::vector<std::string> TokenizeMultiCharSeparator(
+        const std::string& str,
+        const std::string& separator)
+{
+ std::vector<std::string> tokens;
+
+ size_t pos = 0;
+ // Find first "non-delimiter".
+ std::string::size_type nextPos = str.find(separator, pos);
+
+ while (nextPos != std::string::npos)
+ {
+  // Found a token, add it to the vector.
+  tokens.push_back(str.substr(pos, nextPos - pos));
+  // Skip delimiters. Note the "not_of"
+  pos = nextPos + separator.size();
+  // Find next "non-delimiter"
+  nextPos = str.find(separator, pos);
+ }
+ // trailing piece after the last separator (or the whole string if none)
+ tokens.push_back(str.substr(pos, nextPos - pos));
+
+ return tokens;
+}
+
+/**
+ * Convert vector of type T to string, elements separated by delimiter.
+ * An empty vector yields the empty string.
+ */
+template <typename T>
+std::string Join(const std::string& delimiter, const std::vector<T>& items)
+{
+ std::ostringstream outstr;
+ if(items.size() == 0) return "";
+ outstr << items[0];
+ for(unsigned int i = 1; i < items.size(); i++)
+  outstr << delimiter << items[i];
+ return outstr.str();
+}
+
+ //! transform prob to natural log score
+inline float TransformScore(float prob)
+{
+ return log(prob);
+}
+
+//! transform natural log score to prob. Not currently used
+inline float UntransformScore(float score)
+{
+ return exp(score);
+}
+
+//! irst numbers are in log 10, transform to natural log (multiply by ln 10)
+inline float TransformIRSTScore(float irstScore)
+{
+ return irstScore * 2.30258509299405f;
+}
+
+inline float UntransformIRSTScore(float logNScore)
+{ // opposite of above
+ return logNScore / 2.30258509299405f;
+}
+
+//! make sure score doesn't fall below LOWEST_SCORE
+inline float FloorScore(float logScore)
+{
+ return (std::max)(logScore , LOWEST_SCORE);
+}
+
+//! Should SRI & IRST transform functions be merged ??? (identical log10 -> ln scaling)
+inline float TransformSRIScore(float sriScore)
+{
+ return sriScore * 2.30258509299405f;
+}
+
+inline float UntransformSRIScore(float logNScore)
+{ // opposite of above
+ return logNScore / 2.30258509299405f;
+}
+
+/** convert prob vector to log probs and calc inner product with weight vector:
+ * returns sum_i weightT[i] * log(probVector[i]).
+ * The two vectors must be the same length (asserted).
+ */
+inline float CalcTranslationScore(const std::vector<float> &probVector,
+     const std::vector<float> &weightT)
+{
+ assert(weightT.size()==probVector.size());
+ float rv=0.0;
+ // walk both arrays in lockstep with raw pointers, accumulating weighted log probs
+ for(float const *sb=&probVector[0],*se=sb+probVector.size(),*wb=&weightT[0];
+   sb!=se; ++sb, ++wb)
+  rv += TransformScore(*sb) * (*wb);
+ return rv;
+}
+
+/** declaration of ToString() function to go in header for each class.
+ * This function, as well as the operator<< fn for each class, is
+ * for debugging purposes only. The output format is likely to change from
+ * time-to-time as classes are updated so shouldn't be relied upon
+ * for any decoding algorithm
+*/
+#define TO_STRING() std::string ToString() const;
+
+//! definition of ToString() function to go in .cpp file. Can be used for any class that can be piped to a stream
+#define TO_STRING_BODY(CLASS) \
+ std::string CLASS::ToString() const \
+ { \
+ std::stringstream out; \
+ out << *this; \
+ return out.str(); \
+ } \
+
+//! delete and remove every element of a collection object such as map, set, list etc.
+//! COLL must hold pointers; the collection is left empty.
+template<class COLL>
+void RemoveAllInColl(COLL &coll)
+{
+ for (typename COLL::const_iterator iter = coll.begin() ; iter != coll.end() ; ++iter)
+ {
+  delete (*iter);
+ }
+ coll.clear();
+}
+
+//! x-platform reference to temp folder
+std::string GetTempFolder();
+//! Create temp file and return output stream and full file path as arguments
+void CreateTempFile(std::ofstream &fileStream, std::string &filePath);
+//! MD5 hash of a file
+std::string GetMD5Hash(const std::string &filePath);
+
+//! save memory by getting rid of spare, unused elements in a collection
+//! (pre-C++11 copy-and-swap idiom: T(v).swap(v))
+template<typename T>
+inline void ShrinkToFit(T& v)
+{
+ if(v.capacity()>v.size())
+  T(v).swap(v);
+ assert(v.capacity()==v.size());
+}
+
+bool FileExists(const std::string& filePath);
+//! delete white spaces at beginning and end of string
+const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
+const std::string ToLower(const std::string& str);
+
+// A couple of utilities to measure decoding time
+void ResetUserTime();
+void PrintUserTime(const std::string &message);
+double GetUserTime();
+
+// dump SGML parser for <seg> tags
+std::map<std::string, std::string> ProcessAndStripSGML(std::string &line);
+
+}
+
+#endif
diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp
new file mode 100644
index 000000000..cb5621ef2
--- /dev/null
+++ b/moses/src/Word.cpp
@@ -0,0 +1,109 @@
+// $Id: Word.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+// vim::tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <sstream>
+#include "memory.h"
+#include "Word.h"
+#include "TypeDef.h"
+#include "StaticData.h" // needed to determine the FactorDelimiter
+
+using namespace std;
+
+namespace Moses
+{
+
+// static
+/** Transitive 3-way comparison of two Words (-1, 0, +1).
+ * Only factor positions present in BOTH words are compared; a NULL on
+ * either side is skipped. NOTE(review): the comparison is on Factor
+ * POINTERS, i.e. by address -- this relies on factors being interned (one
+ * object per distinct factor), so equality is meaningful but the relative
+ * order is arbitrary, not lexicographic; confirm no caller assumes otherwise.
+ */
+int Word::Compare(const Word &targetWord, const Word &sourceWord)
+{
+ for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
+ {
+  const Factor *targetFactor = targetWord[factorType]
+      ,*sourceFactor = sourceWord[factorType];
+
+  // skip positions where either word has no factor
+  if (targetFactor == NULL || sourceFactor == NULL)
+   continue;
+  if (targetFactor == sourceFactor)
+   continue;
+
+  return (targetFactor<sourceFactor) ? -1 : +1;
+ }
+ return 0;
+
+}
+
+/** add the factors from sourceWord into this word: only factor slots that
+ * are currently NULL here are filled; existing factors are never overwritten. */
+void Word::Merge(const Word &sourceWord)
+{
+ for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ {
+  const Factor *sourcefactor = sourceWord.m_factorArray[currFactor]
+      ,*targetFactor = this ->m_factorArray[currFactor];
+  if (targetFactor == NULL && sourcefactor != NULL)
+  {
+   m_factorArray[currFactor] = sourcefactor;
+  }
+ }
+}
+
+/** string form of the selected factors, joined by the StaticData factor
+ * delimiter; NULL factors are skipped. endWithBlank appends one trailing space. */
+std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
+{
+ stringstream strme;
+ assert(factorType.size() <= MAX_NUM_FACTORS);
+ const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+ bool firstPass = true;
+ for (unsigned int i = 0 ; i < factorType.size() ; i++)
+ {
+  const Factor *factor = m_factorArray[factorType[i]];
+  if (factor != NULL)
+  {
+   // delimiter only between factors, not before the first one
+   if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
+   strme << factor->GetString();
+  }
+ }
+ if(endWithBlank) strme << " ";
+ return strme.str();
+}
+
+TO_STRING_BODY(Word);
+
+// friend
+/** debug print: all non-NULL factors joined by the StaticData factor
+ * delimiter, followed by a single trailing space. */
+ostream& operator<<(ostream& out, const Word& word)
+{
+ stringstream strme;
+
+ const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+ bool firstPass = true;
+ for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ {
+  FactorType factorType = static_cast<FactorType>(currFactor);
+  const Factor *factor = word.GetFactor(factorType);
+  if (factor != NULL)
+  {
+   // delimiter only between factors, not before the first one
+   if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
+   strme << *factor;
+  }
+ }
+ out << strme.str() << " ";
+ return out;
+}
+
+}
+
diff --git a/moses/src/Word.h b/moses/src/Word.h
new file mode 100644
index 000000000..1e238785d
--- /dev/null
+++ b/moses/src/Word.h
@@ -0,0 +1,123 @@
+// $Id: Word.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_Word_h
+#define moses_Word_h
+
+#include <cstring>
+#include <iostream>
+#include <vector>
+#include <list>
+#include "TypeDef.h"
+#include "Factor.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+class Phrase;
+
+/***
+ * hold a set of factors for a single word: a fixed array of
+ * MAX_NUM_FACTORS Factor pointers, where NULL means the factor is absent.
+ * Does not own the Factor objects it points to.
+ */
+class Word
+{
+ friend std::ostream& operator<<(std::ostream&, const Word&);
+
+protected:
+
+ typedef const Factor * FactorArray[MAX_NUM_FACTORS];
+
+ FactorArray m_factorArray; /**< set of factors */
+
+public:
+ /** deep copy of the pointer array; the Factor objects themselves are shared */
+ Word(const Word &copy) {
+  std::memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
+ }
+
+ /** empty word: every factor slot NULL */
+ Word() {
+  std::memset(m_factorArray, 0, sizeof(FactorArray));
+ }
+
+ ~Word() {}
+
+ //! returns Factor pointer for particular FactorType
+ const Factor*& operator[](FactorType index) {
+  return m_factorArray[index];
+ }
+
+ const Factor * const & operator[](FactorType index) const {
+  return m_factorArray[index];
+ }
+
+ //! Deprecated. should use operator[]
+ inline const Factor* GetFactor(FactorType factorType) const {
+  return m_factorArray[factorType];
+ }
+ inline void SetFactor(FactorType factorType, const Factor *factor)
+ {
+  m_factorArray[factorType] = factor;
+ }
+
+ /** add the factors from sourceWord into this representation,
+ * NULL elements in sourceWord will be skipped */
+ void Merge(const Word &sourceWord);
+
+ /** get string representation of list of factors. Used by PDTimp so supposed
+ * to be invariant to changes in format of debugging output, therefore, doesn't
+ * use streaming output or ToString() from any class so not dependant on
+ * these debugging functions.
+ */
+ std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
+ TO_STRING();
+
+ //! transitive comparison of Word objects
+ inline bool operator< (const Word &compare) const
+ { // needed to store word in GenerationDictionary map
+  // NOTE(review): Compare() in Word.cpp orders by Factor pointer address,
+  // not by factor content -- valid only because factors are interned.
+  return Compare(*this, compare) < 0;
+ }
+
+ /* static functions */
+
+ /** transitive comparison of 2 word objects. Used by operator<.
+ * Only compare the co-joined factors, ie. where factor exists for both words.
+ * Should make it non-static
+ */
+ static int Compare(const Word &targetWord, const Word &sourceWord);
+
+};
+
+/** Orders Word pointers by dereferencing and using Word::operator<.
+ * (The previous comment about hypothesis recombination was a copy-paste
+ * from a hypothesis comparer and did not describe this functor.)
+ */
+struct WordComparer
+{
+ //! true iff *a sorts before *b
+ bool operator()(const Word *a, const Word *b) const
+ {
+  return *a < *b;
+ }
+};
+
+}
+
+#endif
diff --git a/moses/src/WordLattice.cpp b/moses/src/WordLattice.cpp
new file mode 100644
index 000000000..396fdcb7c
--- /dev/null
+++ b/moses/src/WordLattice.cpp
@@ -0,0 +1,169 @@
+#include "StaticData.h"
+#include "WordLattice.h"
+#include "PCNTools.h"
+#include "Util.h"
+#include "FloydWarshall.h"
+
+namespace Moses
+{
+WordLattice::WordLattice() {}
+
+//! number of columns (input positions) consumed when taking arc j out of column i
+size_t WordLattice::GetColumnIncrement(size_t i, size_t j) const
+{
+	return next_nodes[i][j];
+}
+
+/** Print the lattice in human-readable form: one line per column,
+ * each arc rendered as (word, scores..., column increment).
+ */
+void WordLattice::Print(std::ostream& out) const {
+	out<<"word lattice: "<<data.size()<<"\n";
+	for(size_t i=0;i<data.size();++i) {
+		out<<i<<" -- ";
+		for(size_t j=0;j<data[i].size();++j) {
+			out<<"("<<data[i][j].first.ToString()<<", ";
+			// emit every score attached to this arc
+			for(std::vector<float>::const_iterator scoreIterator = data[i][j].second.begin();scoreIterator<data[i][j].second.end();scoreIterator++) {
+				out<<*scoreIterator<<", ";
+			}
+			out << GetColumnIncrement(i,j) << ") ";
+		}
+
+		out<<"\n";
+	}
+	out<<"\n\n";
+}
+
+/** Read one lattice from 'in' in PCN format.
+ * Parses per-arc link scores (clamped to LOWEST_SCORE after log transform),
+ * optionally appends a real-word-count feature when one more weight than
+ * link parameter is configured, then precomputes all-pairs shortest-path
+ * node distances via Floyd-Warshall for later distortion computation.
+ * Returns non-zero on success, 0/false on EOF or malformed input.
+ */
+int WordLattice::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
+{
+	Clear();
+	std::string line;
+	if(!getline(in,line)) return 0;
+	// optional <seg id="..."> style markup carries the translation id
+	std::map<std::string, std::string> meta=ProcessAndStripSGML(line);
+	if (meta.find("id") != meta.end()) { this->SetTranslationId(atol(meta["id"].c_str())); }
+	size_t numLinkParams = StaticData::Instance().GetNumLinkParams();
+	size_t numLinkWeights = StaticData::Instance().GetNumInputScores();
+
+	//when we have one more weight than params, we add a word count feature
+	bool addRealWordCount = ((numLinkParams + 1) == numLinkWeights);
+
+	PCN::CN cn = PCN::parsePCN(line);
+	data.resize(cn.size());
+	next_nodes.resize(cn.size());
+	for(size_t i=0;i<cn.size();++i) {
+		PCN::CNCol& col = cn[i];
+		if (col.empty()) return false;
+		data[i].resize(col.size());
+		next_nodes[i].resize(col.size());
+		for (size_t j=0;j<col.size();++j) {
+			PCN::CNAlt& alt = col[j];
+
+
+			//check for correct number of link parameters
+			if (alt.first.second.size() != numLinkParams) {
+				TRACE_ERR("ERROR: need " << numLinkParams << " link parameters, found " << alt.first.second.size() << " while reading column " << i << " from " << line << "\n");
+				return false;
+			}
+
+			//check each element for bounds
+			std::vector<float>::iterator probsIterator;
+			data[i][j].second = std::vector<float>(0);
+			for(probsIterator = alt.first.second.begin(); probsIterator < alt.first.second.end(); probsIterator++) {
+				// out-of-range probabilities are warned about but kept as-is
+				if (*probsIterator < 0.0f) {
+					TRACE_ERR("WARN: neg probability: " << *probsIterator << "\n");
+					//*probsIterator = 0.0f;
+				}
+				if (*probsIterator > 1.0f) {
+					TRACE_ERR("WARN: probability > 1: " << *probsIterator << "\n");
+					//*probsIterator = 1.0f;
+				}
+				// store log-prob, floored at LOWEST_SCORE (guards against log(0) = -inf)
+				data[i][j].second.push_back(std::max(static_cast<float>(log(*probsIterator)), LOWEST_SCORE));
+			}
+			//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
+			if (addRealWordCount) {
+				//only add count if not epsilon
+				float value = (alt.first.first=="" || alt.first.first==EPSILON) ? 0.0f : -1.0f;
+				data[i][j].second.push_back(value);
+			}
+			String2Word(alt.first.first,data[i][j].first,factorOrder);
+			next_nodes[i][j] = alt.second;
+		}
+	}
+	if (!cn.empty()) {
+		// precompute node-to-node shortest paths for distortion scoring
+		std::vector<std::vector<bool> > edges(0);
+		this->GetAsEdgeMatrix(edges);
+		floyd_warshall(edges,distances);
+
+		IFVERBOSE(2) {
+			TRACE_ERR("Shortest paths:\n");
+			for (size_t i=0; i<edges.size(); ++i) {
+				for (size_t j=0; j<edges.size(); ++j) {
+					int d = distances[i][j];
+					// distances > 99999 mean "unreachable"; print as -1
+					if (d > 99999) { d=-1; }
+					TRACE_ERR("\t" << d);
+				}
+				TRACE_ERR("\n");
+			}
+		}
+	}
+	return !cn.empty();
+}
+
+/** Convert the lattice into a boolean adjacency matrix over nodes
+ * 0..data.size(): edges[i][k] is true iff some arc leaves column i with a
+ * column increment of (k - i). Matrix is (size+1)^2 to include the end node.
+ */
+void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const
+{
+	edges.resize(data.size()+1,std::vector<bool>(data.size()+1, false));
+	for (size_t i=0;i<data.size();++i) {
+		for (size_t j=0;j<data[i].size(); ++j) {
+			edges[i][i+next_nodes[i][j]] = true;
+		}
+	}
+}
+
+/** Distortion distance between two coverage ranges, measured along
+ * precomputed shortest paths through the lattice rather than flat word
+ * positions. Four cases: (A) initial jump from the lattice start node,
+ * (B) backward jump when prev ends after current starts, monotone
+ * continuation (cost 0), and (C) forward jump otherwise. Values outside
+ * [0, 99999] indicate an unreachable pair and are logged as "weird".
+ */
+int WordLattice::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const
+{
+
+#if 1
+	int result;
+	if (prev.GetStartPos() == NOT_FOUND) {
+		// case A: nothing translated yet; distance from start node 0
+		//TRACE_ERR("returning initial distance from 0 to " << (current.GetStartPos()+1) << " which is " << (distances[0][current.GetStartPos()+1] - 1) <<"\n");
+		result = distances[0][current.GetStartPos()+1] - 1;
+		if (result < 0 || result > 99999) {
+			TRACE_ERR("prev: " << prev << "\n current: " << current << "\n");
+			TRACE_ERR("A: got a weird distance from 0 to " << (current.GetStartPos()+1) << " of " << result << "\n");
+		}
+	} else if (prev.GetEndPos() > current.GetStartPos()) {
+		// case B: jumping backwards
+		//TRACE_ERR("returning forward distance from "<< current.GetStartPos() << " to " << (prev.GetEndPos()+1) << " which is " << distances[current.GetStartPos()][prev.GetEndPos()+1] <<"\n");
+		result = distances[current.GetStartPos()][prev.GetEndPos()+1];
+		if (result < 0 || result > 99999) {
+			TRACE_ERR("prev: " << prev << "\n current: " << current << "\n");
+
+			TRACE_ERR("B: got a weird distance from "<< current.GetStartPos() << " to " << prev.GetEndPos()+1 << " of " << result << "\n");
+		}
+	} else if (prev.GetEndPos()+1 == current.GetStartPos()) {
+		// monotone continuation: no distortion
+		return 0;
+	} else {
+		// case C: jumping forwards over a gap
+		//TRACE_ERR("returning reverse distance from "<< (prev.GetEndPos()+1) << " to " << (current.GetStartPos()+1) << " which is " << (distances[prev.GetEndPos()+1][current.GetStartPos()+1] - 1) <<"\n");
+		result = distances[prev.GetEndPos() + 1][current.GetStartPos() + 1] - 1;
+		if (result < 0 || result > 99999) {
+			TRACE_ERR("prev: " << prev << "\n current: " << current << "\n");
+
+			TRACE_ERR("C: got a weird distance from "<< prev.GetEndPos()+1 << " to " << current.GetStartPos()+1 << " of " << result << "\n");
+		}
+	}
+	return result;
+#else
+	// simple positional distortion (disabled alternative implementation)
+	int dist = 0;
+	if (prev.GetNumWordsCovered() == 0) {
+		dist = current.GetStartPos();
+	} else {
+		dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
+	}
+	return abs(dist);
+#endif
+}
+
+/** True iff node 'end' is reachable from node 'start' in the lattice.
+ * Values >= 100000 in the Floyd-Warshall distance matrix act as the
+ * "unreachable" sentinel (see the 99999 threshold used elsewhere).
+ */
+bool WordLattice::CanIGetFromAToB(size_t start, size_t end) const
+{
+	// std::cerr << "CanIgetFromAToB(" << start << "," << end << ")=" << distances[start][end] << std::endl;
+	return distances[start][end] < 100000;
+}
+
+
+}
+
diff --git a/moses/src/WordLattice.h b/moses/src/WordLattice.h
new file mode 100644
index 000000000..d4cca44c7
--- /dev/null
+++ b/moses/src/WordLattice.h
@@ -0,0 +1,36 @@
+#ifndef moses_WordLattice_h
+#define moses_WordLattice_h
+
+#include <vector>
+#include "ConfusionNet.h"
+
+namespace Moses
+{
+
+/** General word lattice */
+class WordLattice: public ConfusionNet {
+private:
+ std::vector<std::vector<size_t> > next_nodes;
+ std::vector<std::vector<int> > distances;
+
+public:
+ WordLattice();
+ size_t GetColumnIncrement(size_t ic, size_t j) const;
+ void Print(std::ostream&) const;
+ /** Get shortest path between two nodes
+ */
+ virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;
+ // is it possible to get from the edge of the previous word range to the current word range
+ virtual bool CanIGetFromAToB(size_t start, size_t end) const;
+
+ int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+
+ /** Convert internal representation into an edge matrix
+ * @note edges[1][2] means there is an edge from 1 to 2
+ */
+ void GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const;
+};
+
+}
+
+#endif
diff --git a/moses/src/WordsBitmap.cpp b/moses/src/WordsBitmap.cpp
new file mode 100644
index 000000000..f2d497c78
--- /dev/null
+++ b/moses/src/WordsBitmap.cpp
@@ -0,0 +1,64 @@
+// $Id: WordsBitmap.cpp 1897 2008-10-08 23:51:26Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "WordsBitmap.h"
+
+namespace Moses
+{
+
+TO_STRING_BODY(WordsBitmap);
+
+/** Estimate of future distortion cost: sums |jump| distances needed to
+ * visit every remaining uncovered gap in the bitmap, starting from
+ * 'lastPos' and finishing at the end of the sentence. Walks the bitmap
+ * once, tracking the previous/current/next coverage bits to detect gap
+ * starts (add jump cost) and gap ends (update lastPos).
+ */
+int WordsBitmap::GetFutureCosts(int lastPos) const
+{
+	int sum=0;
+	// aim1/ai/aip1: coverage bits at positions i-1, i, i+1 (sentence end counts as covered)
+	bool aim1=0,ai=0,aip1=m_bitmap[0];
+
+	for(size_t i=0;i<m_size;++i) {
+		aim1 = ai;
+		ai = aip1;
+		aip1 = (i+1==m_size || m_bitmap[i+1]);
+
+#ifndef NDEBUG
+		// sanity-check the rolling-window bookkeeping against the bitmap
+		if( i>0 ) assert( aim1==(i==0||m_bitmap[i-1]==1));
+		//assert( ai==a[i] );
+		if( i+1<m_size ) assert( aip1==m_bitmap[i+1]);
+#endif
+		if((i==0||aim1)&&ai==0) {
+			// position i starts an uncovered gap: pay the jump from lastPos
+			sum+=abs(lastPos-static_cast<int>(i)+1);
+			//	sum+=getJumpCosts(lastPos,i,maxJumpWidth);
+		}
+		// if(sum>1e5) return sum;
+		if(i>0 && ai==0 && (i+1==m_size||aip1) )
+			// position i ends a gap: next jump starts after it
+			lastPos = (int) (i+1);
+	}
+
+	// final jump to the sentence end
+	// sum+=getJumpCosts(lastPos,as,maxJumpWidth);
+	sum+=abs(lastPos-static_cast<int>(m_size)+1); //getCosts(lastPos,as);
+	assert(sum>=0);
+
+	//	TRACE_ERR(sum<<"\n");
+
+	return sum;
+}
+
+
+}
+
diff --git a/moses/src/WordsBitmap.h b/moses/src/WordsBitmap.h
new file mode 100644
index 000000000..231f18e7b
--- /dev/null
+++ b/moses/src/WordsBitmap.h
@@ -0,0 +1,254 @@
+// $Id: WordsBitmap.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_WordsBitmap_h
+#define moses_WordsBitmap_h
+
+#include <limits>
+#include <vector>
+#include <iostream>
+#include <cstring>
+#include <cmath>
+#include <cstdlib>
+#include "TypeDef.h"
+#include "WordsRange.h"
+
+namespace Moses
+{
+typedef unsigned long WordsBitmapID;
+
+/** vector of boolean used to represent whether a word has been translated or not
+*/
+class WordsBitmap
+{
+	friend std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap);
+protected:
+	const size_t m_size; /**< number of words in sentence */
+	bool	*m_bitmap;	/**< ticks of words that have been done */
+
+	WordsBitmap(); // not implemented
+
+	//! set all elements to false
+	void Initialize()
+	{
+		for (size_t pos = 0 ; pos < m_size ; pos++)
+		{
+			m_bitmap[pos] = false;
+		}
+	}
+
+public:
+	//! create WordsBitmap of length size and initialise
+	WordsBitmap(size_t size)
+		:m_size	(size)
+	{
+		// NOTE(review): malloc result is not checked before Initialize() writes to it
+		m_bitmap = (bool*) malloc(sizeof(bool) * size);
+		Initialize();
+	}
+	//! deep copy
+	WordsBitmap(const WordsBitmap &copy)
+		:m_size	(copy.m_size)
+	{
+		m_bitmap = (bool*) malloc(sizeof(bool) * m_size);
+		for (size_t pos = 0 ; pos < copy.m_size ; pos++)
+		{
+			m_bitmap[pos] = copy.GetValue(pos);
+		}
+	}
+	~WordsBitmap()
+	{
+		free(m_bitmap);
+	}
+	//! count of words translated
+	size_t GetNumWordsCovered() const
+	{
+		size_t count = 0;
+		for (size_t pos = 0 ; pos < m_size ; pos++)
+		{
+			if (m_bitmap[pos])
+				count++;
+		}
+		return count;
+	}
+
+	//! position of 1st word not yet translated, or NOT_FOUND if everything already translated
+	size_t GetFirstGapPos() const
+	{
+		for (size_t pos = 0 ; pos < m_size ; pos++)
+		{
+			if (!m_bitmap[pos])
+			{
+				return pos;
+			}
+		}
+		// no starting pos
+		return NOT_FOUND;
+	}
+
+	//! position of last translated word, or NOT_FOUND if nothing translated yet
+	size_t GetLastPos() const
+	{
+		for (int pos = (int) m_size - 1 ; pos >= 0 ; pos--)
+		{
+			if (m_bitmap[pos])
+			{
+				return pos;
+			}
+		}
+		// no starting pos
+		return NOT_FOUND;
+	}
+
+	//! whether a word has been translated at a particular position
+	bool GetValue(size_t pos) const
+	{
+		return m_bitmap[pos];
+	}
+	//! set value at a particular position
+	void SetValue( size_t pos, bool value )
+	{
+		m_bitmap[pos] = value;
+	}
+	//! set value between 2 positions, inclusive
+	void SetValue( size_t startPos, size_t endPos, bool value )
+	{
+		for(size_t pos = startPos ; pos <= endPos ; pos++)
+		{
+			m_bitmap[pos] = value;
+		}
+	}
+	//! whether every word has been translated
+	bool IsComplete() const
+	{
+		return GetSize() == GetNumWordsCovered();
+	}
+	//! whether the wordrange overlaps with any translated word in this bitmap
+	bool Overlap(const WordsRange &compare) const
+	{
+		for (size_t pos = compare.GetStartPos() ; pos <= compare.GetEndPos() ; pos++)
+		{
+			if (m_bitmap[pos])
+				return true;
+		}
+		return false;
+	}
+	//! number of elements
+	size_t GetSize() const
+	{
+		return m_size;
+	}
+
+	//! transitive comparison of WordsBitmap
+	inline int Compare (const WordsBitmap &compare) const
+	{
+		// -1 = less than
+		// +1 = more than
+		// 0	= same
+
+		size_t thisSize = GetSize()
+					,compareSize = compare.GetSize();
+
+		if (thisSize != compareSize)
+		{
+			return (thisSize < compareSize) ? -1 : 1;
+		}
+		// NOTE(review): memcmp on bool arrays assumes each bool is stored
+		// canonically as 0/1 — holds for values written via this class
+		return std::memcmp(m_bitmap, compare.m_bitmap, thisSize * sizeof(bool));
+	}
+
+	bool operator< (const WordsBitmap &compare) const
+	{
+		return Compare(compare) < 0;
+	}
+
+	//! leftmost position l' <= l such that every word in [l', l) is uncovered
+	inline size_t GetEdgeToTheLeftOf(size_t l) const
+	{
+		if (l == 0) return l;
+		while (l && !m_bitmap[l-1]) { --l; }
+		return l;
+	}
+
+	//! rightmost position r' >= r such that every word in (r, r'] is uncovered
+	inline size_t GetEdgeToTheRightOf(size_t r) const
+	{
+		if (r+1 == m_size) return r;
+		while (r+1 < m_size && !m_bitmap[r+1]) { ++r; }
+		return r;
+	}
+
+
+	//! estimate of future distortion cost from lastPos (see WordsBitmap.cpp)
+	int GetFutureCosts(int lastPos) const ;
+
+	//! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
+	WordsBitmapID GetID() const {
+		assert(m_size < (1<<16));
+
+		size_t start = GetFirstGapPos();
+		if (start == NOT_FOUND) start = m_size; // nothing left
+
+		size_t end = GetLastPos();
+		if (end == NOT_FOUND) end = 0; // nothing translated yet
+
+		assert(end < start || end-start <= 16);
+		WordsBitmapID id = 0;
+		// pack coverage bits from 'end' down to just above 'start'
+		for(size_t pos = end; pos > start; pos--) {
+			id = id*2 + (int) GetValue(pos);
+		}
+		return id + (1<<16) * start;
+	}
+
+	//! converts bitmap into an integer ID, with an additional span covered
+	WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
+		assert(m_size < (1<<16));
+
+		size_t start = GetFirstGapPos();
+		if (start == NOT_FOUND) start = m_size; // nothing left
+
+		size_t end = GetLastPos();
+		if (end == NOT_FOUND) end = 0; // nothing translated yet
+
+		// pretend the span [startPos, endPos] were already covered
+		if (start == startPos) start = endPos+1;
+		if (end < endPos) end = endPos;
+
+		assert(end < start || end-start <= 16);
+		WordsBitmapID id = 0;
+		for(size_t pos = end; pos > start; pos--) {
+			id = id*2;
+			if (GetValue(pos) || (startPos<=pos && pos<=endPos))
+				id++;
+		}
+		return id + (1<<16) * start;
+	}
+
+	TO_STRING();
+};
+
+// friend
+//! print the bitmap as a string of 0s and 1s, one digit per word position
+inline std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap)
+{
+	for (size_t i = 0 ; i < wordsBitmap.m_size ; i++)
+	{
+		out << (wordsBitmap.GetValue(i) ? 1 : 0);
+	}
+	return out;
+}
+
+}
+#endif
diff --git a/moses/src/WordsRange.cpp b/moses/src/WordsRange.cpp
new file mode 100644
index 000000000..5435cdbe3
--- /dev/null
+++ b/moses/src/WordsRange.cpp
@@ -0,0 +1,34 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "WordsRange.h"
+
+namespace Moses
+{
+
+TO_STRING_BODY(WordsRange);
+
+//! print a range as "[start..end]"
+std::ostream& operator << (std::ostream& out, const WordsRange& range)
+{
+	out << "[" << range.m_startPos << ".." << range.m_endPos << "]";
+	return out;
+}
+
+}
+
diff --git a/moses/src/WordsRange.h b/moses/src/WordsRange.h
new file mode 100644
index 000000000..58cb1f14a
--- /dev/null
+++ b/moses/src/WordsRange.h
@@ -0,0 +1,95 @@
+// $Id: WordsRange.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_WordsRange_h
+#define moses_WordsRange_h
+
+#include <iostream>
+#include "TypeDef.h"
+#include "Util.h"
+
+namespace Moses
+{
+
+/***
+ * Efficient version of WordsBitmap for contiguous ranges
+ */
+class WordsRange
+{
+	friend std::ostream& operator << (std::ostream& out, const WordsRange& range);
+
+	// inclusive word positions [m_startPos, m_endPos]
+	size_t m_startPos, m_endPos;
+public:
+	inline WordsRange(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}
+	inline WordsRange(const WordsRange &copy)
+		: m_startPos(copy.GetStartPos())
+		, m_endPos(copy.GetEndPos())
+	{}
+
+	inline size_t GetStartPos() const
+	{
+		return m_startPos;
+	}
+	inline size_t GetEndPos() const
+	{
+		return m_endPos;
+	}
+
+	//! count of words translated
+	inline size_t GetNumWordsCovered() const
+	{
+		return (m_startPos == NOT_FOUND) ? 0 : m_endPos - m_startPos + 1;
+	}
+
+	//! transitive comparison: lexicographic on (start, end)
+	inline bool operator<(const WordsRange& x) const
+	{
+		return (m_startPos<x.m_startPos
+						|| (m_startPos==x.m_startPos && m_endPos<x.m_endPos));
+	}
+
+	// Whether two word ranges overlap or not
+	inline bool Overlap(const WordsRange& x) const
+	{
+
+		if ( x.m_endPos < m_startPos || x.m_startPos > m_endPos) return false;
+
+		return true;
+	}
+
+	//! gap size between two non-overlapping ranges (caller must ensure no overlap)
+	inline size_t GetNumWordsBetween(const WordsRange& x) const
+	{
+		assert(!Overlap(x));
+
+		if (x.m_endPos < m_startPos) {
+			return m_startPos - x.m_endPos;
+		}
+
+		return x.m_startPos - m_endPos;
+	}
+
+
+	TO_STRING();
+};
+
+
+}
+#endif
diff --git a/moses/src/XmlOption.cpp b/moses/src/XmlOption.cpp
new file mode 100644
index 000000000..bf4b50de2
--- /dev/null
+++ b/moses/src/XmlOption.cpp
@@ -0,0 +1,406 @@
+// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include "XmlOption.h"
+#include <vector>
+#include <string>
+#include <iostream>
+#include "Util.h"
+#include "StaticData.h"
+#include "WordsRange.h"
+#include "TargetPhrase.h"
+
+namespace Moses
+{
+
+/** Extract the value of attributeName from an XML tag body, honouring
+ * backslash-escaped quotes inside the value. Returns "" when the attribute
+ * is absent or the closing quote is missing.
+ * NOTE(review): the closing-quote search starts at contentsStart+1, which
+ * skips the character AT contentsStart — an empty value (attr="") would be
+ * mis-parsed, picking up a later quote instead. TODO confirm intended.
+ */
+string ParseXmlTagAttribute(const string& tag,const string& attributeName){
+	/*TODO deal with unescaping \"*/
+	string tagOpen = attributeName + "=\"";
+	size_t contentsStart = tag.find(tagOpen);
+	if (contentsStart == string::npos) return "";
+	contentsStart += tagOpen.size();
+	size_t contentsEnd = tag.find_first_of('"',contentsStart+1);
+	if (contentsEnd == string::npos) {
+		TRACE_ERR("Malformed XML attribute: "<< tag);
+		return "";
+	}
+	size_t possibleEnd;
+	// skip over escaped quotes (\") inside the attribute value
+	while (tag.at(contentsEnd-1) == '\\' && (possibleEnd = tag.find_first_of('"',contentsEnd+1)) != string::npos) {
+		contentsEnd = possibleEnd;
+	}
+	return tag.substr(contentsStart,contentsEnd-contentsStart);
+}
+
+/**
+ * Remove "<" and ">" from XML tag
+ *
+ * \param str xml token to be stripped
+ */
+string TrimXml(const string& str)
+{
+	// too short to be xml token -> do nothing
+	if (str.size() < 2) return str;
+
+	// strip first and last character
+	if (str[0] == '<' && str[str.size() - 1] == '>')
+	{
+		return str.substr(1, str.size() - 2);
+	}
+	// not an xml token -> do nothing
+	else { return str; }
+}
+
+/**
+ * Check if the token is an XML tag, i.e. starts with "<"
+ *
+ * \param tag token to be checked
+ */
+// NOTE(review): tag[0] on an empty string is undefined behavior; callers
+// appear to pass non-empty tokens from TokenizeXml — confirm.
+bool isXmlTag(const string& tag)
+{
+	return tag[0] == '<';
+}
+
+/**
+ * Split up the input character string into tokens made up of
+ * either XML tags or text.
+ * example: this <b> is a </b> test .
+ * => (this ), (<b>), ( is a ), (</b>), ( test .)
+ *
+ * \param str input string
+ */
+inline vector<string> TokenizeXml(const string& str)
+{
+	string lbrack = "<";
+	string rbrack = ">";
+	vector<string> tokens; // vector of tokens to be returned
+	string::size_type cpos = 0; // current position in string
+	string::size_type lpos = 0; // left start of xml tag
+	string::size_type rpos = 0; // right end of xml tag
+
+	// walk through the string (loop over cpos)
+	while (cpos != str.size())
+	{
+		// find the next opening "<" of an xml tag
+		lpos = str.find_first_of(lbrack, cpos);
+		if (lpos != string::npos)
+		{
+			// find the end of the xml tag
+			rpos = str.find_first_of(rbrack, lpos);
+			// sanity check: there has to be closing ">"
+			if (rpos == string::npos)
+			{
+				TRACE_ERR("ERROR: malformed XML: " << str << endl);
+				return tokens;
+			}
+		}
+		else // no more tags found
+		{
+			// add the rest as token
+			tokens.push_back(str.substr(cpos));
+			break;
+		}
+
+		// add stuff before xml tag as token, if there is any
+		if (lpos - cpos > 0)
+			tokens.push_back(str.substr(cpos, lpos - cpos));
+
+		// add xml tag as token
+		tokens.push_back(str.substr(lpos, rpos-lpos+1));
+		cpos = rpos + 1;
+	}
+	return tokens;
+}
+
+/**
+ * Process a sentence with xml annotation
+ * Xml tags may specify additional/replacing translation options
+ * and reordering constraints
+ *
+ * \param line in: sentence, out: sentence without the xml
+ * \param res vector with translation options specified by xml
+ * \param reorderingConstraint reordering constraint zones specified by xml
+ * \param walls reordering constraint walls specified by xml
+ */
+/*TODO: we'd only have to return a vector of XML options if we dropped linking. 2-d vector
+ is so we can link things up afterwards. We can't create TranslationOptions as we
+ parse because we don't have the completed source parsed until after this function
+ removes all the markup from it (CreateFromString in Sentence::Read).
+ */
+bool ProcessAndStripXMLTags(string &line, vector<vector<XmlOption*> > &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls ) {
+	//parse XML markup in translation line
+
+	// no xml tag? we're done.
+	if (line.find_first_of('<') == string::npos) { return true; }
+
+	// break up input into a vector of xml tags and text
+	// example: (this), (<b>), (is a), (</b>), (test .)
+	vector<string> xmlTokens = TokenizeXml(line);
+
+	// we need to store opened tags, until they are closed
+	// tags are stored as triples (tagname, startpos, contents)
+	typedef pair< string, pair< size_t, string > > OpenedTag;
+	vector< OpenedTag > tagStack; // stack that contains active opened tags
+
+	string cleanLine; // return string (text without xml)
+	vector<XmlOption*> linkedOptions;
+	size_t wordPos = 0; // position in sentence (in terms of number of words)
+	bool isLinked = false;
+	const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+	const string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+
+	// loop through the tokens
+	for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++)
+	{
+		// not a xml tag, but regular text (may contain many words)
+		if(!isXmlTag(xmlTokens[xmlTokenPos]))
+		{
+			// add a space at boundary, if necessary
+			if (cleanLine.size()>0 &&
+			    cleanLine[cleanLine.size() - 1] != ' ' &&
+			    xmlTokens[xmlTokenPos][0] != ' ')
+			{
+				cleanLine += " ";
+			}
+			cleanLine += xmlTokens[xmlTokenPos]; // add to output
+			wordPos = Tokenize(cleanLine).size(); // count all the words
+		}
+
+		// process xml tag
+		else
+		{
+			// *** get essential information about tag ***
+
+			// strip extra boundary spaces and "<" and ">"
+			string tag =  Trim(TrimXml(xmlTokens[xmlTokenPos]));
+			VERBOSE(3,"XML TAG IS: " << tag << std::endl);
+
+			if (tag.size() == 0)
+			{
+				TRACE_ERR("ERROR: empty tag name: " << line << endl);
+				return false;
+			}
+
+			// check if unary (e.g., "<wall/>")
+			bool isUnary = ( tag[tag.size() - 1] == '/' );
+
+			// check if opening tag (e.g. "<a>", not "</a>")
+			bool isClosed = ( tag[0] == '/' );
+			bool isOpen = !isClosed;
+
+			if (isClosed && isUnary)
+			{
+				TRACE_ERR("ERROR: can't have both closed and unary tag <" << tag << ">: " << line << endl);
+				return false;
+			}
+
+			if (isClosed)
+				tag = tag.substr(1); // remove "/" at the beginning
+			if (isUnary)
+				tag = tag.substr(0,tag.size()-1); // remove "/" at the end
+
+			// find the tag name and contents
+			string::size_type endOfName = tag.find_first_of(' ');
+			string tagName = tag;
+			string tagContent = "";
+			if (endOfName != string::npos) {
+				tagName = tag.substr(0,endOfName);
+				tagContent = tag.substr(endOfName+1);
+			}
+
+			// *** process new tag ***
+
+			if (isOpen || isUnary)
+			{
+				// special case: linked tag turns on linked flag
+				if (tagName == "linked")
+				{
+					if (isLinked)
+					{
+						TRACE_ERR("ERROR: second linked tag opened before first one closed: " << line << endl);
+						return false;
+					}
+					isLinked = true;
+				}
+				// put the tag on the tag stack
+				OpenedTag openedTag = make_pair( tagName, make_pair( wordPos, tagContent ) );
+				tagStack.push_back( openedTag );
+				VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") added to stack, now size " << tagStack.size() << endl);
+			}
+
+			// *** process completed tag ***
+
+			if (isClosed || isUnary)
+			{
+				// pop last opened tag from stack;
+				if (tagStack.size() == 0)
+				{
+					TRACE_ERR("ERROR: tag " << tagName << " closed, but not opened" << ":" << line << endl);
+					return false;
+				}
+				OpenedTag openedTag = tagStack.back();
+				tagStack.pop_back();
+
+				// tag names have to match
+				if (openedTag.first != tagName)
+				{
+					TRACE_ERR("ERROR: tag " << openedTag.first << " closed by tag " << tagName << ": " << line << endl );
+					return false;
+				}
+
+				// assemble remaining information about tag
+				size_t startPos = openedTag.second.first;
+				string tagContent = openedTag.second.second;
+				size_t endPos = wordPos;
+
+				// span attribute overwrites position
+				string span = ParseXmlTagAttribute(tagContent,"span");
+				if (! span.empty())
+				{
+					vector<string> ij = Tokenize(span, ",");
+					if (ij.size() != 1 && ij.size() != 2) {
+						TRACE_ERR("ERROR: span attribute must be of the form \"i,j\" or \"i\": " << line << endl);
+						return false;
+					}
+					startPos = atoi(ij[0].c_str());
+					if (ij.size() == 1) endPos = startPos;
+					else endPos = atoi(ij[1].c_str()) + 1;
+				}
+
+				VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);
+				// special tag: <linked>
+				if (tagName == "linked")
+				{
+					isLinked = false;
+				}
+
+				// special tag: wall
+				if (tagName == "wall")
+				{
+					size_t start = (startPos == 0) ? 0 : startPos-1;
+					for(size_t pos = start; pos < endPos; pos++)
+						walls.push_back( pos );
+				}
+
+				// special tag: zone
+				else if (tagName == "zone")
+				{
+					if (startPos >= endPos)
+					{
+						TRACE_ERR("ERROR: zone must span at least one word: " << line << endl);
+						return false;
+					}
+					reorderingConstraint.SetZone( startPos, endPos-1 );
+				}
+
+				// default: opening tag that specifies translation options
+				else
+				{
+					if (startPos >= endPos)
+					{
+						TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
+						return false;
+					}
+
+					// specified translations -> vector of phrases
+					// multiple translations may be specified, separated by "||"
+					vector<string> altTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"translation"), "||");
+					if( altTexts.size() == 1 && altTexts[0] == "" )
+						altTexts.pop_back(); // happens when nothing specified
+					// deal with legacy annotations: "translation" was called "english"
+					// NOTE(review): moreAltTexts[0] is read without an emptiness check;
+					// assumes TokenizeMultiCharSeparator never returns an empty vector — confirm
+					vector<string> moreAltTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"english"), "||");
+					if (moreAltTexts.size()>1 || moreAltTexts[0] != "")
+					{
+						for(vector<string>::iterator translation=moreAltTexts.begin();
+						    translation != moreAltTexts.end();
+						    translation++)
+						{
+							string t = *translation;
+							altTexts.push_back( t );
+						}
+					}
+
+					// specified probabilities for the translations -> vector of probs
+					vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
+					if( altProbs.size() == 1 && altProbs[0] == "" )
+						altProbs.pop_back(); // happens when nothing specified
+
+					// report what we have processed so far
+					// NOTE(review): altTexts/altProbs may be empty after pop_back above,
+					// making altTexts[0]/altProbs[0] out-of-bounds at verbosity >= 3 — confirm
+					VERBOSE(3,"XML TAG NAME IS: '" << tagName << "'" << endl);
+					VERBOSE(3,"XML TAG TRANSLATION IS: '" << altTexts[0] << "'" << endl);
+					VERBOSE(3,"XML TAG PROB IS: '" << altProbs[0] << "'" << endl);
+					VERBOSE(3,"XML TAG SPAN IS: " << startPos << "-" << (endPos-1) << endl);
+					if (altProbs.size() > 0 && altTexts.size() != altProbs.size()) {
+						TRACE_ERR("ERROR: Unequal number of probabilities and translation alternatives: " << line << endl);
+						return false;
+					}
+
+					// store translation options into members
+					if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
+						// only store options if we aren't ignoring them
+						for (size_t i=0; i<altTexts.size(); ++i) {
+							// set default probability
+							float probValue = 1;
+							if (altProbs.size() > 0) probValue = Scan<float>(altProbs[i]);
+							// convert from prob to log-prob
+							float scoreValue = FloorScore(TransformScore(probValue));
+
+							WordsRange range(startPos,endPos-1); // span covered by phrase
+							TargetPhrase targetPhrase(Output);
+							targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],factorDelimiter);
+							targetPhrase.SetScore(scoreValue);
+
+							XmlOption *option = new XmlOption(range,targetPhrase);
+							assert(option);
+
+							if (isLinked)
+							{
+								// push all linked items as one column in our list of xmloptions
+								linkedOptions.push_back(option);
+							}
+							else
+							{
+								// push one-item list (not linked to anything)
+								vector<XmlOption*> optList(0);
+								optList.push_back(option);
+								res.push_back(optList);
+							}
+						}
+						altTexts.clear();
+						altProbs.clear();
+					}
+				}
+			}
+		}
+	}
+	// we are done. check if there are tags that are still open
+	if (tagStack.size() > 0)
+	{
+		TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
+		return false;
+	}
+
+	// return de-xml'ed sentence in line
+	line = cleanLine;
+	return true;
+}
+
+}
diff --git a/moses/src/XmlOption.h b/moses/src/XmlOption.h
new file mode 100644
index 000000000..2d91ae9fc
--- /dev/null
+++ b/moses/src/XmlOption.h
@@ -0,0 +1,32 @@
+#ifndef moses_XmlOption_h
+#define moses_XmlOption_h
+
+#include <vector>
+#include <string>
+#include "WordsRange.h"
+#include "TargetPhrase.h"
+#include "ReorderingConstraint.h"
+
+namespace Moses
+{
+
+class TranslationOption;
+
+/** This struct is used for storing XML force translation data for a given range in the sentence
+ */
+struct XmlOption {
+
+ // span of source positions this forced translation covers
+ WordsRange range;
+ // the translation that the XML markup forces for that span
+ TargetPhrase targetPhrase;
+ // options grouped ("linked") with this one; empty for a stand-alone option
+ std::vector<XmlOption*> linkedOptions;
+
+ XmlOption(const WordsRange &r, const TargetPhrase &tp): range(r), targetPhrase(tp), linkedOptions(0) {}
+
+};
+
+// Parses XML markup in 'line', strips it, and collects the forced translation
+// options into 'res' (one inner vector per group of linked options).
+// Returns false on malformed markup; on success 'line' holds the de-xml'ed
+// sentence. NOTE(review): 'reorderingConstraint' and 'walls' are presumably
+// filled from zone/wall tags -- confirm against the XmlOption.cpp implementation.
+bool ProcessAndStripXMLTags(std::string &line,std::vector<std::vector<XmlOption*> > &res, ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls );
+
+}
+
+#endif
+
diff --git a/moses/src/gzfilebuf.h b/moses/src/gzfilebuf.h
new file mode 100644
index 000000000..c427d04f4
--- /dev/null
+++ b/moses/src/gzfilebuf.h
@@ -0,0 +1,81 @@
+#ifndef moses_gzfile_buf_h
+#define moses_gzfile_buf_h
+
+#include <streambuf>
+#include <zlib.h>
+#include <cstring>
+
+// Read-only std::streambuf over a gzip-compressed file, backed by zlib's gzFile.
+// The first sizeof(int) bytes of _buff are reserved as a putback area; the
+// remainder is the read buffer. Write and seek operations are unsupported.
+class gzfilebuf : public std::streambuf {
+public:
+ // Opens 'filename' for binary gzip reading and sets an empty get area so
+ // the first read triggers underflow().
+ // NOTE(review): gzopen may return NULL (e.g. file missing); that is not
+ // checked here, so later gzread/gzclose calls would receive a NULL handle.
+ gzfilebuf(const char *filename)
+ { _gzf = gzopen(filename, "rb");
+ setg (_buff+sizeof(int), // beginning of putback area
+ _buff+sizeof(int), // read position
+ _buff+sizeof(int)); // end position
+ }
+ ~gzfilebuf() { gzclose(_gzf); }
+protected:
+ // Writing is unsupported.
+ // NOTE(review): a bare 'throw;' with no exception currently being handled
+ // calls std::terminate rather than raising a catchable exception.
+ virtual int_type overflow (int_type c) {
+ throw;
+ }
+
+ // write multiple characters -- unsupported (same terminate caveat as above)
+ virtual
+ std::streamsize xsputn (const char* s,
+ std::streamsize num) {
+ throw;
+ }
+
+ // Seeking is unsupported (same terminate caveat as overflow above).
+ virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ){ throw;
+ }
+
+ //read one character
+ virtual int_type underflow () {
+ // is read position before end of _buff?
+ if (gptr() < egptr()) {
+ return traits_type::to_int_type(*gptr());
+ }
+
+ /* process size of putback area
+ * - use number of characters read
+ * - but at most four
+ */
+ unsigned int numPutback = gptr() - eback();
+ if (numPutback > sizeof(int)) {
+ numPutback = sizeof(int);
+ }
+
+ /* copy up to four characters previously read into
+ * the putback _buff (area of first four characters)
+ */
+ std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
+ numPutback);
+
+ // read new characters
+ int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
+ if (num <= 0) {
+ // ERROR or EOF
+ return EOF;
+ }
+
+ // reset _buff pointers
+ setg (_buff+(sizeof(int)-numPutback), // beginning of putback area
+ _buff+sizeof(int), // read position
+ _buff+sizeof(int)+num); // end of buffer
+
+ // return next character
+ return traits_type::to_int_type(*gptr());
+ }
+
+ // Bulk read straight from the gzFile, bypassing _buff and the putback area.
+ // Returns the gzread result. NOTE(review): gzread returns -1 on error, which
+ // callers of xsgetn normally expect to be a non-negative count.
+ std::streamsize xsgetn (char* s,
+ std::streamsize num) {
+ return gzread(_gzf,s,num);
+ }
+
+private:
+ gzFile _gzf; // zlib handle for the compressed file
+ static const unsigned int _buffsize = 1024; // total size of _buff
+ char _buff[_buffsize]; // putback area (sizeof(int) bytes) + read buffer
+};
+
+#endif
diff --git a/moses/src/hash.cpp b/moses/src/hash.cpp
new file mode 100644
index 000000000..f389858f5
--- /dev/null
+++ b/moses/src/hash.cpp
@@ -0,0 +1,60 @@
+// $Id: hash.cpp 400 2006-07-31 02:46:59Z hieuhoang1972 $
+
+// Bob Jenkins' mixing step for the lookup hash (see hash.h for the source
+// reference). Reversibly mixes three 32-bit accumulators.
+// NOTE: as a macro, each argument is evaluated many times -- pass only
+// plain variables, never expressions with side effects.
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12); \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3); \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+// Hashes 'length' bytes of key 'k', 12 bytes at a time, folding the tail
+// through the switch below. NOTE(review): where plain char is signed, bytes
+// > 127 sign-extend in the (unsigned int)k[i] conversions, so results can
+// differ across platforms with different char signedness -- confirm intended.
+/* the key */
+/* the length of the key */
+/* the previous hash, or an arbitrary value */
+unsigned int quick_hash(register const char *k, register unsigned int length, register unsigned int initval)
+{
+   register unsigned int a,b,c,len;
+
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = initval;         /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)
+   {
+      a += (k[0] +((unsigned int)k[1]<<8) +((unsigned int)k[2]<<16) +((unsigned int)k[3]<<24));
+      b += (k[4] +((unsigned int)k[5]<<8) +((unsigned int)k[6]<<16) +((unsigned int)k[7]<<24));
+      c += (k[8] +((unsigned int)k[9]<<8) +((unsigned int)k[10]<<16)+((unsigned int)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((unsigned int)k[10]<<24);
+   case 10: c+=((unsigned int)k[9]<<16);
+   case 9 : c+=((unsigned int)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((unsigned int)k[7]<<24);
+   case 7 : b+=((unsigned int)k[6]<<16);
+   case 6 : b+=((unsigned int)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((unsigned int)k[3]<<24);
+   case 3 : a+=((unsigned int)k[2]<<16);
+   case 2 : a+=((unsigned int)k[1]<<8);
+   case 1 : a+=k[0];
+   /* case 0: nothing left to add */
+   }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c;
+}
+
diff --git a/moses/src/hash.h b/moses/src/hash.h
new file mode 100644
index 000000000..bef9c6002
--- /dev/null
+++ b/moses/src/hash.h
@@ -0,0 +1,8 @@
+#ifndef moses_hash_h
+#define moses_hash_h
+
+// taken from burtleburtle.net/bob/hash/doobs.html
+// k: the key bytes; length: number of bytes in k; initval: the previous
+// hash value, or an arbitrary seed. Returns a 32-bit hash of the key.
+unsigned int quick_hash(register const char *k, register unsigned int length, register unsigned int initval);
+
+#endif
+
diff --git a/moses/src/hypergraph.proto b/moses/src/hypergraph.proto
new file mode 100644
index 000000000..e292eacca
--- /dev/null
+++ b/moses/src/hypergraph.proto
@@ -0,0 +1,30 @@
+package hgmert;
+
+import "rule.proto";
+
+// A translation hypergraph: nodes (optionally carrying LM state) connected
+// by edges that combine tail nodes into a head node via a Rule.
+message Hypergraph {
+
+  message Node {
+    optional string category = 1;
+    // language-model context carried by this node -- presumably boundary
+    // words; confirm against the code that writes these messages
+    repeated string lm_state = 2;
+  }
+
+  message Edge {
+    // presumably indices into 'nodes' below -- TODO confirm
+    repeated int32 tail_nodes = 4;
+    required int32 head_node = 5;
+    // must be as many as num_features below
+    repeated float feature_values = 6;
+    required Rule rule = 7;
+  }
+
+  repeated Node nodes = 8;
+  repeated Edge edges = 9;
+  // NOTE(review): the sort order asserted here is not specified in this file
+  required bool is_sorted = 10;
+  // the number of features in the feature vector
+  required int32 num_features = 11;
+  // string names of the features
+  repeated string feature_names = 12;
+  // original source-side sentence, if recorded
+  optional string src_string = 13;
+  // identifier of the system that produced this hypergraph, if recorded
+  optional string sysid = 14;
+}
+
+
diff --git a/moses/src/rule.proto b/moses/src/rule.proto
new file mode 100644
index 000000000..5f8084636
--- /dev/null
+++ b/moses/src/rule.proto
@@ -0,0 +1,10 @@
+package hgmert;
+
+// A synchronous grammar rule: parallel target- and source-side token
+// sequences, with non-terminals encoded inline in the token strings.
+message Rule {
+  // [[A-Z]+,[1-9]] defines a non-terminal, everything else is a terminal
+  repeated string trg_words = 1;
+  // [[A-Z]+,[1-9]] defines a non-terminal, everything else is a terminal
+  repeated string src_words = 2;
+  // presumably the rule's left-hand-side label -- TODO confirm
+  optional string category = 3;
+}
+