Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorskyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230>2010-04-21 15:56:25 +0400
committerskyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230>2010-04-21 15:56:25 +0400
commit4ec8af8e42363d17b37b6615434cc40a5ad8448d (patch)
treee6ff20917ca0b86b01095ddaaf56aa414345fe9d
parente2767d8f76f6ffc36eb92d26cddc479a83b1b573 (diff)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/DPR_MOSES@3159 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r--misc/.cdtbuild83
-rw-r--r--misc/.cdtproject15
-rw-r--r--misc/.cproject173
-rw-r--r--misc/.project84
-rw-r--r--misc/GenerateTuples.cpp297
-rw-r--r--misc/GenerateTuples.h12
-rw-r--r--misc/Makefile.am13
-rw-r--r--misc/java-utils/.classpath6
-rw-r--r--misc/java-utils/.cvsignore1
-rw-r--r--misc/java-utils/.project17
-rw-r--r--misc/java-utils/CombineTags.java89
-rw-r--r--misc/java-utils/ProcessShallowParse.java82
-rw-r--r--misc/java-utils/ShrinkSentence.java48
-rw-r--r--misc/java-utils/TagHierarchy.java135
-rw-r--r--misc/processLexicalTable.cpp52
-rw-r--r--misc/processLexicalTable.vcproj354
-rw-r--r--misc/processPhraseTable.cpp200
-rw-r--r--misc/processPhraseTable.vcproj354
-rw-r--r--misc/queryLexicalTable.cpp110
19 files changed, 2125 insertions, 0 deletions
diff --git a/misc/.cdtbuild b/misc/.cdtbuild
new file mode 100644
index 000000000..351d81b4d
--- /dev/null
+++ b/misc/.cdtbuild
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 3.1.0?>
+
+<ManagedProjectBuildInfo>
+<project id="misc.cdt.managedbuild.target.gnu.exe.720875320" name="Executable (Gnu)" projectType="cdt.managedbuild.target.gnu.exe">
+<configuration artifactName="processPhraseTable" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.exe.debug.968564372" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.9940797" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+<option id="gnu.cpp.compiler.option.include.paths.634924686" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../moses/src&quot;"/>
+</option>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.694708706" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1774294955" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1519446567" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+<option id="gnu.cpp.link.option.userobjs.1214926161" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
+<listOptionValue builtIn="false" value="&quot;../../moses/${ConfigName}/libmoses.a&quot;"/>
+</option>
+<option id="gnu.cpp.link.option.libs.1358709998" superClass="gnu.cpp.link.option.libs" valueType="libs">
+<listOptionValue builtIn="false" value="z"/>
+<listOptionValue builtIn="false" value="irstlm"/>
+<listOptionValue builtIn="false" value="lattice"/>
+<listOptionValue builtIn="false" value="misc"/>
+<listOptionValue builtIn="false" value="dstruct"/>
+<listOptionValue builtIn="false" value="oolm"/>
+</option>
+<option id="gnu.cpp.link.option.paths.512401148" superClass="gnu.cpp.link.option.paths" valueType="stringList">
+<listOptionValue builtIn="false" value="&quot;../../srilm/lib/i686&quot;"/>
+<listOptionValue builtIn="false" value="&quot;../../irstlm/lib/i686-redhat-linux-gnu&quot;"/>
+</option>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.668977706" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug"/>
+<macros/>
+</toolChain>
+<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.exe.debug.968564372./misc/processLexicalTable.cpp" name="processLexicalTable.cpp" rcbsApplicability="disable" resourcePath="/misc/processLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/processLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/processLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323"/>
+</resourceConfiguration>
+<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.exe.debug.968564372./misc/queryLexicalTable.cpp" name="queryLexicalTable.cpp" rcbsApplicability="disable" resourcePath="/misc/queryLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/queryLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/queryLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323"/>
+</resourceConfiguration>
+</configuration>
+<configuration artifactName="processPhraseTable" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.exe.release.740659024" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.548693740" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+<option id="gnu.cpp.compiler.option.include.paths.1215088252" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../moses/src&quot;"/>
+</option>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.871712778" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release"/>
+<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.143684865" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.792459607" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+<option id="gnu.cpp.link.option.userobjs.1902362839" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
+<listOptionValue builtIn="false" value="&quot;../../moses/${ConfigName}/libmoses.a&quot;"/>
+</option>
+<option id="gnu.cpp.link.option.libs.679824006" superClass="gnu.cpp.link.option.libs" valueType="libs">
+<listOptionValue builtIn="false" value="z"/>
+<listOptionValue builtIn="false" value="lattice"/>
+<listOptionValue builtIn="false" value="misc"/>
+<listOptionValue builtIn="false" value="dstruct"/>
+<listOptionValue builtIn="false" value="oolm"/>
+<listOptionValue builtIn="false" value="irstlm"/>
+</option>
+<option id="gnu.cpp.link.option.paths.1956617803" superClass="gnu.cpp.link.option.paths" valueType="stringList">
+<listOptionValue builtIn="false" value="&quot;../../srilm/lib/i686&quot;"/>
+<listOptionValue builtIn="false" value="&quot;../../irstlm/lib/i686-redhat-linux-gnu&quot;"/>
+</option>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1701666898" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release"/>
+<macros/>
+</toolChain>
+<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.exe.release.740659024./misc/GenerateTuples.cpp" name="GenerateTuples.cpp" rcbsApplicability="disable" resourcePath="/misc/GenerateTuples.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/GenerateTuples.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/GenerateTuples.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067"/>
+</resourceConfiguration>
+<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.exe.release.740659024./misc/queryLexicalTable.cpp" name="queryLexicalTable.cpp" rcbsApplicability="disable" resourcePath="/misc/queryLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/queryLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/queryLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067"/>
+</resourceConfiguration>
+<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.exe.release.740659024./misc/processLexicalTable.cpp" name="processLexicalTable.cpp" rcbsApplicability="disable" resourcePath="/misc/processLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/processLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/processLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067"/>
+</resourceConfiguration>
+</configuration>
+<macros/>
+</project>
+</ManagedProjectBuildInfo>
diff --git a/misc/.cdtproject b/misc/.cdtproject
new file mode 100644
index 000000000..6c0340e1f
--- /dev/null
+++ b/misc/.cdtproject
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?eclipse-cdt version="2.0"?>
+
+<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake">
+<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
+<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
+<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+<data>
+<item id="org.eclipse.cdt.core.pathentry">
+<pathentry kind="src" path=""/>
+<pathentry kind="out" path=""/>
+<pathentry kind="con" path="org.eclipse.cdt.managedbuilder.MANAGED_CONTAINER"/>
+</item>
+</data>
+</cdtproject>
diff --git a/misc/.cproject b/misc/.cproject
new file mode 100644
index 000000000..405dd3c13
--- /dev/null
+++ b/misc/.cproject
@@ -0,0 +1,173 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject>
+<storageModule moduleId="org.eclipse.cdt.core.settings">
+<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.968564372">
+<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.968564372" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+<externalSettings/>
+<extensions>
+<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+</extensions>
+</storageModule>
+<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+<configuration artifactExtension="" artifactName="processPhraseTable" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.exe.debug.968564372" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.968564372.540679682" name="/" resourcePath="">
+<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.9940797" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.2030392618" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+<builder buildPath="${workspace_loc:/misc/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.1148261757" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+<option id="gnu.cpp.compiler.option.include.paths.634924686" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../moses/src&quot;"/>
+</option>
+<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.516647124" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1854615959" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1264389336" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.694708706" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1818506362" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+<option id="gnu.c.compiler.exe.debug.option.debugging.level.310245060" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1239923844" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1774294955" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1519446567" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+<option id="gnu.cpp.link.option.userobjs.1214926161" name="Other objects" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
+<listOptionValue builtIn="false" value="&quot;../../moses/${ConfigName}/libmoses.a&quot;"/>
+</option>
+<option id="gnu.cpp.link.option.libs.1358709998" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+<listOptionValue builtIn="false" value="z"/>
+<listOptionValue builtIn="false" value="randlm"/>
+<listOptionValue builtIn="false" value="irstlm"/>
+<listOptionValue builtIn="false" value="lattice"/>
+<listOptionValue builtIn="false" value="misc"/>
+<listOptionValue builtIn="false" value="dstruct"/>
+<listOptionValue builtIn="false" value="oolm"/>
+</option>
+<option id="gnu.cpp.link.option.paths.512401148" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="stringList">
+<listOptionValue builtIn="false" value="&quot;../../srilm/lib/i686&quot;"/>
+<listOptionValue builtIn="false" value="&quot;../../randlm/lib&quot;"/>
+<listOptionValue builtIn="false" value="&quot;../../irstlm/lib/i686-redhat-linux-gnu&quot;"/>
+</option>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.668977706" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1887849438" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.archiver.base.1023715696" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+</toolChain>
+</folderInfo>
+<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.968564372./misc/processLexicalTable.cpp" name="processLexicalTable.cpp" rcbsApplicability="disable" resourcePath="processLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/processLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/processLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323">
+<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.998573704" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.109384265" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.884067908" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+</fileInfo>
+<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.968564372./misc/queryLexicalTable.cpp" name="queryLexicalTable.cpp" rcbsApplicability="disable" resourcePath="queryLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/queryLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323./misc/queryLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1174838323">
+<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1238173685" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.105211765" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1749305049" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+</fileInfo>
+<sourceEntries>
+<entry excluding="processLexicalTable.cpp|queryLexicalTable.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+</sourceEntries>
+</configuration>
+</storageModule>
+<storageModule moduleId="scannerConfiguration"/>
+<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+</cconfiguration>
+<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.740659024">
+<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.740659024" moduleId="org.eclipse.cdt.core.settings" name="Release">
+<externalSettings/>
+<extensions>
+<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+</extensions>
+</storageModule>
+<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+<configuration artifactExtension="" artifactName="processPhraseTable" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.exe.release.740659024" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+<folderInfo id="cdt.managedbuild.config.gnu.exe.release.740659024.1221432227" name="/" resourcePath="">
+<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.548693740" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1675623137" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+<builder buildPath="${workspace_loc:/misc/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1102466478" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+<option id="gnu.cpp.compiler.option.include.paths.1215088252" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+<listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../moses/src&quot;"/>
+</option>
+<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1166195501" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.release.option.debugging.level.384708418" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1468360366" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.871712778" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.153638233" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+<option id="gnu.c.compiler.exe.release.option.debugging.level.399452460" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.366352650" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.143684865" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.792459607" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+<option id="gnu.cpp.link.option.userobjs.1902362839" name="Other objects" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
+<listOptionValue builtIn="false" value="&quot;../../moses/${ConfigName}/libmoses.a&quot;"/>
+</option>
+<option id="gnu.cpp.link.option.libs.679824006" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+<listOptionValue builtIn="false" value="z"/>
+<listOptionValue builtIn="false" value="randlm"/>
+<listOptionValue builtIn="false" value="lattice"/>
+<listOptionValue builtIn="false" value="misc"/>
+<listOptionValue builtIn="false" value="dstruct"/>
+<listOptionValue builtIn="false" value="oolm"/>
+<listOptionValue builtIn="false" value="irstlm"/>
+</option>
+<option id="gnu.cpp.link.option.paths.1956617803" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="stringList">
+<listOptionValue builtIn="false" value="&quot;../../srilm/lib/i686&quot;"/>
+<listOptionValue builtIn="false" value="&quot;../../randlm/lib&quot;"/>
+<listOptionValue builtIn="false" value="&quot;../../irstlm/lib/i686-redhat-linux-gnu&quot;"/>
+</option>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1701666898" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1631137698" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+</tool>
+<tool id="cdt.managedbuild.tool.gnu.archiver.base.1184465131" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+</toolChain>
+</folderInfo>
+<fileInfo id="cdt.managedbuild.config.gnu.exe.release.740659024./misc/GenerateTuples.cpp" name="GenerateTuples.cpp" rcbsApplicability="disable" resourcePath="GenerateTuples.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/GenerateTuples.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/GenerateTuples.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067">
+<option id="gnu.cpp.compiler.exe.release.option.optimization.level.952213694" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1956331211" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1501021312" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+</fileInfo>
+<fileInfo id="cdt.managedbuild.config.gnu.exe.release.740659024./misc/processLexicalTable.cpp" name="processLexicalTable.cpp" rcbsApplicability="disable" resourcePath="processLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/processLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/processLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067">
+<option id="gnu.cpp.compiler.exe.release.option.optimization.level.2140608126" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1705910784" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1360203886" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+</fileInfo>
+<fileInfo id="cdt.managedbuild.config.gnu.exe.release.740659024./misc/queryLexicalTable.cpp" name="queryLexicalTable.cpp" rcbsApplicability="disable" resourcePath="queryLexicalTable.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/queryLexicalTable.cpp">
+<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067./misc/queryLexicalTable.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1949270067">
+<option id="gnu.cpp.compiler.exe.release.option.optimization.level.655834898" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+<option id="gnu.cpp.compiler.exe.release.option.debugging.level.959500279" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.125175402" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+</tool>
+</fileInfo>
+<sourceEntries>
+<entry excluding="GenerateTuples.cpp|queryLexicalTable.cpp|processLexicalTable.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+</sourceEntries>
+</configuration>
+</storageModule>
+<storageModule moduleId="scannerConfiguration"/>
+<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+</cconfiguration>
+</storageModule>
+<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+<project id="misc.cdt.managedbuild.target.gnu.exe.720875320" name="Executable (Gnu)" projectType="cdt.managedbuild.target.gnu.exe"/>
+</storageModule>
+</cproject>
diff --git a/misc/.project b/misc/.project
new file mode 100644
index 000000000..0bbacfebc
--- /dev/null
+++ b/misc/.project
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>misc</name>
+ <comment></comment>
+ <projects>
+ <project>irstlm</project>
+ <project>moses</project>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+ <triggers>clean,full,incremental,</triggers>
+ <arguments>
+ <dictionary>
+ <key>?name?</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.append_environment</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+ <value>all</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildArguments</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildCommand</key>
+ <value>make</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildLocation</key>
+ <value>${workspace_loc:/misc/Debug}</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+ <value>clean</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.contents</key>
+ <value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+ <value>false</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableFullBuild</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+ <value>all</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.stopOnError</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+ <value>true</value>
+ </dictionary>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.cdt.core.ccnature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+ <nature>org.eclipse.cdt.core.cnature</nature>
+ </natures>
+</projectDescription>
diff --git a/misc/GenerateTuples.cpp b/misc/GenerateTuples.cpp
new file mode 100644
index 000000000..81e40cc38
--- /dev/null
+++ b/misc/GenerateTuples.cpp
@@ -0,0 +1,297 @@
+
+////////////////////////////////////////////////////////////
+//
+// generate set of target candidates for confusion net
+//
+////////////////////////////////////////////////////////////
+
+
+
+#include <numeric>
+#include "Word.h"
+#include "Phrase.h"
+#include "ConfusionNet.h"
+#include "WordsRange.h"
+#include "PhraseDictionaryTree.h"
+
+using namespace Moses;
+
+#if 0
+// Generates all tuples over num_idx indexes, where index j takes the values
+// 0 .. ranges[j]-1 (mixed-radix counting, index 0 varies fastest).
+// Input:  num_idx -- number of indexes
+//         ranges  -- ranges[0] ... ranges[num_idx-1], the radix of each index
+// Output: tuples  -- newly allocated flat array of num_tuples * num_idx
+//                    entries; tuple n occupies
+//                    tuples[n*num_idx] ... tuples[(n+1)*num_idx-1].
+//                    The caller must release it with delete [].
+// Returns the number of tuples, i.e. the product of all ranges
+// (1 for num_idx==0, 0 if any range is 0).
+// Reference: mixed-radix generation algorithm (D. E. Knuth, TAOCP v. 4.2)
+
+size_t GenerateTuples(unsigned num_idx,unsigned* ranges,unsigned *&tuples)
+{
+  // one extra slot absorbs the carry out of the last index after the final
+  // tuple has been emitted
+  unsigned* single_tuple= new unsigned[num_idx+1];
+  // counted in size_t (the return type) so the product is not truncated
+  size_t num_tuples=1;
+
+  for (unsigned k=0;k<num_idx;++k)
+  {
+    num_tuples *= ranges[k];
+    single_tuple[k]=0;
+  }
+  single_tuple[num_idx]=0;
+
+  tuples=new unsigned[num_idx * num_tuples];
+
+  for (size_t n=0;n<num_tuples;++n){
+    memcpy((void *)((tuples + n * num_idx)),(void *)single_tuple,num_idx * sizeof(unsigned));
+    // add one with carry; ranges has only num_idx entries, so the carry
+    // must stop at num_idx instead of reading ranges[num_idx]
+    unsigned j=0;
+    while (j<num_idx && single_tuple[j]==ranges[j]-1){single_tuple[j]=0; ++j;}
+    ++single_tuple[j];
+  }
+  delete [] single_tuple;
+  return num_tuples;
+}
+
+
+typedef PhraseDictionaryTree::PrefixPtr PPtr;
+typedef std::vector<PPtr> vPPtr;
+typedef std::vector<std::vector<Factor const*> > mPhrase;
+
+// Prints a multi-factor phrase: for every entry of p its index followed by
+// " - ", then the string form of each factor of that entry (each followed by
+// a blank), then a closing "|".
+std::ostream& operator<<(std::ostream& out,const mPhrase& p) {
+  size_t idx=0;
+  for(mPhrase::const_iterator row=p.begin();row!=p.end();++row,++idx) {
+    out<<idx<<" - ";
+    for(std::vector<Factor const*>::const_iterator f=row->begin();f!=row->end();++f)
+      out<<(*f)->ToString()<<" ";
+    out<<"|";
+  }
+
+  return out;
+}
+
+// Search state used while walking the confusion network: the prefix pointers
+// reached so far, the source span they cover, and a score.
+struct State {
+  vPPtr ptrs;        // prefix pointers
+  WordsRange range;  // source positions of this state
+  float score;       // score carried along with the state
+
+  // empty state at position (0,0) with score 0
+  State()
+    : range(0,0), score(0.0)
+  {}
+
+  // state with range (b,e), prefix pointers v and score sc
+  State(size_t b,size_t e,const vPPtr& v,float sc=0.0)
+    : ptrs(v), range(b,e), score(sc)
+  {}
+
+  size_t begin() const { return range.GetStartPos(); }
+  size_t end() const { return range.GetEndPos(); }
+  float GetScore() const { return score; }
+
+};
+
+// Prints a state as "[<number of prefix pointers> (<begin>,<end>) <score>]".
+std::ostream& operator<<(std::ostream& out,const State& s) {
+  out<<"["<<s.ptrs.size();
+  out<<" ("<<s.begin()<<","<<s.end()<<") ";
+  out<<s.GetScore()<<"]";
+
+  return out;
+}
+
+typedef std::map<mPhrase,float> E2Costs;
+
+
+// Data shared by all steps of candidate generation: the dictionaries and
+// their weights, the input/output factor type and root pointer of each
+// dictionary, and counters for the generated tuples.
+// pdicts and weights are held by reference, so the vectors passed to the
+// constructor must outlive this object.
+struct GCData {
+  const std::vector<PhraseDictionaryTree const*>& pdicts;
+  const std::vector<std::vector<float> >& weights;
+  std::vector<FactorType> inF,outF;
+  size_t distinctOutputFactors;
+  vPPtr root;
+  size_t totalTuples,distinctTuples;
+
+
+  // a: dictionaries, b: one weight vector per dictionary (same length as a)
+  GCData(const std::vector<PhraseDictionaryTree const*>& a,
+         const std::vector<std::vector<float> >& b)
+    : pdicts(a),weights(b),totalTuples(0),distinctTuples(0) {
+
+    assert(pdicts.size()==weights.size());
+    const size_t numDicts=pdicts.size();
+    std::set<FactorType> seenOutTypes;
+    inF.resize(numDicts);
+    outF.resize(numDicts);
+    root.resize(numDicts);
+    for(size_t d=0;d<numDicts;++d)
+    {
+      PhraseDictionaryTree const* dict=pdicts[d];
+      root[d]=dict->GetRoot();
+      inF[d]=dict->GetInputFactorType();
+      outF[d]=dict->GetOutputFactorType();
+      seenOutTypes.insert(dict->GetOutputFactorType());
+    }
+    // several dictionaries may share an output factor type; count each once
+    distinctOutputFactors=seenOutTypes.size();
+  }
+
+  // output / input factor type of dictionary i
+  FactorType OutFT(size_t i) const { return outF[i]; }
+  FactorType InFT(size_t i) const { return inF[i]; }
+  size_t DistinctOutFactors() const { return distinctOutputFactors; }
+
+  // root prefix pointer of every dictionary
+  const vPPtr& GetRoot() const { return root; }
+
+};
+
+typedef std::vector<Factor const*> vFactor;
+typedef std::vector<std::pair<float,vFactor> > TgtCandList;
+
+typedef std::vector<TgtCandList> OutputFactor2TgtCandList;
+typedef std::vector<OutputFactor2TgtCandList*> Len2Cands;
+
+// Collects the target candidates that dictionary number factorType offers
+// for the prefix pptr and files them by target length.
+//   factorType -- index into data.pdicts / data.weights
+//   pptr       -- prefix pointer into that dictionary
+//   data       -- shared dictionaries, weights and factor types
+//   len2cands  -- grown as needed; entry [len] is allocated with new on first
+//                 use and receives, under the dictionary's output factor
+//                 type, one (cost, factors) pair per candidate of that
+//                 length.  The entries are not released here.
+void GeneratePerFactorTgtList(size_t factorType,PPtr pptr,GCData& data,Len2Cands& len2cands)
+{
+  std::vector<FactorTgtCand> cands;
+  data.pdicts[factorType]->GetTargetCandidates(pptr,cands);
+
+  const std::vector<float>& w=data.weights[factorType];
+  for(std::vector<FactorTgtCand>::const_iterator cand=cands.begin();cand!=cands.end();++cand) {
+    assert(w.size()==cand->second.size());
+    // cost = dot product of the dictionary's weights with the candidate's scores
+    float costs=std::inner_product(w.begin(),w.end(),cand->second.begin(),0.0);
+
+    size_t len=cand->first.size();
+    if(len>=len2cands.size()) len2cands.resize(len+1,0);
+    if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors());
+    OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len];
+
+    // the list has one slot per distinct output factor type but is indexed
+    // by the factor type itself; catch factor types outside that range
+    assert(data.OutFT(factorType)<outf2tcandlist.size());
+    outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first));
+  }
+}
+
+// Combines the per-output-factor candidate lists in tCand into tuples (one
+// candidate per output factor) and records every tuple in e2costs with the
+// sum of its candidates' costs.  If a tuple is already present, the smaller
+// cost is kept.  Updates data.totalTuples and data.distinctTuples.
+// Does nothing if any of the lists is empty.
+void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data)
+{
+  // check if candidates are non-empty
+  for(size_t f=0;f<tCand.size();++f)
+    if(tCand[f].empty()) return;
+
+  // enumerate tuples
+  assert(data.DistinctOutFactors()==tCand.size());
+  std::vector<unsigned> radix(data.DistinctOutFactors());
+  for(size_t f=0;f<tCand.size();++f) radix[f]=tCand[f].size();
+
+  const size_t width=radix.size();
+  unsigned *tuples=0;
+  size_t numTuples=GenerateTuples(width,&radix[0],tuples);
+
+  data.totalTuples+=numTuples;
+
+  for(size_t t=0;t<numTuples;++t)
+  {
+    // tuple t selects one candidate index per output factor
+    const unsigned *tuple=tuples+width*t;
+    mPhrase e(width);
+    float costs=0.0;
+    for(size_t f=0;f<width;++f)
+    {
+      assert(tuple[f]<tCand[f].size());
+      std::pair<float,vFactor> const& mycand=tCand[f][tuple[f]];
+      e[f]=mycand.second;
+      costs+=mycand.first;
+    }
+#ifdef DEBUG
+    // all entries of a tuple must have the same length
+    for(size_t f=1;f<e.size();++f)
+      assert(e[f].size()==e[f-1].size());
+#endif
+    std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(e,costs));
+    if(p.second) {
+      ++data.distinctTuples;
+    }
+    else if(costs<p.first->second) {
+      // entry known, take min of costs, alternative: sum probs
+      p.first->second=costs;
+    }
+  }
+  delete [] tuples;
+}
+
+// Generates the target candidates reachable from the prefix pointers nextP
+// (null entries are skipped) and merges the resulting tuples into e2costs.
+//   e2costs -- receives the tuples and their costs
+//   nextP   -- one prefix pointer per dictionary in data
+//   data    -- shared dictionaries, weights and tuple counters
+void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data)
+{
+  Len2Cands len2cands;
+  // generate candidates for each element of nextP:
+  for(size_t factorType=0;factorType<nextP.size();++factorType)
+    if(nextP[factorType])
+      GeneratePerFactorTgtList(factorType,nextP[factorType],data,len2cands);
+
+  // for each length: enumerate tuples, compute score, and insert in e2costs
+  for(size_t len=0;len<len2cands.size();++len) if(len2cands[len])
+  {
+    GenerateTupleTgtCands(*len2cands[len],e2costs,data);
+    // the per-length lists were allocated with new while collecting the
+    // candidates and are only referenced from len2cands; release them
+    delete len2cands[len];
+    len2cands[len]=0;
+  }
+}
+
+// Enumerates, for the source spans of the confusion network src, the target
+// candidates obtained by extending all dictionaries in pdicts word by word,
+// and prints statistics to std::cerr.
+//  src     -- input confusion network
+//  pdicts  -- phrase dictionaries, all extended in parallel
+//  weights -- weights[i] holds the score weights for pdicts[i]
+//  verbose -- non-zero: print tuple statistics; >10: also print every candidate
+// The candidates are collected in a local map and are not returned.
+void GenerateCandidates(const ConfusionNet& src,
+ const std::vector<PhraseDictionaryTree const*>& pdicts,
+ const std::vector<std::vector<float> >& weights,
+ int verbose) {
+ GCData data(pdicts,weights);
+
+ // seed the search with one state per source position, each starting at the
+ // dictionary roots
+ std::vector<State> stack;
+ for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,data.GetRoot()));
+
+ // candidates and their costs, keyed by the covered source range
+ std::map<WordsRange,E2Costs> cov2E;
+
+ // std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
+
+ while(!stack.empty())
+ {
+ State curr(stack.back());
+ stack.pop_back();
+
+ //std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
+
+ // curr.end() is the column that is read next
+ assert(curr.end()<src.GetSize());
+ const ConfusionNet::Column &currCol=src[curr.end()];
+ // try every alternative word of that column
+ for(size_t colidx=0;colidx<currCol.size();++colidx)
+ {
+ const Word& w=currCol[colidx].first;
+ // extend each dictionary's prefix by the word's factor of that
+ // dictionary's input factor type
+ vPPtr nextP(curr.ptrs);
+ for(size_t j=0;j<nextP.size();++j)
+ nextP[j]=pdicts[j]->Extend(nextP[j],
+ w.GetFactor(data.InFT(j))->GetString());
+
+ // the word is usable only if every dictionary could be extended
+ bool valid=1;
+ for(size_t j=0;j<nextP.size();++j) if(!nextP[j]) {valid=0;break;}
+
+ if(valid)
+ {
+ // keep extending unless the last column has just been consumed
+ if(curr.end()+1<src.GetSize())
+ stack.push_back(State(curr.begin(),curr.end()+1,nextP,
+ curr.GetScore()+currCol[colidx].second));
+
+ E2Costs &e2costs=cov2E[WordsRange(curr.begin(),curr.end()+1)];
+ GenerateCandidates_(e2costs,nextP,data);
+ }
+ }
+
+ // check if there are translations of one-word phrases ...
+ //if(curr.begin()==curr.end() && tCand.empty()) {}
+
+ } // end while(!stack.empty())
+
+ if(verbose) {
+ // print statistics for debugging purposes
+ // (distinct tuples are also shown as a percentage of all tuples)
+ std::cerr<<"tuple stats: total: "<<data.totalTuples
+ <<" distinct: "<<data.distinctTuples<<" ("
+ <<(data.distinctTuples/(0.01*data.totalTuples))
+ <<"%)\n";
+ std::cerr<<"per coverage set:\n";
+ for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
+ i!=cov2E.end();++i) {
+ std::cerr<<i->first<<" -- distinct cands: "
+ <<i->second.size()<<"\n";
+ }
+ std::cerr<<"\n\n";
+ }
+
+ if(verbose>10) {
+ // additionally list every candidate with its cost
+ std::cerr<<"full list:\n";
+ for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
+ i!=cov2E.end();++i) {
+ std::cerr<<i->first<<" -- distinct cands: "
+ <<i->second.size()<<"\n";
+ for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
+ std::cerr<<j->first<<" -- "<<j->second<<"\n";
+ }
+ }
+}
+
+#else
+
+// Placeholder used while the real implementation is compiled out: ignores
+// all arguments and reports on std::cerr that the function is broken.
+void GenerateCandidates(const ConfusionNet&,
+                        const std::vector<PhraseDictionaryTree const*>&,
+                        const std::vector<std::vector<float> >&,
+                        int)
+{
+  static const char message[]="ERROR: GenerateCandidates is currently broken\n";
+  std::cerr<<message;
+}
+
+#endif
diff --git a/misc/GenerateTuples.h b/misc/GenerateTuples.h
new file mode 100644
index 000000000..e21df84c6
--- /dev/null
+++ b/misc/GenerateTuples.h
@@ -0,0 +1,12 @@
+// $Id: GenerateTuples.h 359 2006-07-28 18:14:20Z zens $
+#ifndef GENERATETUPLES_H_
+#define GENERATETUPLES_H_
+#include "PhraseDictionaryTree.h"
+
+class ConfusionNet;
+
+void GenerateCandidates(const ConfusionNet& src,
+ const std::vector<PhraseDictionaryTree const*>& pdicts,
+ const std::vector<std::vector<float> >& weights,
+ int verbose=0) ;
+#endif
diff --git a/misc/Makefile.am b/misc/Makefile.am
new file mode 100644
index 000000000..d08f0e8c9
--- /dev/null
+++ b/misc/Makefile.am
@@ -0,0 +1,13 @@
+bin_PROGRAMS = processPhraseTable processLexicalTable queryLexicalTable
+
+processPhraseTable_SOURCES = GenerateTuples.cpp processPhraseTable.cpp
+processLexicalTable_SOURCES = processLexicalTable.cpp
+queryLexicalTable_SOURCES = queryLexicalTable.cpp
+
+AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
+
+processPhraseTable_LDADD = $(top_builddir)/moses/src/libmoses.la $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
+
+processLexicalTable_LDADD = $(top_builddir)/moses/src/libmoses.la $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
+
+queryLexicalTable_LDADD = $(top_builddir)/moses/src/libmoses.la $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
diff --git a/misc/java-utils/.classpath b/misc/java-utils/.classpath
new file mode 100644
index 000000000..6d488d7cb
--- /dev/null
+++ b/misc/java-utils/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path=""/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="output" path=""/>
+</classpath>
diff --git a/misc/java-utils/.cvsignore b/misc/java-utils/.cvsignore
new file mode 100644
index 000000000..2d7e4dc50
--- /dev/null
+++ b/misc/java-utils/.cvsignore
@@ -0,0 +1 @@
+*.class
diff --git a/misc/java-utils/.project b/misc/java-utils/.project
new file mode 100644
index 000000000..15d85ee1a
--- /dev/null
+++ b/misc/java-utils/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>java-utils</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
diff --git a/misc/java-utils/CombineTags.java b/misc/java-utils/CombineTags.java
new file mode 100644
index 000000000..0c0d91c65
--- /dev/null
+++ b/misc/java-utils/CombineTags.java
@@ -0,0 +1,89 @@
+// $Id: CombineTags.java 677 2006-08-12 01:31:16Z hieuhoang1972 $
+
+import java.io.*;
+import java.util.*;
+
+// create sentences with all features combined from files with individual tags
+class CombineTags
+{
+ public static void main(String[] args) throws Exception
+ {
+ System.err.println("Starting...");
+
+ Vector vecInstream = new Vector();
+ for (int i = 0 ; i < args.length ; i++)
+ {
+ InputStreamReader temp = new InputStreamReader(new FileInputStream(args[i]), "Latin1");
+ BufferedReader inStream = new BufferedReader(temp);
+ vecInstream.add(inStream);
+ }
+ OutputStreamWriter outStream = new OutputStreamWriter((OutputStream)System.out, "Latin1");
+
+ new CombineTags(vecInstream, outStream);
+
+ System.err.println("End...");
+ }
+
+ public CombineTags(Vector vecInstream , OutputStreamWriter outStream) throws Exception
+ {
+ BufferedReader inFile = (BufferedReader) vecInstream.get(0);
+ String inLine;
+ while ((inLine = inFile.readLine()) != null)
+ {
+ Vector phrases = new Vector();
+
+ // do 1st stream
+ Vector phrase = new Vector();
+ StringTokenizer st = new StringTokenizer(inLine);
+ while (st.hasMoreTokens())
+ {
+ String tag = st.nextToken();
+ phrase.add(tag);
+ }
+ phrases.add(phrase);
+
+ // read other stream
+ for (int i = 1 ; i < vecInstream.size() ; i++)
+ {
+ BufferedReader otherFile = (BufferedReader) vecInstream.get(i);
+ String otherLine = otherFile.readLine();
+ StringTokenizer otherSt = new StringTokenizer(otherLine);
+ Vector otherPhrase = new Vector();
+
+ while (otherSt.hasMoreTokens())
+ {
+ String tag = otherSt.nextToken();
+ otherPhrase.add(tag);
+ }
+ phrases.add(otherPhrase);
+ }
+
+ // combine
+ phrase = (Vector) phrases.get(0);
+
+ for (int pos = 0 ; pos < phrase.size() ; pos++)
+ {
+ String outLine = (String) phrase.get(pos) + "|";
+
+ for (int stream = 1 ; stream < phrases.size() ; stream++)
+ {
+ Vector otherPhrase = (Vector) phrases.get(stream);
+ String otherTag;
+ if (otherPhrase.size() <= pos)
+ otherTag = (String) otherPhrase.get(0);
+ else
+ otherTag = (String) otherPhrase.get(pos);
+ outLine += otherTag + "|";
+ }
+ outLine = outLine.substring(0, outLine.length() - 1) + " ";
+ outStream.write(outLine);
+ }
+ outStream.write("\n");
+ }
+ // close stream
+ outStream.flush();
+ outStream.close();
+ outStream = null;
+ }
+}
+
diff --git a/misc/java-utils/ProcessShallowParse.java b/misc/java-utils/ProcessShallowParse.java
new file mode 100644
index 000000000..208faccad
--- /dev/null
+++ b/misc/java-utils/ProcessShallowParse.java
@@ -0,0 +1,82 @@
+// $Id: ProcessShallowParse.java 678 2006-08-12 03:32:10Z hieuhoang1972 $
+
+
+import java.io.*;
+import java.util.*;
+
+//input is the sentences with all features combined
+//output sentences combination of morphology, lopar tags and parsed tags
+// used to create generation table
+public class ProcessShallowParse
+{
+    /**
+     * Command line entry point.  args[0] (optional) names the input file and
+     * args[1] (optional) the output file; standard input / output are used
+     * when they are missing.  Both sides use the Latin1 encoding.
+     */
+    public static void main(String[] args) throws Exception
+    {
+        System.err.println("Starting...");
+
+        InputStream rawIn = System.in;
+        if (args.length > 0)
+            rawIn = new FileInputStream(args[0]);
+        InputStreamReader inStream = new InputStreamReader(rawIn, "Latin1");
+
+        OutputStream rawOut = System.out;
+        if (args.length > 1)
+            rawOut = new FileOutputStream(args[1]);
+        OutputStreamWriter outStream = new OutputStreamWriter(rawOut, "Latin1");
+
+        new ProcessShallowParse2(inStream, outStream);
+
+        System.err.println("End...");
+    }
+}
+
+/**
+ * Rewrites factored sentences: every word of the form
+ * surface|pos|morphology|improved-pos is replaced by a single token (see
+ * Output) and the tokens of a sentence are written on one line.
+ */
+class ProcessShallowParse2
+{ // factored sentence
+
+    /**
+     * Processes inStream line by line and writes the result to outStream,
+     * which is flushed and closed at the end.  The number of lines read is
+     * reported on standard error.
+     */
+    public ProcessShallowParse2(Reader inStream, Writer outStream) throws Exception
+    {
+        BufferedReader inFile = new BufferedReader(inStream);
+        BufferedWriter outFile = new BufferedWriter(outStream);
+
+        // tokenise
+        String inLine;
+        // starts at 0 so that the reported count equals the lines read
+        int lineCount = 0;
+        while ((inLine = inFile.readLine()) != null)
+        {
+            StringTokenizer st = new StringTokenizer(inLine);
+            StringBuffer ret = new StringBuffer();
+            while (st.hasMoreTokens())
+            {
+                ret.append(Output(st.nextToken()));
+            }
+            outFile.write(ret.toString() + "\n");
+            lineCount++;
+        }
+        outFile.flush();
+        outFile.close();
+        System.err.print("no of lines = " + lineCount);
+    }
+
+    /**
+     * Maps one factored word to its output token (with a trailing blank):
+     * improved-pos + "_" + morphology for the tags ART-SB and NN-NK_NP-SB,
+     * "???" if the improved pos is "???", and the surface form otherwise.
+     *
+     * @throws IllegalArgumentException if the word has fewer than 4 factors
+     */
+    protected String Output(String factoredWord) throws Exception
+    {
+        StringTokenizer st = new StringTokenizer(factoredWord, "|");
+        if (st.countTokens() < 4)
+            throw new IllegalArgumentException("expected 4 '|'-separated factors, got: " + factoredWord);
+
+        String surface = st.nextToken();
+        st.nextToken(); // plain pos tag, not used
+        String morph = st.nextToken();
+        String posImproved = st.nextToken();
+
+        if (posImproved.equals("ART-SB")
+                || posImproved.equals("NN-NK_NP-SB"))
+        {
+            return posImproved + "_" + morph + " ";
+        }
+        if (posImproved.equals("???"))
+        {
+            return "??? ";
+        }
+        return surface + " ";
+    }
+}
diff --git a/misc/java-utils/ShrinkSentence.java b/misc/java-utils/ShrinkSentence.java
new file mode 100644
index 000000000..12490301c
--- /dev/null
+++ b/misc/java-utils/ShrinkSentence.java
@@ -0,0 +1,48 @@
+// $Id: ShrinkSentence.java 678 2006-08-12 03:32:10Z hieuhoang1972 $
+
+import java.io.*;
+import java.util.*;
+
+//used to create language model
+/**
+ * Copies sentences while dropping every "???" token.
+ */
+public class ShrinkSentence
+{
+    /**
+     * Command line entry point.  args[0] (optional) names the input file and
+     * args[1] (optional) the output file; standard input / output are used
+     * when they are missing.  Both sides use the Latin1 encoding.
+     */
+    public static void main(String[] args) throws Exception
+    {
+        System.err.println("Starting...");
+
+        InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
+                , "Latin1");
+        OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
+                , "Latin1");
+
+        new ShrinkSentence(inStream, outStream);
+
+        System.err.println("End...");
+    }
+
+    /**
+     * Reads inStream line by line and writes every whitespace-separated
+     * token except "???" to outStream, each followed by a blank, with one
+     * output line per input line.  outStream is flushed and closed at the
+     * end; the number of lines read is reported on standard error.
+     */
+    public ShrinkSentence(Reader inStream, Writer outStream) throws Exception
+    {
+        BufferedReader inFile = new BufferedReader(inStream);
+        BufferedWriter outFile = new BufferedWriter(outStream);
+
+        // tokenise
+        String inLine;
+        // starts at 0 so that the reported count equals the lines read
+        int lineCount = 0;
+        while ((inLine = inFile.readLine()) != null)
+        {
+            StringTokenizer st = new StringTokenizer(inLine);
+            while (st.hasMoreTokens())
+            {
+                String word = st.nextToken();
+                if (!word.equals("???"))
+                    outFile.write(word + " ");
+            }
+            outFile.write("\n");
+            lineCount++;
+        }
+        outFile.flush();
+        outFile.close();
+        System.err.print("no of lines = " + lineCount);
+    }
+}
\ No newline at end of file
diff --git a/misc/java-utils/TagHierarchy.java b/misc/java-utils/TagHierarchy.java
new file mode 100644
index 000000000..67c72bd78
--- /dev/null
+++ b/misc/java-utils/TagHierarchy.java
@@ -0,0 +1,135 @@
+// $Id: TagHierarchy.java 678 2006-08-12 03:32:10Z hieuhoang1972 $
+
+import java.io.*;
+import java.util.*;
+
+// create pos-tag sentences from LISP-like input tree.
+// NN-NK tag augmented with NP-SB if parent is NP-SB
+class TagHierarchy
+{
+    /** Tags that are written to the output; every other tag becomes "???". */
+    private static final Set KEPT_TAGS = new HashSet(Arrays.asList(new String[] {
+            "ART-SB", "NN-NK_NP-SB", "VAFIN-HD", "VVFIN-HD", "VMFIN-HD",
+            "PPER-SB", "PRELS-SB", "PDS-SB", "PPER-PH", "PPER-EP" }));
+
+    /**
+     * Command line entry point.  args[0] (optional) names the input file and
+     * args[1] (optional) the output file; standard input / output are used
+     * when they are missing.  Both sides use the Latin1 encoding.
+     */
+    public static void main(String[] args) throws Exception
+    {
+        System.err.println("Starting...");
+
+        InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
+                , "Latin1");
+        OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
+                , "Latin1");
+
+        new TagHierarchy(inStream, outStream);
+
+        System.err.println("End...");
+    }
+
+    /**
+     * Converts every parse tree line of inStream into a line of tags.  A
+     * line consisting of "null" is copied unchanged and counted; the count
+     * is reported on standard error.  outStream is flushed and closed.
+     */
+    public TagHierarchy(Reader inStream, OutputStreamWriter outStream) throws Exception
+    {
+        BufferedReader inFile = new BufferedReader(inStream);
+        BufferedWriter outFile = new BufferedWriter(outStream);
+
+        // tokenise
+        String inLine;
+        int nullLines = 0;
+        while ((inLine = inFile.readLine()) != null)
+        {
+            if (inLine.equals("null"))
+            {
+                nullLines++;
+                outFile.write("null\n");
+            }
+            else
+            {
+                OutputHierarchy2(inLine, outFile);
+            }
+        }
+        outFile.flush();
+        outFile.close();
+        System.err.println(nullLines + " null lines\n");
+    }
+
+    /**
+     * Position of the first ')' in a word token such as "Hund))".
+     *
+     * @throws IllegalArgumentException if the token contains no ')'
+     */
+    private static int closingBracketPos(String parsed)
+    {
+        int firstBracket = parsed.indexOf(')');
+        if (firstBracket < 0)
+            throw new IllegalArgumentException("expected ')' in token: " + parsed);
+        return firstBracket;
+    }
+
+    // indent parsed tree to make it easier to look at
+    public void OutputHierarchy(String inLine, BufferedWriter outFile) throws Exception
+    {
+        int level = 0;
+        StringTokenizer st = new StringTokenizer(inLine);
+        while (st.hasMoreTokens())
+        {
+            String parsed = st.nextToken();
+            if (parsed.startsWith("("))
+            { // start of new node: new line, one blank per nesting level
+                outFile.write('\n');
+                for (int currLevel = 0 ; currLevel < level ; currLevel++)
+                {
+                    outFile.write(' ');
+                }
+                outFile.write(parsed.substring(1));
+                level++;
+            }
+            else
+            { // closing nodes: the word followed by one ')' per closed node
+                int firstBracket = closingBracketPos(parsed);
+                int noBracket = parsed.length() - firstBracket;
+                outFile.write(" == " + parsed.substring(0, firstBracket));
+                level -= noBracket;
+            }
+        }
+        outFile.write('\n');
+    }
+
+    /**
+     * Writes one tag per word of the tree in inLine, followed by a newline.
+     * The tag of a word is the innermost open node; NN-NK directly below
+     * NP-SB becomes NN-NK_NP-SB.  Tags outside the kept set are written as
+     * "???".
+     *
+     * @throws IllegalArgumentException if a word appears outside any node or
+     *                                  a word token contains no ')'
+     */
+    public void OutputHierarchy2(String inLine, BufferedWriter outFile) throws Exception
+    {
+        Stack prevTags = new Stack();
+
+        StringTokenizer st = new StringTokenizer(inLine);
+
+        while (st.hasMoreTokens())
+        {
+            String parsed = st.nextToken();
+            if (parsed.startsWith("("))
+            { // start of new node
+                prevTags.push(parsed.substring(1));
+            }
+            else
+            { // closing nodes
+                if (prevTags.isEmpty())
+                    throw new IllegalArgumentException("word outside of any node: " + parsed);
+
+                String currTag = (String) prevTags.peek();
+                // a word directly below the root node has no parent tag
+                String parentTag = prevTags.size() >= 2
+                        ? (String) prevTags.get(prevTags.size() - 2)
+                        : "";
+                if (currTag.equals("NN-NK") && parentTag.equals("NP-SB"))
+                    currTag += "_" + parentTag;
+
+                int firstBracket = closingBracketPos(parsed);
+                int noBracket = parsed.length() - firstBracket;
+
+                if (KEPT_TAGS.contains(currTag))
+                    outFile.write(currTag + " ");
+                else
+                    outFile.write("??? ");
+
+                // pop the rest
+                for (int i = 0 ; i < noBracket ; ++i)
+                {
+                    prevTags.pop();
+                }
+            }
+        }
+        outFile.write('\n');
+    }
+}
diff --git a/misc/processLexicalTable.cpp b/misc/processLexicalTable.cpp
new file mode 100644
index 000000000..cbd4bf8d9
--- /dev/null
+++ b/misc/processLexicalTable.cpp
@@ -0,0 +1,52 @@
+#include <iostream>
+#include <string>
+
+#include "Timer.h"
+#include "InputFileStream.h"
+#include "LexicalReorderingTable.h"
+
+using namespace Moses;
+
+Timer timer;
+
+// Writes the command line usage summary to std::cerr.
+void printHelp(){
+  static const char usage[] =
+    "Usage:\n"
+    "options: \n"
+    "\t-in string -- input table file name\n"
+    "\t-out string -- prefix of binary table files\n"
+    "If -in is not specified reads from stdin\n"
+    "\n";
+  std::cerr << usage;
+}
+
+// Converts a lexical reordering table into binary table files.
+// Options: -in <file> (default: stdin), -out <prefix> (default: "out").
+// Returns 0 on success and 1 on a usage error or a failed conversion.
+int main(int argc, char** argv){
+  std::cerr << "processLexicalTable v0.1 by Konrad Rawlik\n";
+  std::string inFilePath;
+  std::string outFilePath("out");
+  if(1 >= argc){
+    printHelp();
+    return 1;
+  }
+  for(int i = 1; i < argc; ++i){
+    std::string arg(argv[i]);
+    if("-in" == arg && i+1 < argc){
+      ++i;
+      inFilePath = argv[i];
+    } else if("-out" == arg && i+1 < argc){
+      ++i;
+      outFilePath = argv[i];
+    } else {
+      //something's wrong... print help
+      printHelp();
+      return 1;
+    }
+  }
+
+  // NOTE(review): assumes LexicalReorderingTableTree::Create returns true on
+  // success -- confirm against LexicalReorderingTable.h.  Returning its
+  // result directly would make a successful run exit with status 1.
+  bool success = false;
+  if(inFilePath.empty()){
+    std::cerr << "processing stdin to " << outFilePath << ".*\n";
+    success = LexicalReorderingTableTree::Create(std::cin, outFilePath);
+  } else {
+    std::cerr << "processing " << inFilePath<< " to " << outFilePath << ".*\n";
+    InputFileStream file(inFilePath);
+    success = LexicalReorderingTableTree::Create(file, outFilePath);
+  }
+  return success ? 0 : 1;
+}
diff --git a/misc/processLexicalTable.vcproj b/misc/processLexicalTable.vcproj
new file mode 100644
index 000000000..8fe3dd9e4
--- /dev/null
+++ b/misc/processLexicalTable.vcproj
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9.00"
+ Name="processLexicalTable"
+ ProjectGUID="{9834EABB-2033-4607-9DAC-36D16E0725B5}"
+ RootNamespace="processLexicalTable"
+ Keyword="Win32Proj"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="2"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE"
+ RuntimeLibrary="2"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="1"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release-withSRILM|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE"
+ RuntimeLibrary="2"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalOptions="/FORCE:MULTIPLE"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="1"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug-withSRILM|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalOptions="/FORCE:MULTIPLE"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="2"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+ >
+ <File
+ RelativePath=".\processLexicalTable.cpp"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl;inc;xsd"
+ UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+ >
+ </Filter>
+ <Filter
+ Name="Resource Files"
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+ UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+ >
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/misc/processPhraseTable.cpp b/misc/processPhraseTable.cpp
new file mode 100644
index 000000000..c7fc5e36b
--- /dev/null
+++ b/misc/processPhraseTable.cpp
@@ -0,0 +1,200 @@
+#include <iostream>
+//#include <fstream>
+#include <sstream>
+#include <vector>
+#include <string>
+#include <iterator>
+#include <functional>
+#include <sys/stat.h>
+#include "TypeDef.h"
+#include "PhraseDictionaryTree.h"
+#include "ConfusionNet.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "InputFileStream.h"
+#include "Timer.h"
+
+using namespace std;
+using namespace Moses;
+
+Timer timer;
+
+// Streams a vector as "<size> <e0> <e1> ... ": the element count is written
+// first, and every element (the last one included) is followed by one blank.
+template<typename T>
+std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
+{
+  typedef typename std::vector<T>::size_type Index;
+  const Index count=x.size();
+  out<<count<<" ";
+  for(Index pos=0;pos<count;++pos)
+    out<<x[pos]<<' ';
+  return out;
+}
+
+// Reports whether stat() succeeds for the given path, i.e. whether the
+// file's metadata could be retrieved.
+inline bool existsFile(const char* filename) {
+  struct stat info;
+  const int rc=stat(filename,&info);
+  return rc==0;
+}
+// Convenience overload: forwards the string's character data to the
+// const char* version above.
+inline bool existsFile(const std::string& filename) {
+  const char* path=filename.c_str();
+  return existsFile(path);
+}
+
+// Returns true (after printing an error to stderr) when fewer than `needed`
+// values follow the option found at argv[i]. main() uses this so that it
+// never reads argv[argc] or beyond while consuming option arguments.
+static bool lacksArgs(int argc,int i,int needed,const std::string& opt)
+{
+  if(i+needed<argc) return false;
+  std::cerr<<"ERROR: option '"<<opt<<"' requires "<<needed<<" argument(s)\n";
+  return true;
+}
+
+// Command-line driver for building a binary phrase table.
+//
+// Options are parsed left to right (see the -h text for the documented ones).
+// With exactly one -ttable, a PhraseDictionaryTree is created from stdin
+// (file name "-") or from the named file, using the -out prefix. With more
+// than one -ttable only an error is printed, because the query code is
+// disabled by "#if 0". With no -ttable nothing is done.
+//
+// Returns 1 for -h, for an unknown option, for an option that lacks its
+// arguments, and for the disabled multi-table mode; 0 otherwise.
+int main(int argc,char **argv) {
+  std::string fto;size_t noScoreComponent=5;int cn=0;
+  bool aligninfo=false;
+  // Each entry: (table file name, (first int argument, second int argument)).
+  std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts;
+  int verb=0;
+  for(int i=1;i<argc;++i) {
+    std::string s(argv[i]);
+    if(s=="-ttable") {
+      // -ttable consumes three values: int int string.
+      if(lacksArgs(argc,i,3,s)) return 1;
+      std::pair<char*,char*> p;
+      p.first=argv[++i];
+      p.second=argv[++i];
+      ftts.push_back(std::make_pair(std::string(argv[++i]),p));
+    }
+    else if(s=="-nscores") {
+      if(lacksArgs(argc,i,1,s)) return 1;
+      noScoreComponent=atoi(argv[++i]);
+    }
+    else if(s=="-out") {
+      if(lacksArgs(argc,i,1,s)) return 1;
+      fto=std::string(argv[++i]);
+    }
+    // cn and verb are only read by the disabled code further down.
+    else if(s=="-cn") cn=1;
+    else if(s=="-irst") cn=2;
+    else if(s=="-alignment-info") aligninfo=true;
+    else if(s=="-v") {
+      if(lacksArgs(argc,i,1,s)) return 1;
+      verb=atoi(argv[++i]);
+    }
+    else if(s=="-h")
+    {
+      std::cerr<<"usage "<<argv[0]<<" :\n\n"
+        "options:\n"
+        "\t-ttable int int string -- translation table file, use '-' for stdin\n"
+        "\t-out string -- output file name prefix for binary ttable\n"
+        "\t-nscores int -- number of scores in ttable\n"
+        "\t-alignment-info -- include alignment info in the binary ttable (suffix \".wa\")\n"
+        "\nfunctions:\n"
+        "\t - convert ascii ttable in binary format\n"
+        "\t - if ttable is not read from stdin:\n"
+        "\t treat each line as source phrase an print tgt candidates\n"
+        "\n";
+      return 1;
+    }
+    else
+    {
+      std::cerr<<"ERROR: unknown option '"<<s<<"'\n";
+      return 1;
+    }
+  }
+
+
+  if(ftts.size()) {
+
+    if(ftts.size()==1){
+      // Single table: convert it to the binary representation.
+      std::cerr<<"processing ptree for ";
+      PhraseDictionaryTree pdt(noScoreComponent);
+
+      pdt.PrintWordAlignment(aligninfo);
+
+      if (ftts[0].first=="-") {
+        std::cerr<< "stdin\n";
+        pdt.Create(std::cin,fto);
+      }
+      else{
+        std::cerr<< ftts[0].first << "\n";
+        InputFileStream in(ftts[0].first);
+        pdt.Create(in,fto);
+      }
+    }
+    else
+    {
+#if 0
+      std::vector<PhraseDictionaryTree const*> pdicts;
+      std::vector<FactorType> factorOrder;
+      for(size_t i=0;i<ftts.size();++i) {
+
+        PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent,
+          &factorCollection,
+          getFactorType(atoi(ftts[i].second.first)),
+          getFactorType(atoi(ftts[i].second.second))
+          );
+        factorOrder.push_back(pdtptr->GetInputFactorType());
+        PhraseDictionaryTree &pdt=*pdtptr;
+        pdicts.push_back(pdtptr);
+
+        std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second);
+        std::string prefix=ftts[i].first+facStr;
+        if(!existsFile(prefix+".binphr.idx")) {
+          std::cerr<<"bin ttable does not exist -> create it\n";
+          InputFileStream in(prefix);
+          pdt.Create(in,prefix);
+        }
+        std::cerr<<"reading bin ttable\n";
+        pdt.Read(prefix);
+
+      }
+
+      std::cerr<<"processing stdin\n";
+      if(!cn) {
+        std::string line;
+        while(getline(std::cin,line)) {
+          std::istringstream is(line);
+#if 0
+          std::vector<std::string> f;
+          std::copy(std::istream_iterator<std::string>(is),
+            std::istream_iterator<std::string>(),
+            std::back_inserter(f));
+#endif
+          std::cerr<<"got source phrase '"<<line<<"'\n";
+
+          Phrase F(Input);
+          F.CreateFromString(factorOrder,line,factorCollection);
+
+          for(size_t k=0;k<pdicts.size();++k) {
+            PhraseDictionaryTree const& pdt=*pdicts[k];
+
+            std::vector<std::string> f(F.GetSize());
+            for(size_t i=0;i<F.GetSize();++i)
+              f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString();
+
+            std::stringstream iostA,iostB;
+            std::cerr<<"full phrase processing "<<f<<"\n";
+            pdt.PrintTargetCandidates(f,iostA);
+
+            std::cerr<<"processing with prefix ptr\n";
+            PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot());
+
+            for(size_t i=0;i<f.size() && p;++i) {
+              std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n";
+              p=pdt.Extend(p,f[i]);
+              std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n";
+            }
+            if(p) {
+              std::cerr<<"retrieving candidates from prefix ptr\n";
+              pdt.PrintTargetCandidates(p,iostB);}
+            else {
+              std::cerr<<"final ptr is invalid\n";
+              iostB<<"there are 0 target candidates\n";
+            }
+            if(iostA.str() != iostB.str())
+              std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n";
+
+            std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n";
+            pdt.FreeMemory();
+
+          }
+
+        }
+      }
+      else {
+        // process confusion net input
+        ConfusionNet net(&factorCollection);
+        std::vector<std::vector<float> > weights;
+        for(size_t i=0;i<pdicts.size();++i)
+          weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent)));
+
+        while(net.ReadF(std::cin,factorOrder,cn-1)) {
+          net.Print(std::cerr);
+          GenerateCandidates(net,pdicts,weights,verb);
+        }
+
+      }
+#else
+      // The multi-table query mode above is compiled out; report and fail.
+      std::cerr<<"ERROR: these functions are currently broken...\n";
+      return 1;
+#endif
+    }
+  }
+
+  return 0;
+}
diff --git a/misc/processPhraseTable.vcproj b/misc/processPhraseTable.vcproj
new file mode 100644
index 000000000..88ea23ba2
--- /dev/null
+++ b/misc/processPhraseTable.vcproj
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9.00"
+ Name="processPhraseTable"
+ ProjectGUID="{AA230564-6DF1-4662-9BF9-7AD73DE53B76}"
+ RootNamespace="processPhraseTable"
+ Keyword="Win32Proj"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="2"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+ RuntimeLibrary="2"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="1"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release-withSRILM|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+ RuntimeLibrary="2"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalOptions="/FORCE:MULTIPLE"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="1"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug-withSRILM|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)\moses\src&quot;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalOptions="/FORCE:MULTIPLE"
+ AdditionalDependencies="&quot;$(SolutionDir)$(ConfigurationName)\moses.lib&quot; zdll.lib"
+ LinkIncremental="2"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+ >
+ <File
+ RelativePath=".\processPhraseTable.cpp"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl;inc;xsd"
+ UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+ >
+ </Filter>
+ <Filter
+ Name="Resource Files"
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+ UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+ >
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/misc/queryLexicalTable.cpp b/misc/queryLexicalTable.cpp
new file mode 100644
index 000000000..0ebf66910
--- /dev/null
+++ b/misc/queryLexicalTable.cpp
@@ -0,0 +1,110 @@
+#include <iostream>
+#include <string>
+
+#include "Phrase.h"
+#include "FactorCollection.h"
+#include "Timer.h"
+#include "InputFileStream.h"
+#include "LexicalReorderingTable.h"
+
+using namespace Moses;
+
+Timer timer;
+
+// Prints the command-line usage summary to stderr.
+void printHelp(){
+  // The adjacent literals are joined by the compiler into one string.
+  static const char* const usage =
+    "Usage:\n"
+    "options: \n"
+    "\t-table file -- input table file name\n"
+    "\t-f string -- f query phrase\n"
+    "\t-e string -- e query phrase\n"
+    "\t-c string -- context query phrase\n"
+    "\n";
+  std::cerr << usage;
+}
+
+// Writes every component of a Score to the stream, each followed by a blank.
+// The Score is taken by const reference so that printing does not copy it,
+// and the index is unsigned to match what size() is compared against.
+// NOTE(review): assumes Score (declared in the project headers) exposes
+// size() and operator[] like a std::vector -- confirm against its definition.
+std::ostream& operator<<(std::ostream& o, const Score& s){
+  for(size_t i = 0; i < s.size(); ++i){
+    o << s[i] << " ";
+  }
+  return o;
+}
+
+// Entry point of the lexical reordering table query tool.
+//
+// Parses -table / -f / -e / -c / -cache, builds the three query phrases,
+// loads the table (the tree-based class when "<table>.binlexr.idx" exists,
+// the in-memory class otherwise) and prints the score for (f, e, c) to stderr.
+//
+// Returns 1 after printing the help text when no arguments are given, when an
+// option is unknown or lacks its value, or when no table was named; 0 otherwise.
+int main(int argc, char** argv){
+  std::cerr << "queryLexicalTable v0.2 by Konrad Rawlik\n";
+  std::string inFilePath;
+  bool cache = false;
+  std::string query_e, query_f, query_c;
+  bool use_context = false;
+  bool use_e = false;
+  if(1 >= argc){
+    printHelp();
+    return 1;
+  }
+  for(int i = 1; i < argc; ++i){
+    std::string arg(argv[i]);
+    if("-table" == arg && i+1 < argc){
+      //std::cerr << "Table is " << argv[i];
+      ++i;
+      inFilePath = argv[i];
+    } else if("-f" == arg && i+1 < argc){
+      ++i;
+      //std::cerr << "F is " << argv[i];
+      query_f = argv[i];
+    } else if("-e" == arg && i+1 < argc){
+      ++i;
+      query_e = argv[i];
+      use_e = true;
+    } else if("-c" == arg){
+      // The context phrase is optional: it is only consumed when the next
+      // argument does not look like another option.
+      if(i+1 < argc && '-' != argv[i+1][0]){
+        ++i;
+        query_c = argv[i];
+        use_context = true;
+      } else {
+        use_context = false;
+      }
+    } else if("-cache" == arg){
+      // -cache is a pure flag; it must not consume the following argument.
+      cache = true;
+    } else {
+      //somethings wrong... print help
+      printHelp();
+      return 1;
+    }
+  }
+
+  // Without a table there is nothing to query; fail early instead of trying
+  // to load a table from an empty path.
+  if(inFilePath.empty()){
+    std::cerr << "ERROR: no input table given\n";
+    printHelp();
+    return 1;
+  }
+
+  // Factor 0 is always used for f; for e and c only when they were supplied.
+  FactorList f_mask;
+  FactorList e_mask;
+  FactorList c_mask;
+  f_mask.push_back(0);
+  if(use_e){
+    e_mask.push_back(0);
+  }
+  if(use_context){
+    c_mask.push_back(0);
+  }
+  Phrase e(Output),f(Input),c(Output);
+  e.CreateFromString(e_mask, query_e, "|");
+  f.CreateFromString(f_mask, query_f, "|");
+  c.CreateFromString(c_mask, query_c,"|");
+  // Owned by this function and released with delete before returning.
+  LexicalReorderingTable* table;
+  if(FileExists(inFilePath+".binlexr.idx")){
+    std::cerr << "Loading binary table...\n";
+    table = new LexicalReorderingTableTree(inFilePath, f_mask, e_mask, c_mask);
+  } else {
+    std::cerr << "Loading ordinary table...\n";
+    table = new LexicalReorderingTableMemory(inFilePath, f_mask, e_mask, c_mask);
+  }
+  //table->DbgDump(&std::cerr);
+  if(cache){
+    std::cerr << "Caching for f\n";
+    table->InitializeForInputPhrase(f);
+  }
+  std::cerr << "Querying: f='" << f.GetStringRep(f_mask) << "' e='" << e.GetStringRep(e_mask) << "' c='" << c.GetStringRep(c_mask) << "'\n";
+  std::cerr << table->GetScore(f,e,c) << "\n";
+  //table->DbgDump(&std::cerr);
+  delete table;
+  return 0;
+}
+
+