diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2013-07-03 23:24:20 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2013-07-03 23:24:20 +0400 |
commit | 0e46cd377c8e9f1a15c67fe7a38184827abc4e43 (patch) | |
tree | af3e8488379ff41c1eabf076f62b1259c4c3bc56 | |
parent | fbdb07a94cb3e07c2d677bca9d9cdccfb438d77e (diff) | |
parent | b10159a29f9ab020b3e606ce04247dd7265a8590 (diff) |
Merge branch 'master' into nadir_osm
m--------- | contrib/arrow-pipelines/python/pcl | 0 | ||||
-rw-r--r-- | contrib/other-builds/extract-ghkm/.cproject | 130 | ||||
-rw-r--r-- | contrib/other-builds/extract-ghkm/.project | 209 | ||||
-rw-r--r-- | contrib/other-builds/mert_lib/.project | 10 | ||||
-rw-r--r-- | contrib/other-builds/moses/.project | 10 | ||||
-rw-r--r-- | lm/model.hh | 2 | ||||
-rw-r--r-- | mert/BleuDocScorer.cpp | 207 | ||||
-rw-r--r-- | mert/BleuDocScorer.h | 67 | ||||
-rw-r--r-- | mert/BleuScorer.h | 8 | ||||
-rw-r--r-- | mert/Jamfile | 1 | ||||
-rw-r--r-- | mert/ScorerFactory.cpp | 4 | ||||
-rw-r--r-- | moses/TranslationModel/PhraseDictionaryTreeAdaptor.h | 10 | ||||
-rw-r--r-- | moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h | 6 | ||||
-rw-r--r-- | moses/TranslationOptionCollection.cpp | 1 | ||||
-rwxr-xr-x | scripts/generic/ph_numbers.perl | 51 |
15 files changed, 702 insertions, 14 deletions
diff --git a/contrib/arrow-pipelines/python/pcl b/contrib/arrow-pipelines/python/pcl -Subproject 6d5d13e1e06a871fbf7adf86dffda5113e315c1 +Subproject 1315185203a90b6f80acf2e47b4ea85b420b0d4 diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject new file mode 100644 index 000000000..8b549ee0c --- /dev/null +++ b/contrib/other-builds/extract-ghkm/.cproject @@ -0,0 +1,130 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage"> + <storageModule moduleId="org.eclipse.cdt.core.settings"> + <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002"> + <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug"> + <externalSettings/> + <extensions> + <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> + <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + </extensions> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" 
id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug"> + <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath=""> + <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug"> + <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/> + <builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/> + <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler"> + <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/> + <option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler"> + <option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" 
superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath"> + <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/> + <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/> + </option> + <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker"> + <option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths"> + <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/> + </option> + <option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs"> + <listOptionValue builtIn="false" value="boost_program_options-mt"/> + <listOptionValue builtIn="false" value="boost_thread-mt"/> + <listOptionValue builtIn="false" value="boost_filesystem-mt"/> + <listOptionValue builtIn="false" value="boost_iostreams-mt"/> + <listOptionValue builtIn="false" value="z"/> + <listOptionValue builtIn="false" value="bz2"/> + </option> + <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input"> + <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> + <additionalInput kind="additionalinput" paths="$(LIBS)"/> + </inputType> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" 
name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/> + <tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler"> + <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> + </tool> + </toolChain> + </folderInfo> + </configuration> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> + </cconfiguration> + <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494"> + <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release"> + <externalSettings/> + <extensions> + <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> + <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + </extensions> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release"> 
+ <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath=""> + <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release"> + <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/> + <builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/> + <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler"> + <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/> + <option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler"> + <option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" 
valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker"> + <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input"> + <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> + <additionalInput kind="additionalinput" paths="$(LIBS)"/> + </inputType> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/> + <tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler"> + <inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> + </tool> + </toolChain> + </folderInfo> + </configuration> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> + </cconfiguration> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/> + </storageModule> + <storageModule moduleId="scannerConfiguration"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083"> + 
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/> + </scannerConfigBuildInfo> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/> + <storageModule moduleId="refreshScope"/> +</cproject> diff --git a/contrib/other-builds/extract-ghkm/.project b/contrib/other-builds/extract-ghkm/.project new file mode 100644 index 000000000..b7c40f069 --- /dev/null +++ b/contrib/other-builds/extract-ghkm/.project @@ -0,0 +1,209 @@ +<?xml version="1.0" encoding="UTF-8"?> +<projectDescription> + <name>extract-ghkm</name> + 
<comment></comment> + <projects> + </projects> + <buildSpec> + <buildCommand> + <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name> + <triggers>clean,full,incremental,</triggers> + <arguments> + </arguments> + </buildCommand> + <buildCommand> + <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name> + <triggers>full,incremental,</triggers> + <arguments> + </arguments> + </buildCommand> + </buildSpec> + <natures> + <nature>org.eclipse.cdt.core.cnature</nature> + <nature>org.eclipse.cdt.core.ccnature</nature> + <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature> + <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature> + </natures> + <linkedResources> + <link> + <name>Alignment.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI> + </link> + <link> + <name>Alignment.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI> + </link> + <link> + <name>AlignmentGraph.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI> + </link> + <link> + <name>AlignmentGraph.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI> + </link> + <link> + <name>ComposedRule.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI> + </link> + <link> + <name>ComposedRule.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI> + </link> + <link> + <name>Exception.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI> + </link> + <link> + <name>ExtractGHKM.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI> + </link> + 
<link> + <name>ExtractGHKM.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI> + </link> + <link> + <name>InputFileStream.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI> + </link> + <link> + <name>InputFileStream.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI> + </link> + <link> + <name>Jamfile</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI> + </link> + <link> + <name>Main.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI> + </link> + <link> + <name>Node.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI> + </link> + <link> + <name>Node.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI> + </link> + <link> + <name>Options.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI> + </link> + <link> + <name>OutputFileStream.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI> + </link> + <link> + <name>OutputFileStream.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI> + </link> + <link> + <name>ParseTree.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI> + </link> + <link> + <name>ParseTree.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI> + </link> + <link> + <name>ScfgRule.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI> + </link> + <link> + 
<name>ScfgRule.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI> + </link> + <link> + <name>ScfgRuleWriter.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI> + </link> + <link> + <name>ScfgRuleWriter.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI> + </link> + <link> + <name>Span.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI> + </link> + <link> + <name>Span.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI> + </link> + <link> + <name>Subgraph.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI> + </link> + <link> + <name>Subgraph.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI> + </link> + <link> + <name>SyntaxTree.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI> + </link> + <link> + <name>SyntaxTree.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h</locationURI> + </link> + <link> + <name>XmlTree.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp</locationURI> + </link> + <link> + <name>XmlTree.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI> + </link> + <link> + <name>XmlTreeParser.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI> + </link> + <link> + <name>XmlTreeParser.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI> + </link> + <link> + <name>tables-core.cpp</name> + 
<type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI> + </link> + <link> + <name>tables-core.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI> + </link> + </linkedResources> +</projectDescription> diff --git a/contrib/other-builds/mert_lib/.project b/contrib/other-builds/mert_lib/.project index 687798e56..f6517ac90 100644 --- a/contrib/other-builds/mert_lib/.project +++ b/contrib/other-builds/mert_lib/.project @@ -82,6 +82,16 @@ </natures> <linkedResources> <link> + <name>BleuDocScorer.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/mert/BleuDocScorer.cpp</locationURI> + </link> + <link> + <name>BleuDocScorer.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/mert/BleuDocScorer.h</locationURI> + </link> + <link> <name>mert</name> <type>2</type> <locationURI>PARENT-3-PROJECT_LOC/mert</locationURI> diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 593955b4d..6081d2d3f 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1617,16 +1617,6 @@ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/OpSequenceModel.h</locationURI> </link> <link> - <name>FF/OSM-Feature/SRILM-API.cpp</name> - <type>1</type> - <locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/SRILM-API.cpp</locationURI> - </link> - <link> - <name>FF/OSM-Feature/SRILM-API.h</name> - <type>1</type> - <locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/SRILM-API.h</locationURI> - </link> - <link> <name>FF/OSM-Feature/osmHyp.cpp</name> <type>1</type> <locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.cpp</locationURI> diff --git a/lm/model.hh b/lm/model.hh index 60f55110b..c9c17c4b3 100644 --- a/lm/model.hh +++ b/lm/model.hh @@ -67,7 +67,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, 
const WordIndex *context_rend, const WordIndex new_word, State &out_state) const; /* Get the state for a context. Don't use this if you can avoid it. Use - * BeginSentenceState or EmptyContextState and extend from those. If + * BeginSentenceState or NullContextState and extend from those. If * you're only going to use this state to call FullScore once, use * FullScoreForgotState. * To use this function, make an array of WordIndex containing the context diff --git a/mert/BleuDocScorer.cpp b/mert/BleuDocScorer.cpp new file mode 100644 index 000000000..558757cef --- /dev/null +++ b/mert/BleuDocScorer.cpp @@ -0,0 +1,207 @@ +#include "BleuDocScorer.h" + +#include <sys/types.h> +#include <algorithm> +#include <cassert> +#include <cmath> +#include <climits> +#include <fstream> +#include <iostream> +#include <stdexcept> + +#include "util/check.hh" +#include "Ngram.h" +#include "Reference.h" +#include "Util.h" +#include "Vocabulary.h" + + +using namespace std; + +namespace +{ + +// configure regularisation +const char KEY_REFLEN[] = "reflen"; +const char REFLEN_AVERAGE[] = "average"; +const char REFLEN_SHORTEST[] = "shortest"; +const char REFLEN_CLOSEST[] = "closest"; + +} // namespace + +namespace MosesTuning +{ + + +BleuDocScorer::BleuDocScorer(const string& config) + : BleuScorer("BLEUDOC", config), + m_ref_length_type(CLOSEST) +{ + const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST); + if (reflen == REFLEN_AVERAGE) { + m_ref_length_type = AVERAGE; + } else if (reflen == REFLEN_SHORTEST) { + m_ref_length_type = SHORTEST; + } else if (reflen == REFLEN_CLOSEST) { + m_ref_length_type = CLOSEST; + } else { + throw runtime_error("Unknown reference length strategy: " + reflen); + } +} + +BleuDocScorer::~BleuDocScorer() {} + + +bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id) +{ + if (is == NULL) return false; + + string line; + size_t doc_id = -1; + size_t sid = 0; + while (getline(*is, line)) { + + if (line.find("<doc docid") != std::string::npos) 
{ // new document + doc_id++; + m_references.push_back(new ScopedVector<Reference>()); + sid = 0; + } + else if (line.find("<seg") != std::string::npos) { //new sentence + int start = line.find_first_of('>') + 1; + std::string trans = line.substr(start, line.find_last_of('<')-start); + trans = preprocessSentence(trans); + + if (file_id == 0) { + Reference* ref = new Reference; + m_references[doc_id]->push_back(ref); // Take ownership of the Reference object. + } + + if (m_references[doc_id]->size() <= sid) { + return false; + } + NgramCounts counts; + size_t length = CountNgrams(trans, counts, kBleuNgramOrder); + + //for any counts larger than those already there, merge them in + for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) { + const NgramCounts::Key& ngram = ci->first; + const NgramCounts::Value newcount = ci->second; + + NgramCounts::Value oldcount = 0; + m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount); + if (newcount > oldcount) { + m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount; + } + } + //add in the length + + m_references[doc_id]->get().at(sid)->push_back(length); + if (sid > 0 && sid % 100 == 0) { + TRACE_ERR("."); + } + ++sid; + } + } + return true; +} + +void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry) +{ + if (sid >= m_references.size()) { + stringstream msg; + msg << "Sentence id (" << sid << ") not found in reference set"; + throw runtime_error(msg.str()); + } + + std::vector<std::string> sentences = splitDoc(text); + + vector<ScoreStatsType> totStats(kBleuNgramOrder * 2 + 1); + + for (uint i=0; i<sentences.size(); ++i) { + + NgramCounts testcounts; + // stats for this line + vector<ScoreStatsType> stats(kBleuNgramOrder * 2); + string sentence = preprocessSentence(sentences[i]); + const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder); + + //precision on each ngram type + for (NgramCounts::const_iterator 
testcounts_it = testcounts.begin(); + testcounts_it != testcounts.end(); ++testcounts_it) { + const NgramCounts::Value guess = testcounts_it->second; + const size_t len = testcounts_it->first.size(); + NgramCounts::Value correct = 0; + + NgramCounts::Value v = 0; + if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) { + correct = min(v, guess); + } + stats[len * 2 - 2] += correct; + stats[len * 2 - 1] += guess; + } + + const int reference_len = CalcReferenceLength(sid, i, length); + stats.push_back(reference_len); + + //ADD stats to totStats + std::transform(stats.begin(), stats.end(), totStats.begin(), + totStats.begin(), std::plus<int>()); + } + entry.set(totStats); +} + +std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text) +{ + std::vector<std::string> res; + + uint index = 0; + std::string::size_type end; + + while ((end = text.find(" \\n ", index)) != std::string::npos) { + res.push_back(text.substr(index,end-index)); + index = end + 4; + } + return res; +} + +statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const +{ + CHECK(comps.size() == kBleuNgramOrder * 2 + 1); + + float logbleu = 0.0; + for (int i = 0; i < kBleuNgramOrder; ++i) { + if (comps[2*i] == 0) { + return 0.0; + } + logbleu += log(comps[2*i]) - log(comps[2*i+1]); + + } + logbleu /= kBleuNgramOrder; + // reflength divided by test length + const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1]; + if (brevity < 0.0) { + logbleu += brevity; + } + return exp(logbleu); +} + +int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length) +{ + switch (m_ref_length_type) { + case AVERAGE: + return m_references[doc_id]->get().at(sentence_id)->CalcAverage(); + break; + case CLOSEST: + return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length); + break; + case SHORTEST: + return m_references[doc_id]->get().at(sentence_id)->CalcShortest(); + break; + default: + cerr << 
"unknown reference types." << endl; + exit(1); + } +} + +} + diff --git a/mert/BleuDocScorer.h b/mert/BleuDocScorer.h new file mode 100644 index 000000000..349745825 --- /dev/null +++ b/mert/BleuDocScorer.h @@ -0,0 +1,67 @@ +#ifndef MERT_BLEU_DOC_SCORER_H_ +#define MERT_BLEU_DOC_SCORER_H_ + +#include <ostream> +#include <string> +#include <vector> + +#include "Types.h" +#include "ScoreData.h" +#include "StatisticsBasedScorer.h" +#include "ScopedVector.h" +#include "BleuScorer.h" + +namespace MosesTuning +{ + +/** + * Bleu document scoring + * + * Needs xml reference files, and nbest lists where sentences are separated by '\n' + */ +class BleuDocScorer : public BleuScorer +{ +public: + + explicit BleuDocScorer(const std::string& config = ""); + ~BleuDocScorer(); + + virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry); + virtual statscore_t calculateScore(const std::vector<int>& comps) const; + + int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length); + + // NOTE: this function is used for unit testing. + virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id); + +private: + ReferenceLengthType m_ref_length_type; + + // reference translations. + ScopedVector<ScopedVector<Reference> > m_references; + + // no copying allowed + BleuDocScorer(const BleuDocScorer&); + BleuDocScorer& operator=(const BleuDocScorer&); + + std::vector<std::string> splitDoc(const std::string& text); +}; + +/* /\** Computes sentence-level BLEU+1 score. */ +/* * This function is used in PRO. */ +/* *\/ */ +/* float sentenceLevelBleuPlusOne(const std::vector<float>& stats); */ + +/* /\** Computes sentence-level BLEU score given a background corpus. */ +/* * This function is used in batch MIRA. 
*/ +/* *\/ */ +/* float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg); */ + +/* /\** */ +/* * Computes plain old BLEU from a vector of stats */ +/* *\/ */ +/* float unsmoothedBleu(const std::vector<float>& stats); */ + +} + +#endif // MERT_BLEU_DOC_SCORER_H_ diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h index 248b3e1d1..92d7fb9d5 100644 --- a/mert/BleuScorer.h +++ b/mert/BleuScorer.h @@ -65,14 +65,18 @@ public: bool OpenReference(const char* filename, std::size_t file_id); // NOTE: this function is used for unit testing. - bool OpenReferenceStream(std::istream* is, std::size_t file_id); + virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id); -private: + //private: +protected: ReferenceLengthType m_ref_length_type; // reference translations. ScopedVector<Reference> m_references; + // constructor used by subclasses + BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {} + // no copying allowed BleuScorer(const BleuScorer&); BleuScorer& operator=(const BleuScorer&); diff --git a/mert/Jamfile b/mert/Jamfile index 0ee32638e..bb4073f52 100644 --- a/mert/Jamfile +++ b/mert/Jamfile @@ -20,6 +20,7 @@ MiraWeightVector.cpp HypPackEnumerator.cpp Data.cpp BleuScorer.cpp +BleuDocScorer.cpp SemposScorer.cpp SemposOverlapping.cpp InterpolatedScorer.cpp diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp index 02000c1bc..446ecb36b 100644 --- a/mert/ScorerFactory.cpp +++ b/mert/ScorerFactory.cpp @@ -3,6 +3,7 @@ #include <stdexcept> #include "Scorer.h" #include "BleuScorer.h" +#include "BleuDocScorer.h" #include "PerScorer.h" #include "TerScorer.h" #include "CderScorer.h" @@ -20,6 +21,7 @@ vector<string> ScorerFactory::getTypes() { vector<string> types; types.push_back(string("BLEU")); + types.push_back(string("BLEUDOC")); types.push_back(string("PER")); types.push_back(string("TER")); types.push_back(string("CDER")); @@ -34,6 +36,8 @@ Scorer* 
ScorerFactory::getScorer(const string& type, const string& config) { if (type == "BLEU") { return new BleuScorer(config); + } else if (type == "BLEUDOC") { + return new BleuDocScorer(config); } else if (type == "PER") { return new PerScorer(config); } else if (type == "TER") { diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h index 63d2887b3..6c17bfc85 100644 --- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h +++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h @@ -7,9 +7,13 @@ #include "moses/TargetPhraseCollection.h" #include "moses/TranslationModel/PhraseDictionary.h" #include "util/check.hh" +#include <vector> +#ifdef WITH_THREADS #include <boost/thread/tss.hpp> -#include <vector> +#else +#include <boost/scoped_ptr.hpp> +#endif namespace Moses { @@ -26,7 +30,11 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary { typedef PhraseDictionary MyBase; +#ifdef WITH_THREADS boost::thread_specific_ptr<PDTAimp> m_implementation; +#else + boost::scoped_ptr<PDTAimp> m_implementation; +#endif friend class PDTAimp; PhraseDictionaryTreeAdaptor(); diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h index 3e0e99c28..e3eb52b48 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h @@ -32,6 +32,8 @@ #ifdef WITH_THREADS #include <boost/thread/tss.hpp> +#else +#include <boost/scoped_ptr.hpp> #endif namespace Moses @@ -47,7 +49,11 @@ class PhraseDictionaryOnDisk : public PhraseDictionary friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&); protected: +#ifdef WITH_THREADS boost::thread_specific_ptr<OnDiskPt::OnDiskWrapper> m_implementation; +#else + boost::scoped_ptr<OnDiskPt::OnDiskWrapper> m_implementation; +#endif OnDiskPt::OnDiskWrapper &GetImplementation(); const OnDiskPt::OnDiskWrapper 
&GetImplementation() const; diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index bf464862b..443b2c8ab 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include <algorithm> +#include <typeinfo> #include "TranslationOptionCollection.h" #include "Sentence.h" #include "DecodeStep.h" diff --git a/scripts/generic/ph_numbers.perl b/scripts/generic/ph_numbers.perl new file mode 100755 index 000000000..1d64a046d --- /dev/null +++ b/scripts/generic/ph_numbers.perl @@ -0,0 +1,51 @@ +#!/usr/bin/perl -w
+
+# Script to recognize and replace numbers in Moses training corpora
+# and decoder input
+#
+# (c) 2013 TAUS
+
+use strict;
+
+use Getopt::Std;
+
+# Setting DEBUG to a true value in the environment traces every match on STDERR.
+my $debug = $ENV{DEBUG} || 0;
+
+# Command-line switches:
+#   -s, -t  source / target locale (take a value)
+#   -m      placeholder symbol (takes a value, default '@NUM@')
+#   -c, -l  select the output format used by the replacement loop
+#   -h      print the usage message
+my %opts;
+my $optionsValid = getopts('s:t:cm:hl',\%opts);
+if(!$optionsValid || $opts{h}) {
+  my $usage = "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
+  # An explicit -h is a successful run: usage on STDOUT, status 0.
+  if($optionsValid) {
+    print $usage;
+    exit;
+  }
+  # A rejected command line is an error: usage on STDERR and a non-zero
+  # status, so that a calling pipeline does not mistake it for success.
+  print STDERR $usage;
+  exit 1;
+}
+# The locales are parsed and stored, but nothing in this script reads them yet.
+my $sourceLocale = $opts{s} || "";
+my $targetLocale = $opts{t} || "";
+# Symbol written in place of every recognised number.
+my $numberSymbol = $opts{m} || '@NUM@';
+
+# Replace each number on every input line by the placeholder symbol.
+#   -c       write the bare symbol
+#   -l       write <ne translation="NUMBER">SYMBOL</ne>
+#   default  write <ne translation="SYMBOL" entity="NUMBER">SYMBOL</ne>
+# Text between numbers, and the whitespace in front of a number, is copied
+# to the output unchanged.
+while(<>) {
+  chomp;
+  my $output = "";
+  # Until a number is found the whole line counts as "remainder", so a line
+  # without any number is passed through instead of being printed empty.
+  my $remainder = $_;
+  # $1 = text before the number, $2 = whitespace before it, $3 = the number.
+  # In the trailing class the hyphen is placed last so that it is a literal
+  # '-'. Written as "+-e" it is the range '+'..'e', which also matches
+  # upper-case letters, 'a'-'d' and punctuation such as '/', ':', '<', '>'.
+  while(/\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,eE+-]*)/g) {
+    print STDERR "Between: x$1x\n" if $debug;
+    print STDERR "Number: x$3x\n" if $debug;
+    $output .= $1;
+    if($opts{c}) {
+      $output .= $2.$numberSymbol;
+    }
+    elsif($opts{l}) {
+      $output .= $2."<ne translation=\"$3\">$numberSymbol</ne>";
+    }
+    else {
+      $output .= $2."<ne translation=\"$numberSymbol\" entity=\"$3\">$numberSymbol</ne>";
+    }
+    # pos() is the offset just past this match; the text after it has not
+    # been processed yet. Used instead of $', which perlvar documents as
+    # slowing down pattern matching on older perls.
+    $remainder = substr($_, pos($_));
+  }
+  print STDERR "Remainder: x".$remainder."x\n" if $debug;
+  print STDERR "\n" if $debug;
+  $output .= $remainder;
+  $output .= "\n";
+  print $output;
+}
|