Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hieu Hoang <hieu@hoang.co.uk> 2013-07-03 16:35:10 +0400
committer Hieu Hoang <hieu@hoang.co.uk> 2013-07-03 16:35:10 +0400
commit 159911a92b4e36395aa8b5f85a7fa6a383ecb5cd (patch)
tree 9bb06e4937a46976b364de472f4da9be44239481
parent c38e1a768218067970ddd2275850f548cd798f7f (diff)
parent fb4a6fa2bb323acb883e4bf26eb5ef0bcf5cef29 (diff)
Merge github.com:moses-smt/mosesdecoder into hieu_opt_input2
m--------- contrib/arrow-pipelines/python/pcl 0
-rw-r--r-- contrib/other-builds/extract-ghkm/.cproject 130
-rw-r--r-- contrib/other-builds/extract-ghkm/.project 209
-rw-r--r-- mert/BleuDocScorer.cpp 206
-rw-r--r-- mert/BleuDocScorer.h 67
-rw-r--r-- mert/BleuScorer.h 8
-rw-r--r-- mert/Jamfile 1
-rw-r--r-- mert/ScorerFactory.cpp 4
8 files changed, 623 insertions, 2 deletions
diff --git a/contrib/arrow-pipelines/python/pcl b/contrib/arrow-pipelines/python/pcl
-Subproject 6d5d13e1e06a871fbf7adf86dffda5113e315c1
+Subproject 1315185203a90b6f80acf2e47b4ea85b420b0d4
diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject
new file mode 100644
index 000000000..8b549ee0c
--- /dev/null
+++ b/contrib/other-builds/extract-ghkm/.cproject
@@ -0,0 +1,130 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+ <storageModule moduleId="org.eclipse.cdt.core.settings">
+ <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <externalSettings/>
+ <extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ </extensions>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
+ <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+ <builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+ <option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/>
+ </option>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+ <option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
+ </option>
+ <option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+ <listOptionValue builtIn="false" value="boost_program_options-mt"/>
+ <listOptionValue builtIn="false" value="boost_thread-mt"/>
+ <listOptionValue builtIn="false" value="boost_filesystem-mt"/>
+ <listOptionValue builtIn="false" value="boost_iostreams-mt"/>
+ <listOptionValue builtIn="false" value="z"/>
+ <listOptionValue builtIn="false" value="bz2"/>
+ </option>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+ <additionalInput kind="additionalinput" paths="$(LIBS)"/>
+ </inputType>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ </configuration>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+ </cconfiguration>
+ <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <externalSettings/>
+ <extensions>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ </extensions>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
+ <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
+ <builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
+ <option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+ <additionalInput kind="additionalinput" paths="$(LIBS)"/>
+ </inputType>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
+ <tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ </configuration>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+ </cconfiguration>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
+ </storageModule>
+ <storageModule moduleId="scannerConfiguration">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ </scannerConfigBuildInfo>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+ <storageModule moduleId="refreshScope"/>
+</cproject>
diff --git a/contrib/other-builds/extract-ghkm/.project b/contrib/other-builds/extract-ghkm/.project
new file mode 100644
index 000000000..b7c40f069
--- /dev/null
+++ b/contrib/other-builds/extract-ghkm/.project
@@ -0,0 +1,209 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>extract-ghkm</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+ <triggers>clean,full,incremental,</triggers>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+ <triggers>full,incremental,</triggers>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.cdt.core.cnature</nature>
+ <nature>org.eclipse.cdt.core.ccnature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+ </natures>
+ <linkedResources>
+ <link>
+ <name>Alignment.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI>
+ </link>
+ <link>
+ <name>Alignment.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI>
+ </link>
+ <link>
+ <name>AlignmentGraph.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI>
+ </link>
+ <link>
+ <name>AlignmentGraph.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI>
+ </link>
+ <link>
+ <name>ComposedRule.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI>
+ </link>
+ <link>
+ <name>ComposedRule.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI>
+ </link>
+ <link>
+ <name>Exception.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI>
+ </link>
+ <link>
+ <name>ExtractGHKM.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI>
+ </link>
+ <link>
+ <name>ExtractGHKM.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI>
+ </link>
+ <link>
+ <name>InputFileStream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI>
+ </link>
+ <link>
+ <name>InputFileStream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
+ </link>
+ <link>
+ <name>Jamfile</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI>
+ </link>
+ <link>
+ <name>Main.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI>
+ </link>
+ <link>
+ <name>Node.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI>
+ </link>
+ <link>
+ <name>Node.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI>
+ </link>
+ <link>
+ <name>Options.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI>
+ </link>
+ <link>
+ <name>OutputFileStream.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
+ </link>
+ <link>
+ <name>OutputFileStream.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
+ </link>
+ <link>
+ <name>ParseTree.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI>
+ </link>
+ <link>
+ <name>ParseTree.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI>
+ </link>
+ <link>
+ <name>ScfgRule.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI>
+ </link>
+ <link>
+ <name>ScfgRule.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI>
+ </link>
+ <link>
+ <name>ScfgRuleWriter.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI>
+ </link>
+ <link>
+ <name>ScfgRuleWriter.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI>
+ </link>
+ <link>
+ <name>Span.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI>
+ </link>
+ <link>
+ <name>Span.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI>
+ </link>
+ <link>
+ <name>Subgraph.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI>
+ </link>
+ <link>
+ <name>Subgraph.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI>
+ </link>
+ <link>
+ <name>SyntaxTree.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
+ </link>
+ <link>
+ <name>SyntaxTree.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h</locationURI>
+ </link>
+ <link>
+ <name>XmlTree.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp</locationURI>
+ </link>
+ <link>
+ <name>XmlTree.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
+ </link>
+ <link>
+ <name>XmlTreeParser.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI>
+ </link>
+ <link>
+ <name>XmlTreeParser.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI>
+ </link>
+ <link>
+ <name>tables-core.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI>
+ </link>
+ <link>
+ <name>tables-core.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI>
+ </link>
+ </linkedResources>
+</projectDescription>
diff --git a/mert/BleuDocScorer.cpp b/mert/BleuDocScorer.cpp
new file mode 100644
index 000000000..53ef0e506
--- /dev/null
+++ b/mert/BleuDocScorer.cpp
@@ -0,0 +1,206 @@
+#include "BleuDocScorer.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <climits>
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+
+#include "util/check.hh"
+#include "Ngram.h"
+#include "Reference.h"
+#include "Util.h"
+#include "Vocabulary.h"
+
+
+using namespace std;
+
+namespace
+{
+
+// Config key and accepted values that select the reference-length strategy (read in the BleuDocScorer constructor).
+const char KEY_REFLEN[] = "reflen"; // key passed to getConfig()
+const char REFLEN_AVERAGE[] = "average"; // maps to AVERAGE
+const char REFLEN_SHORTEST[] = "shortest"; // maps to SHORTEST
+const char REFLEN_CLOSEST[] = "closest"; // maps to CLOSEST; also passed as getConfig()'s second argument
+
+} // namespace
+
+namespace MosesTuning
+{
+
+
+// Builds a document-level BLEU scorer registered under the name "BLEUDOC".
+//
+// The "reflen" entry of `config` picks the reference-length strategy:
+// "average", "shortest" or "closest". "closest" is handed to getConfig() as
+// its second argument (presumably the fallback when the key is absent).
+// Any other value raises std::runtime_error.
+BleuDocScorer::BleuDocScorer(const string& config)
+  : BleuScorer("BLEUDOC", config),
+    m_ref_length_type(CLOSEST)
+{
+  const string strategy = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
+
+  if (strategy == REFLEN_CLOSEST) {
+    m_ref_length_type = CLOSEST;
+    return;
+  }
+  if (strategy == REFLEN_SHORTEST) {
+    m_ref_length_type = SHORTEST;
+    return;
+  }
+  if (strategy == REFLEN_AVERAGE) {
+    m_ref_length_type = AVERAGE;
+    return;
+  }
+
+  throw runtime_error("Unknown reference length strategy: " + strategy);
+}
+
+BleuDocScorer::~BleuDocScorer() {} // empty body: no explicit cleanup is performed here
+
+
+// Reads one XML-style reference file from `is` and merges it into m_references.
+//
+// A line containing "<doc docid" starts a new document; a line containing
+// "<seg" contributes one sentence, whose text is taken from between the first
+// '>' and the last '<' on that line. For file_id == 0 a new Reference is
+// created per sentence; for later files the sentence is merged into the
+// Reference already stored at the same (document, sentence) position, keeping
+// the largest count seen for every n-gram and appending the sentence length.
+//
+// Returns false when `is` is NULL, when a "<seg" line appears before any
+// "<doc docid" line, or when no Reference exists at the current position
+// (e.g. a later file has more sentences than file 0). Returns true otherwise.
+bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id)
+{
+  if (is == NULL) return false;
+
+  string line;
+  size_t num_docs = 0;  // documents started so far in this stream
+  size_t sid = 0;       // sentence index inside the current document
+  while (getline(*is, line)) {
+
+    if (line.find("<doc docid") != std::string::npos) { // new document
+      ++num_docs;
+      // Only grow the document list when this document is not stored yet.
+      // Appending unconditionally would add a spurious empty document for
+      // every document of every additional reference file.
+      if (num_docs > m_references.size()) {
+        m_references.push_back(new ScopedVector<Reference>());
+      }
+      sid = 0;
+    } else if (line.find("<seg") != std::string::npos) { // new sentence
+      // A segment outside any document has nowhere to go; indexing
+      // m_references here would use an out-of-range position.
+      if (num_docs == 0) {
+        return false;
+      }
+      const size_t doc_id = num_docs - 1;
+
+      // If '>' is missing, npos + 1 wraps to 0 and the text starts at the
+      // beginning of the line; if the last '<' precedes `start`, the length
+      // wraps and substr() takes the rest of the line.
+      const std::string::size_type start = line.find_first_of('>') + 1;
+      std::string trans = line.substr(start, line.find_last_of('<') - start);
+      trans = preprocessSentence(trans);
+
+      if (file_id == 0) {
+        Reference* ref = new Reference;
+        m_references[doc_id]->push_back(ref); // Take ownership of the Reference object.
+      }
+
+      if (m_references[doc_id]->size() <= sid) {
+        return false;
+      }
+      Reference* const reference = m_references[doc_id]->get().at(sid);
+
+      NgramCounts counts;
+      const size_t length = CountNgrams(trans, counts, kBleuNgramOrder);
+
+      // for any counts larger than those already there, merge them in
+      for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
+        const NgramCounts::Key& ngram = ci->first;
+        const NgramCounts::Value newcount = ci->second;
+
+        NgramCounts::Value oldcount = 0;
+        reference->get_counts()->Lookup(ngram, &oldcount);
+        if (newcount > oldcount) {
+          reference->get_counts()->operator[](ngram) = newcount;
+        }
+      }
+
+      // add in the length
+      reference->push_back(length);
+      if (sid > 0 && sid % 100 == 0) {
+        TRACE_ERR(".");
+      }
+      ++sid;
+    }
+  }
+  return true;
+}
+
+// Computes the BLEU sufficient statistics for one hypothesis document.
+//
+// `sid` indexes the reference document in m_references. `text` is split into
+// sentences by splitDoc(); sentence i is scored against reference sentence i
+// of that document. For every n-gram order the clipped match count and the
+// hypothesis count are accumulated, followed by the summed reference length,
+// and the totals (kBleuNgramOrder * 2 + 1 values) are stored in `entry`.
+//
+// Throws std::runtime_error when `sid` has no reference document, and
+// std::out_of_range when the hypothesis has more sentences than that
+// reference document.
+void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
+{
+  if (sid >= m_references.size()) {
+    stringstream msg;
+    msg << "Sentence id (" << sid << ") not found in reference set";
+    throw runtime_error(msg.str());
+  }
+
+  std::vector<std::string> sentences = splitDoc(text);
+
+  // Same exception type that vector::at() would raise inside the loop, but
+  // raised up front and with a message that says what went wrong.
+  if (sentences.size() > m_references[sid]->size()) {
+    stringstream msg;
+    msg << "Document " << sid << " has " << sentences.size()
+        << " sentences but only " << m_references[sid]->size()
+        << " reference sentences";
+    throw out_of_range(msg.str());
+  }
+
+  vector<ScoreStatsType> totStats(kBleuNgramOrder * 2 + 1);
+
+  for (size_t i = 0; i < sentences.size(); ++i) {
+
+    NgramCounts testcounts;
+    // stats for this sentence: (correct, guess) per n-gram order
+    vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
+    string sentence = preprocessSentence(sentences[i]);
+    const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder);
+
+    // precision on each ngram type
+    for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
+         testcounts_it != testcounts.end(); ++testcounts_it) {
+      const NgramCounts::Value guess = testcounts_it->second;
+      const size_t len = testcounts_it->first.size();
+      NgramCounts::Value correct = 0;
+
+      NgramCounts::Value v = 0;
+      if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) {
+        // clip the match count to the count found in the reference
+        correct = min(v, guess);
+      }
+      stats[len * 2 - 2] += correct;
+      stats[len * 2 - 1] += guess;
+    }
+
+    const int reference_len = CalcReferenceLength(sid, i, length);
+    stats.push_back(reference_len);
+
+    // Add this sentence's stats to the document totals. After the push_back
+    // above both vectors hold kBleuNgramOrder * 2 + 1 values. A plain loop
+    // adds in ScoreStatsType itself and needs no <functional> header.
+    for (size_t k = 0; k < stats.size(); ++k) {
+      totStats[k] += stats[k];
+    }
+  }
+  entry.set(totStats);
+}
+
+// Splits a document into sentences at every occurrence of the four-character
+// marker " \n " (space, backslash, the letter n, space -- not a newline).
+//
+// Returns the pieces that precede each marker, in order.
+// NOTE(review): text after the last marker is not returned, so a document
+// whose final sentence is not followed by the marker loses that sentence.
+// Confirm against the n-best format before changing this.
+std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text)
+{
+  static const char kSeparator[] = " \\n ";
+  const std::string::size_type kSeparatorLen = sizeof(kSeparator) - 1; // 4, excludes the NUL
+
+  std::vector<std::string> res;
+
+  // size_type rather than the non-standard `uint`, so the offset has the
+  // same width as the positions returned by find().
+  std::string::size_type index = 0;
+  std::string::size_type end;
+
+  while ((end = text.find(kSeparator, index)) != std::string::npos) {
+    res.push_back(text.substr(index, end - index));
+    index = end + kSeparatorLen;
+  }
+  return res;
+}
+
+// Turns accumulated statistics into a BLEU score.
+//
+// `comps` must hold kBleuNgramOrder * 2 + 1 values: a (matched, guessed) pair
+// for every n-gram order followed by the reference length. The score is the
+// exponential of the mean log precision, reduced by a brevity term when the
+// reference length exceeds comps[1]. Returns 0 as soon as any order has no
+// matches.
+statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const
+{
+  CHECK(comps.size() == kBleuNgramOrder * 2 + 1);
+
+  float log_precision_sum = 0.0;
+  for (int order = 0; order < kBleuNgramOrder; ++order) {
+    const int matched = comps[2 * order];
+    const int guessed = comps[2 * order + 1];
+    if (matched == 0) {
+      return 0.0;
+    }
+    log_precision_sum += log(matched) - log(guessed);
+  }
+
+  float logbleu = log_precision_sum / kBleuNgramOrder;
+
+  // 1 - (reference length / test length); only a negative value is applied
+  const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1];
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+  return exp(logbleu);
+}
+
+// Returns the reference length to use for sentence `sentence_id` of document
+// `doc_id`, according to m_ref_length_type: CalcAverage(), CalcClosest(length)
+// or CalcShortest() of the stored Reference. `length` is only used by the
+// CLOSEST strategy. Any other strategy value prints an error to stderr and
+// terminates the process with exit code 1.
+int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length)
+{
+  if (m_ref_length_type == AVERAGE) {
+    return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
+  }
+  if (m_ref_length_type == CLOSEST) {
+    return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
+  }
+  if (m_ref_length_type == SHORTEST) {
+    return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
+  }
+
+  cerr << "unknown reference types." << endl;
+  exit(1);
+}
+
+}
+
diff --git a/mert/BleuDocScorer.h b/mert/BleuDocScorer.h
new file mode 100644
index 000000000..349745825
--- /dev/null
+++ b/mert/BleuDocScorer.h
@@ -0,0 +1,67 @@
+#ifndef MERT_BLEU_DOC_SCORER_H_
+#define MERT_BLEU_DOC_SCORER_H_
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "Types.h"
+#include "ScoreData.h"
+#include "StatisticsBasedScorer.h"
+#include "ScopedVector.h"
+#include "BleuScorer.h"
+
+namespace MosesTuning
+{
+
+/**
+ * Bleu document scoring
+ *
+ * Needs xml reference files, and nbest lists where sentences are separated by '\n'
+ *
+ * The references are read line by line: a line containing "<doc docid" opens
+ * a document and a line containing "<seg" supplies one sentence of it.
+ * A hypothesis document is split at the literal text " \n " (space,
+ * backslash, 'n', space) and sentence i is scored against reference
+ * sentence i of the same document.
+ *
+ * NOTE(review): BleuScorer declares protected members that are also named
+ * m_ref_length_type and m_references. The two data members declared in this
+ * class are separate objects that hide the inherited ones, so code inherited
+ * from BleuScorer does not see the values stored here.
+ */
+class BleuDocScorer : public BleuScorer
+{
+public:
+
+ explicit BleuDocScorer(const std::string& config = ""); // reads the "reflen" entry of config
+ ~BleuDocScorer();
+
+ virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry); // sid indexes the reference document
+ virtual statscore_t calculateScore(const std::vector<int>& comps) const; // comps: kBleuNgramOrder * 2 + 1 values
+
+ int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length); // length is used by the CLOSEST strategy only
+
+ // NOTE: this function is used for unit testing.
+ virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
+
+private:
+ ReferenceLengthType m_ref_length_type; // strategy chosen in the constructor; hides BleuScorer::m_ref_length_type
+
+ // reference translations, indexed by document and then by sentence; hides BleuScorer::m_references.
+ ScopedVector<ScopedVector<Reference> > m_references;
+
+ // no copying allowed
+ BleuDocScorer(const BleuDocScorer&);
+ BleuDocScorer& operator=(const BleuDocScorer&);
+
+ std::vector<std::string> splitDoc(const std::string& text); // splits a hypothesis document into sentences
+};
+
+/* /\** Computes sentence-level BLEU+1 score. */
+/* * This function is used in PRO. */
+/* *\/ */
+/* float sentenceLevelBleuPlusOne(const std::vector<float>& stats); */
+
+/* /\** Computes sentence-level BLEU score given a background corpus. */
+/* * This function is used in batch MIRA. */
+/* *\/ */
+/* float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg); */
+
+/* /\** */
+/* * Computes plain old BLEU from a vector of stats */
+/* *\/ */
+/* float unsmoothedBleu(const std::vector<float>& stats); */
+
+}
+
+#endif // MERT_BLEU_DOC_SCORER_H_
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 248b3e1d1..92d7fb9d5 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -65,14 +65,18 @@ public:
bool OpenReference(const char* filename, std::size_t file_id);
// NOTE: this function is used for unit testing.
- bool OpenReferenceStream(std::istream* is, std::size_t file_id);
+ virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
-private:
+ //private:
+protected:
ReferenceLengthType m_ref_length_type;
// reference translations.
ScopedVector<Reference> m_references;
+ // Constructor used by subclasses that register under their own scorer name.
+ // m_ref_length_type is given a defined value (CLOSEST) here so that no
+ // BleuScorer built through this constructor carries an indeterminate strategy.
+ BleuScorer(const std::string& name, const std::string& config)
+   : StatisticsBasedScorer(name, config), m_ref_length_type(CLOSEST) {}
+
// no copying allowed
BleuScorer(const BleuScorer&);
BleuScorer& operator=(const BleuScorer&);
diff --git a/mert/Jamfile b/mert/Jamfile
index 0ee32638e..bb4073f52 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -20,6 +20,7 @@ MiraWeightVector.cpp
HypPackEnumerator.cpp
Data.cpp
BleuScorer.cpp
+BleuDocScorer.cpp
SemposScorer.cpp
SemposOverlapping.cpp
InterpolatedScorer.cpp
diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp
index 02000c1bc..446ecb36b 100644
--- a/mert/ScorerFactory.cpp
+++ b/mert/ScorerFactory.cpp
@@ -3,6 +3,7 @@
#include <stdexcept>
#include "Scorer.h"
#include "BleuScorer.h"
+#include "BleuDocScorer.h"
#include "PerScorer.h"
#include "TerScorer.h"
#include "CderScorer.h"
@@ -20,6 +21,7 @@ vector<string> ScorerFactory::getTypes()
{
vector<string> types;
types.push_back(string("BLEU"));
+ types.push_back(string("BLEUDOC"));
types.push_back(string("PER"));
types.push_back(string("TER"));
types.push_back(string("CDER"));
@@ -34,6 +36,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config)
{
if (type == "BLEU") {
return new BleuScorer(config);
+ } else if (type == "BLEUDOC") {
+ return new BleuDocScorer(config);
} else if (type == "PER") {
return new PerScorer(config);
} else if (type == "TER") {