diff options
author | Hieu Hoang <hieu@hoang.co.uk> | 2013-07-03 16:35:10 +0400 |
---|---|---|
committer | Hieu Hoang <hieu@hoang.co.uk> | 2013-07-03 16:35:10 +0400 |
commit | 159911a92b4e36395aa8b5f85a7fa6a383ecb5cd (patch) | |
tree | 9bb06e4937a46976b364de472f4da9be44239481 | |
parent | c38e1a768218067970ddd2275850f548cd798f7f (diff) | |
parent | fb4a6fa2bb323acb883e4bf26eb5ef0bcf5cef29 (diff) |
Merge github.com:moses-smt/mosesdecoder into hieu_opt_input2
m--------- | contrib/arrow-pipelines/python/pcl | 0 | ||||
-rw-r--r-- | contrib/other-builds/extract-ghkm/.cproject | 130 | ||||
-rw-r--r-- | contrib/other-builds/extract-ghkm/.project | 209 | ||||
-rw-r--r-- | mert/BleuDocScorer.cpp | 206 | ||||
-rw-r--r-- | mert/BleuDocScorer.h | 67 | ||||
-rw-r--r-- | mert/BleuScorer.h | 8 | ||||
-rw-r--r-- | mert/Jamfile | 1 | ||||
-rw-r--r-- | mert/ScorerFactory.cpp | 4 |
8 files changed, 623 insertions, 2 deletions
diff --git a/contrib/arrow-pipelines/python/pcl b/contrib/arrow-pipelines/python/pcl -Subproject 6d5d13e1e06a871fbf7adf86dffda5113e315c1 +Subproject 1315185203a90b6f80acf2e47b4ea85b420b0d4 diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject new file mode 100644 index 000000000..8b549ee0c --- /dev/null +++ b/contrib/other-builds/extract-ghkm/.cproject @@ -0,0 +1,130 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage"> + <storageModule moduleId="org.eclipse.cdt.core.settings"> + <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002"> + <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug"> + <externalSettings/> + <extensions> + <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> + <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + </extensions> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug"> + <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath=""> + <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug"> + <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/> + <builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/> + <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler"> + <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/> + <option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler"> + <option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath"> + <listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/> + <listOptionValue builtIn="false" value=""${workspace_loc}/../../phrase-extract""/> + </option> + <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker"> + <option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths"> + <listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/lib64""/> + </option> + <option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs"> + <listOptionValue builtIn="false" value="boost_program_options-mt"/> + <listOptionValue builtIn="false" value="boost_thread-mt"/> + <listOptionValue builtIn="false" value="boost_filesystem-mt"/> + <listOptionValue builtIn="false" value="boost_iostreams-mt"/> + <listOptionValue builtIn="false" value="z"/> + <listOptionValue builtIn="false" value="bz2"/> + </option> + <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input"> + <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> + <additionalInput kind="additionalinput" paths="$(LIBS)"/> + </inputType> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/> + <tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler"> + <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> + </tool> + </toolChain> + </folderInfo> + </configuration> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> + </cconfiguration> + <cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494"> + <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release"> + <externalSettings/> + <extensions> + <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> + <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + </extensions> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release"> + <folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath=""> + <toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release"> + <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/> + <builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/> + <tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler"> + <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/> + <option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler"> + <option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/> + <tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker"> + <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input"> + <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> + <additionalInput kind="additionalinput" paths="$(LIBS)"/> + </inputType> + </tool> + <tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/> + <tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler"> + <inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> + </tool> + </toolChain> + </folderInfo> + </configuration> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> + </cconfiguration> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/> + </storageModule> + <storageModule moduleId="scannerConfiguration"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/> + </scannerConfigBuildInfo> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/> + <storageModule moduleId="refreshScope"/> +</cproject> diff --git a/contrib/other-builds/extract-ghkm/.project b/contrib/other-builds/extract-ghkm/.project new file mode 100644 index 000000000..b7c40f069 --- /dev/null +++ b/contrib/other-builds/extract-ghkm/.project @@ -0,0 +1,209 @@ +<?xml version="1.0" encoding="UTF-8"?> +<projectDescription> + <name>extract-ghkm</name> + <comment></comment> + <projects> + </projects> + <buildSpec> + <buildCommand> + <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name> + <triggers>clean,full,incremental,</triggers> + <arguments> + </arguments> + </buildCommand> + <buildCommand> + <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name> + <triggers>full,incremental,</triggers> + <arguments> + </arguments> + </buildCommand> + </buildSpec> + <natures> + <nature>org.eclipse.cdt.core.cnature</nature> + <nature>org.eclipse.cdt.core.ccnature</nature> + <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature> + <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature> + </natures> + <linkedResources> + <link> + <name>Alignment.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI> + </link> + <link> + <name>Alignment.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI> + </link> + <link> + <name>AlignmentGraph.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI> + </link> + <link> + <name>AlignmentGraph.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI> + </link> + <link> + <name>ComposedRule.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI> + </link> + <link> + <name>ComposedRule.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI> + </link> + <link> + <name>Exception.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI> + </link> + <link> + <name>ExtractGHKM.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI> + </link> + <link> + <name>ExtractGHKM.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI> + </link> + <link> + <name>InputFileStream.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI> + </link> + <link> + <name>InputFileStream.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI> + </link> + <link> + <name>Jamfile</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI> + </link> + <link> + <name>Main.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI> + </link> + <link> + <name>Node.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI> + </link> + <link> + <name>Node.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI> + </link> + <link> + <name>Options.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI> + </link> + <link> + <name>OutputFileStream.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI> + </link> + <link> + <name>OutputFileStream.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI> + </link> + <link> + <name>ParseTree.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI> + </link> + <link> + <name>ParseTree.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI> + </link> + <link> + <name>ScfgRule.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI> + </link> + <link> + <name>ScfgRule.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI> + </link> + <link> + <name>ScfgRuleWriter.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI> + </link> + <link> + <name>ScfgRuleWriter.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI> + </link> + <link> + <name>Span.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI> + </link> + <link> + <name>Span.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI> + </link> + <link> + <name>Subgraph.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI> + </link> + <link> + <name>Subgraph.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI> + </link> + <link> + <name>SyntaxTree.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI> + </link> + <link> + <name>SyntaxTree.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h</locationURI> + </link> + <link> + <name>XmlTree.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp</locationURI> + </link> + <link> + <name>XmlTree.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI> + </link> + <link> + <name>XmlTreeParser.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI> + </link> + <link> + <name>XmlTreeParser.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI> + </link> + <link> + <name>tables-core.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI> + </link> + <link> + <name>tables-core.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI> + </link> + </linkedResources> +</projectDescription> diff --git a/mert/BleuDocScorer.cpp b/mert/BleuDocScorer.cpp new file mode 100644 index 000000000..53ef0e506 --- /dev/null +++ b/mert/BleuDocScorer.cpp @@ -0,0 +1,206 @@ +#include "BleuDocScorer.h" + +#include <algorithm> +#include <cassert> +#include <cmath> +#include <climits> +#include <fstream> +#include <iostream> +#include <stdexcept> + +#include "util/check.hh" +#include "Ngram.h" +#include "Reference.h" +#include "Util.h" +#include "Vocabulary.h" + + +using namespace std; + +namespace +{ + +// configure regularisation +const char KEY_REFLEN[] = "reflen"; +const char REFLEN_AVERAGE[] = "average"; +const char REFLEN_SHORTEST[] = "shortest"; +const char REFLEN_CLOSEST[] = "closest"; + +} // namespace + +namespace MosesTuning +{ + + +BleuDocScorer::BleuDocScorer(const string& config) + : BleuScorer("BLEUDOC", config), + m_ref_length_type(CLOSEST) +{ + const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST); + if (reflen == REFLEN_AVERAGE) { + m_ref_length_type = AVERAGE; + } else if (reflen == REFLEN_SHORTEST) { + m_ref_length_type = SHORTEST; + } else if (reflen == REFLEN_CLOSEST) { + m_ref_length_type = CLOSEST; + } else { + throw runtime_error("Unknown reference length strategy: " + reflen); + } +} + +BleuDocScorer::~BleuDocScorer() {} + + +bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id) +{ + if (is == NULL) return false; + + string line; + size_t doc_id = -1; + size_t sid = 0; + while (getline(*is, line)) { + + if (line.find("<doc docid") != std::string::npos) { // new document + doc_id++; + m_references.push_back(new ScopedVector<Reference>()); + sid = 0; + } + else if (line.find("<seg") != std::string::npos) { //new sentence + int start = line.find_first_of('>') + 1; + std::string trans = line.substr(start, line.find_last_of('<')-start); + trans = preprocessSentence(trans); + + if (file_id == 0) { + Reference* ref = new Reference; + m_references[doc_id]->push_back(ref); // Take ownership of the Reference object. + } + + if (m_references[doc_id]->size() <= sid) { + return false; + } + NgramCounts counts; + size_t length = CountNgrams(trans, counts, kBleuNgramOrder); + + //for any counts larger than those already there, merge them in + for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) { + const NgramCounts::Key& ngram = ci->first; + const NgramCounts::Value newcount = ci->second; + + NgramCounts::Value oldcount = 0; + m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount); + if (newcount > oldcount) { + m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount; + } + } + //add in the length + + m_references[doc_id]->get().at(sid)->push_back(length); + if (sid > 0 && sid % 100 == 0) { + TRACE_ERR("."); + } + ++sid; + } + } + return true; +} + +void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry) +{ + if (sid >= m_references.size()) { + stringstream msg; + msg << "Sentence id (" << sid << ") not found in reference set"; + throw runtime_error(msg.str()); + } + + std::vector<std::string> sentences = splitDoc(text); + + vector<ScoreStatsType> totStats(kBleuNgramOrder * 2 + 1); + + for (uint i=0; i<sentences.size(); ++i) { + + NgramCounts testcounts; + // stats for this line + vector<ScoreStatsType> stats(kBleuNgramOrder * 2); + string sentence = preprocessSentence(sentences[i]); + const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder); + + //precision on each ngram type + for (NgramCounts::const_iterator testcounts_it = testcounts.begin(); + testcounts_it != testcounts.end(); ++testcounts_it) { + const NgramCounts::Value guess = testcounts_it->second; + const size_t len = testcounts_it->first.size(); + NgramCounts::Value correct = 0; + + NgramCounts::Value v = 0; + if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) { + correct = min(v, guess); + } + stats[len * 2 - 2] += correct; + stats[len * 2 - 1] += guess; + } + + const int reference_len = CalcReferenceLength(sid, i, length); + stats.push_back(reference_len); + + //ADD stats to totStats + std::transform(stats.begin(), stats.end(), totStats.begin(), + totStats.begin(), std::plus<int>()); + } + entry.set(totStats); +} + +std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text) +{ + std::vector<std::string> res; + + uint index = 0; + std::string::size_type end; + + while ((end = text.find(" \\n ", index)) != std::string::npos) { + res.push_back(text.substr(index,end-index)); + index = end + 4; + } + return res; +} + +statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const +{ + CHECK(comps.size() == kBleuNgramOrder * 2 + 1); + + float logbleu = 0.0; + for (int i = 0; i < kBleuNgramOrder; ++i) { + if (comps[2*i] == 0) { + return 0.0; + } + logbleu += log(comps[2*i]) - log(comps[2*i+1]); + + } + logbleu /= kBleuNgramOrder; + // reflength divided by test length + const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1]; + if (brevity < 0.0) { + logbleu += brevity; + } + return exp(logbleu); +} + +int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length) +{ + switch (m_ref_length_type) { + case AVERAGE: + return m_references[doc_id]->get().at(sentence_id)->CalcAverage(); + break; + case CLOSEST: + return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length); + break; + case SHORTEST: + return m_references[doc_id]->get().at(sentence_id)->CalcShortest(); + break; + default: + cerr << "unknown reference types." << endl; + exit(1); + } +} + +} + diff --git a/mert/BleuDocScorer.h b/mert/BleuDocScorer.h new file mode 100644 index 000000000..349745825 --- /dev/null +++ b/mert/BleuDocScorer.h @@ -0,0 +1,67 @@ +#ifndef MERT_BLEU_DOC_SCORER_H_ +#define MERT_BLEU_DOC_SCORER_H_ + +#include <ostream> +#include <string> +#include <vector> + +#include "Types.h" +#include "ScoreData.h" +#include "StatisticsBasedScorer.h" +#include "ScopedVector.h" +#include "BleuScorer.h" + +namespace MosesTuning +{ + +/** + * Bleu document scoring + * + * Needs xml reference files, and nbest lists where sentences are separated by '\n' + */ +class BleuDocScorer : public BleuScorer +{ +public: + + explicit BleuDocScorer(const std::string& config = ""); + ~BleuDocScorer(); + + virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry); + virtual statscore_t calculateScore(const std::vector<int>& comps) const; + + int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length); + + // NOTE: this function is used for unit testing. + virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id); + +private: + ReferenceLengthType m_ref_length_type; + + // reference translations. + ScopedVector<ScopedVector<Reference> > m_references; + + // no copying allowed + BleuDocScorer(const BleuDocScorer&); + BleuDocScorer& operator=(const BleuDocScorer&); + + std::vector<std::string> splitDoc(const std::string& text); +}; + +/* /\** Computes sentence-level BLEU+1 score. */ +/* * This function is used in PRO. */ +/* *\/ */ +/* float sentenceLevelBleuPlusOne(const std::vector<float>& stats); */ + +/* /\** Computes sentence-level BLEU score given a background corpus. */ +/* * This function is used in batch MIRA. */ +/* *\/ */ +/* float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg); */ + +/* /\** */ +/* * Computes plain old BLEU from a vector of stats */ +/* *\/ */ +/* float unsmoothedBleu(const std::vector<float>& stats); */ + +} + +#endif // MERT_BLEU_DOC_SCORER_H_ diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h index 248b3e1d1..92d7fb9d5 100644 --- a/mert/BleuScorer.h +++ b/mert/BleuScorer.h @@ -65,14 +65,18 @@ public: bool OpenReference(const char* filename, std::size_t file_id); // NOTE: this function is used for unit testing. - bool OpenReferenceStream(std::istream* is, std::size_t file_id); + virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id); -private: + //private: +protected: ReferenceLengthType m_ref_length_type; // reference translations. ScopedVector<Reference> m_references; + // constructor used by subclasses + BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {} + // no copying allowed BleuScorer(const BleuScorer&); BleuScorer& operator=(const BleuScorer&); diff --git a/mert/Jamfile b/mert/Jamfile index 0ee32638e..bb4073f52 100644 --- a/mert/Jamfile +++ b/mert/Jamfile @@ -20,6 +20,7 @@ MiraWeightVector.cpp HypPackEnumerator.cpp Data.cpp BleuScorer.cpp +BleuDocScorer.cpp SemposScorer.cpp SemposOverlapping.cpp InterpolatedScorer.cpp diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp index 02000c1bc..446ecb36b 100644 --- a/mert/ScorerFactory.cpp +++ b/mert/ScorerFactory.cpp @@ -3,6 +3,7 @@ #include <stdexcept> #include "Scorer.h" #include "BleuScorer.h" +#include "BleuDocScorer.h" #include "PerScorer.h" #include "TerScorer.h" #include "CderScorer.h" @@ -20,6 +21,7 @@ vector<string> ScorerFactory::getTypes() { vector<string> types; types.push_back(string("BLEU")); + types.push_back(string("BLEUDOC")); types.push_back(string("PER")); types.push_back(string("TER")); types.push_back(string("CDER")); @@ -34,6 +36,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config) { if (type == "BLEU") { return new BleuScorer(config); + } else if (type == "BLEUDOC") { + return new BleuDocScorer(config); } else if (type == "PER") { return new PerScorer(config); } else if (type == "TER") { |