Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/other-builds/moses/.project20
-rw-r--r--lm/CMakeLists.txt89
-rw-r--r--lm/builder/CMakeLists.txt44
-rw-r--r--lm/builder/adjust_counts.cc2
-rw-r--r--moses/FF/Factory.cpp2
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.cpp4
-rw-r--r--moses/FF/TargetPreferencesFeature.cpp408
-rw-r--r--moses/FF/TargetPreferencesFeature.h121
-rw-r--r--moses/Manager.cpp4
-rw-r--r--moses/PP/TargetPreferencesPhraseProperty.cpp123
-rw-r--r--moses/PP/TargetPreferencesPhraseProperty.h71
-rw-r--r--moses/parameters/ReportingOptions.cpp12
m---------regtest0
-rwxr-xr-xscripts/generic/score-parallel.perl2
-rwxr-xr-xscripts/training/train-model.perl2
-rw-r--r--util/CMakeLists.txt68
-rw-r--r--util/exception.cc20
-rw-r--r--util/exception.hh12
-rw-r--r--util/stream/CMakeLists.txt44
-rw-r--r--util/string_stream.hh21
20 files changed, 648 insertions, 421 deletions
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index e8651529d..32bfa1927 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1636,6 +1636,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetNgramFeature.h</locationURI>
</link>
<link>
+ <name>FF/TargetPreferencesFeature.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.cpp</locationURI>
+ </link>
+ <link>
+ <name>FF/TargetPreferencesFeature.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.h</locationURI>
+ </link>
+ <link>
<name>FF/TargetWordInsertionFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetWordInsertionFeature.cpp</locationURI>
@@ -1996,6 +2006,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/SpanLengthPhraseProperty.h</locationURI>
</link>
<link>
+ <name>PP/TargetPreferencesPhraseProperty.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.cpp</locationURI>
+ </link>
+ <link>
+ <name>PP/TargetPreferencesPhraseProperty.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.h</locationURI>
+ </link>
+ <link>
<name>PP/TreeStructurePhraseProperty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/TreeStructurePhraseProperty.h</locationURI>
diff --git a/lm/CMakeLists.txt b/lm/CMakeLists.txt
index 5fca22c71..e3ef06f04 100644
--- a/lm/CMakeLists.txt
+++ b/lm/CMakeLists.txt
@@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 2.8.8)
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
-set(KENLM_MAX_ORDER 6)
+set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
@@ -64,76 +64,27 @@ set(EXE_LIST
build_binary
)
-# Iterate through the executable list
-foreach(exe ${EXE_LIST})
-
- # Compile the executable, linking against the requisite dependent object files
- add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
-
- # Link the executable against boost
- target_link_libraries(${exe} ${Boost_LIBRARIES} pthread)
-
- # Group executables together
- set_target_properties(${exe} PROPERTIES FOLDER executables)
-
-# End for loop
-endforeach(exe)
-
-
-# Install the executable files
-install(TARGETS ${EXE_LIST} DESTINATION bin)
-
+AddExes(EXES ${EXE_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+# Conditionally build the interpolation code
+if(BUILD_INTERPOLATE)
+ add_subdirectory(interpolate)
+endif()
if(BUILD_TESTING)
- # Explicitly list the Boost test files to be compiled
- set(KENLM_BOOST_TESTS_LIST
- left_test
- model_test
- partial_test
- )
-
- # Iterate through the Boost tests list
- foreach(test ${KENLM_BOOST_TESTS_LIST})
-
- # Compile the executable, linking against the requisite dependent object files
- add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
-
- # Require the following compile flag
- set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-
- # Link the executable against boost
- target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
-
- # model_test requires an extra command line parameter
- if ("${test}" STREQUAL "model_test")
- set(test_params
- ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
- ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
- )
- else()
- set(test_params
- ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
- )
- endif()
-
- # Specify command arguments for how to run each unit test
- #
- # Assuming that foo was defined via add_executable(foo ...),
- # the syntax $<TARGET_FILE:foo> gives the full path to the executable.
- #
- add_test(NAME ${test}_test
- COMMAND $<TARGET_FILE:${test}> ${test_params})
-
- # Group unit tests together
- set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
-
- # End for loop
- endforeach(test)
-
+ set(KENLM_BOOST_TESTS_LIST left_test partial_test)
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa)
+
+ # model_test requires an extra command line parameter
+ KenLMAddTest(TEST model_test
+ DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
+ ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa)
endif()
-
-
-
-
diff --git a/lm/builder/CMakeLists.txt b/lm/builder/CMakeLists.txt
index 01b415da2..cc0d3ed9f 100644
--- a/lm/builder/CMakeLists.txt
+++ b/lm/builder/CMakeLists.txt
@@ -52,36 +52,16 @@ set_target_properties(lmplz PROPERTIES FOLDER executables)
if(BUILD_TESTING)
- # Explicitly list the Boost test files to be compiled
- set(KENLM_BOOST_TESTS_LIST
- adjust_counts_test
- corpus_count_test
- )
-
- # Iterate through the Boost tests list
- foreach(test ${KENLM_BOOST_TESTS_LIST})
-
- # Compile the executable, linking against the requisite dependent object files
- add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
-
- # Require the following compile flag
- set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")
-
- # Link the executable against boost
- target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
-
- # Specify command arguments for how to run each unit test
- #
- # Assuming that foo was defined via add_executable(foo ...),
- # the syntax $<TARGET_FILE:foo> gives the full path to the executable.
- #
- add_test(NAME ${test}_test
- COMMAND $<TARGET_FILE:${test}>)
-
- # Group unit tests together
- set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
-
- # End for loop
- endforeach(test)
-
+ # Explicitly list the Boost test files to be compiled
+ set(KENLM_BOOST_TESTS_LIST
+ adjust_counts_test
+ corpus_count_test
+ )
+
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm>
+ $<TARGET_OBJECTS:kenlm_common>
+ $<TARGET_OBJECTS:kenlm_util>
+ $<TARGET_OBJECTS:kenlm_builder>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
endif()
diff --git a/lm/builder/adjust_counts.cc b/lm/builder/adjust_counts.cc
index 3ac3e8d20..b4c5ba8b7 100644
--- a/lm/builder/adjust_counts.cc
+++ b/lm/builder/adjust_counts.cc
@@ -269,7 +269,7 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
std::size_t same = full->end() - 1 - different;
// STEP 1: Output all the n-grams that changed.
- for (; lower_valid >= &streams[same]; --lower_valid) {
+ for (; lower_valid >= streams.begin() + same; --lower_valid) {
uint64_t order_minus_1 = lower_valid - streams_begin;
if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1])
(*lower_valid)->Value().Mark();
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 3435a6374..e44c5c509 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -42,6 +42,7 @@
#include "moses/FF/ControlRecombination.h"
#include "moses/FF/ConstrainedDecoding.h"
#include "moses/FF/SoftSourceSyntacticConstraintsFeature.h"
+#include "moses/FF/TargetPreferencesFeature.h"
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
@@ -254,6 +255,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(CoveredReferenceFeature);
MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
+ MOSES_FNAME(TargetPreferencesFeature);
MOSES_FNAME(TreeStructureFeature);
MOSES_FNAME(SoftMatchingFeature);
MOSES_FNAME(DynamicCacheBasedLanguageModel);
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
index 73575f8b1..afba59b47 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@@ -193,7 +193,7 @@ void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename,
if ( foundSourceLabelIndex != m_sourceLabels.end() ) {
labelSet.insert(foundSourceLabelIndex->second);
} else {
- FEATUREVERBOSE(2, "Ignoring unknown source label \"" << label << "\" "
+ FEATUREVERBOSE(2, "Ignoring undefined source label \"" << label << "\" "
<< "from core source label set file " << filename << "."
<< std::endl);
}
@@ -232,7 +232,7 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou
boost::unordered_map<std::string,size_t>::iterator foundSourceLabelIndex = m_sourceLabels.find( sourceLabel );
UTIL_THROW_IF2(foundSourceLabelIndex == m_sourceLabels.end(), GetScoreProducerDescription()
<< ": Target/source label joint count file " << m_targetSourceLHSJointCountFile
- << " contains unknown source label \"" << sourceLabel << "\".");
+ << " contains undefined source label \"" << sourceLabel << "\".");
const Factor* targetLabelFactor = factorCollection.AddFactor(targetLabel,true);
diff --git a/moses/FF/TargetPreferencesFeature.cpp b/moses/FF/TargetPreferencesFeature.cpp
new file mode 100644
index 000000000..4c79177af
--- /dev/null
+++ b/moses/FF/TargetPreferencesFeature.cpp
@@ -0,0 +1,408 @@
+#include <vector>
+#include <limits>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <assert.h>
+#include "TargetPreferencesFeature.h"
+#include "moses/StaticData.h"
+#include "moses/InputFileStream.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ChartManager.h"
+#include "moses/FactorCollection.h"
+#include "moses/TreeInput.h"
+#include "moses/PP/TargetPreferencesPhraseProperty.h"
+
+
+using namespace std;
+
+namespace Moses
+{
+
+void TargetPreferencesFeatureState::AddProbabilityForLHSLabel(size_t label, double cost)
+{
+  // Create the entry for this label; if one already exists, add the new mass to it instead.
+  typedef std::map<size_t,double>::iterator ProbIter;
+  const std::pair<ProbIter,bool> result = m_probabilitiesForLHSLabels.insert(std::make_pair(label, cost));
+  if ( result.second ) return;
+  result.first->second += cost;
+}
+
+void TargetPreferencesFeatureState::NormalizeProbabilitiesForLHSLabels(double denominator)
+{
+  // Rescale all stored label probabilities in place by dividing by `denominator`.
+  std::map<size_t,double>::iterator entry = m_probabilitiesForLHSLabels.begin();
+  for ( ; entry != m_probabilitiesForLHSLabels.end(); ++entry )
+    entry->second = entry->second / denominator;
+}
+
+double TargetPreferencesFeatureState::GetProbabilityForLHSLabel(size_t label, bool &isMatch) const
+{
+  // isMatch reports whether the label has an entry; a missing label yields probability 0.
+  const std::map<size_t,double>::const_iterator hit = m_probabilitiesForLHSLabels.find(label);
+  isMatch = (hit != m_probabilitiesForLHSLabels.end());
+  if ( !isMatch ) {
+    return 0;
+  }
+  return hit->second;
+}
+
+size_t TargetPreferencesFeatureState::hash() const
+{
+  // When states are not distinguished every state hashes to 0. Otherwise only the number
+  // of labels and the label indices are hashed; the probabilities are left out.
+  size_t seed = 0;
+  if (m_distinguishStates) {
+    boost::hash_combine(seed, m_probabilitiesForLHSLabels.size());
+    std::map<size_t,double>::const_iterator entry = m_probabilitiesForLHSLabels.begin();
+    for ( ; entry != m_probabilitiesForLHSLabels.end(); ++entry)
+      boost::hash_combine(seed, entry->first);
+  }
+  return seed;
+}
+
+bool TargetPreferencesFeatureState::operator==(const FFState& other) const
+{
+  // Equal when state distinction is off, when both operands are the same object, or when
+  // both hold the same set of label indices. The attached probabilities are not compared.
+  if (!m_distinguishStates || this == &other) {
+    return true;
+  }
+
+  const TargetPreferencesFeatureState* otherState =
+    dynamic_cast<const TargetPreferencesFeatureState*>(&other);
+  UTIL_THROW_IF2(otherState == NULL, "Wrong state type");
+
+  const std::map<size_t,double> &mine = m_probabilitiesForLHSLabels;
+  const std::map<size_t,double> &theirs = otherState->m_probabilitiesForLHSLabels;
+  if (mine.size() != theirs.size()) {
+    return false;
+  }
+  // Both maps are key-ordered and of equal size, so walk them in lockstep.
+  std::map<size_t,double>::const_iterator a = mine.begin();
+  std::map<size_t,double>::const_iterator b = theirs.begin();
+  while (a != mine.end()) {
+    if (a->first != b->first) return false;
+    ++a; ++b;
+  }
+  return true;
+}
+
+
+TargetPreferencesFeature::TargetPreferencesFeature(const std::string &line)
+ : StatefulFeatureFunction(2, line) // 2 is presumably the number of dense score components (EvaluateWhenApplied fills two scores) -- TODO confirm
+ , m_featureVariant(0) // defaults below can be overridden through SetParameter()
+ , m_distinguishStates(false)
+ , m_noMismatches(false)
+{
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
+ ReadParameters(); // NOTE(review): presumably feeds the key=value pairs of `line` to SetParameter() -- confirm in the base class
+ VERBOSE(1, " Done." << std::endl);
+ VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
+}
+// Destructor: no explicit cleanup is performed here.
+TargetPreferencesFeature::~TargetPreferencesFeature()
+{}
+
+void TargetPreferencesFeature::SetParameter(const std::string& key, const std::string& value)
+{
+  // Keys handled here:
+  //   label-set-file            path stored in m_labelSetFile (read by LoadLabelSet)
+  //   unknown-word-labels-file  path stored in m_unknownLeftHandSideFile
+  //   variant                   numeric feature variant
+  //   distinguish-states        bool handed to every state object this feature creates
+  //   no-mismatches             bool; zero tree probability then scores -infinity (non-glue rules)
+  // Every other key is forwarded to the base class.
+  if (key == "label-set-file") { m_labelSetFile = value; return; }
+  if (key == "unknown-word-labels-file") { m_unknownLeftHandSideFile = value; return; }
+  if (key == "variant") { m_featureVariant = Scan<size_t>(value); return; }
+  if (key == "distinguish-states") { m_distinguishStates = Scan<bool>(value); return; }
+  if (key == "no-mismatches") { m_noMismatches = Scan<bool>(value); return; }
+  StatefulFeatureFunction::SetParameter(key, value);
+}
+
+
+void TargetPreferencesFeature::Load(AllOptions::ptr const& opts)
+{
+ // don't change the loading order! LoadUnknownLeftHandSideFile() looks labels up in m_labels, which LoadLabelSet() fills.
+ LoadLabelSet();
+ LoadUnknownLeftHandSideFile();
+}
+// Reads "<label> <index>" lines from m_labelSetFile into m_labels / m_labelsByIndex and records the index of the "GlueTop" label.
+void TargetPreferencesFeature::LoadLabelSet()
+{
+  FEATUREVERBOSE(2, "Loading label set from file " << m_labelSetFile << " ...");
+  InputFileStream inFile(m_labelSetFile);
+
+  // read label set
+  std::string line;
+  m_labels.clear();
+  m_labelsByIndex.clear();
+  while (getline(inFile, line)) {
+    std::istringstream tokenizer(line);
+    std::string label;
+    size_t index;
+    // operator>> signals failure via the stream state, not by throwing, so test
+    // the stream: a malformed line must not be stored with an unset index.
+    if (!(tokenizer >> label >> index)) {
+      UTIL_THROW2(GetScoreProducerDescription()
+                  << ": Error reading label set file " << m_labelSetFile << " .");
+    }
+    std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_labels.insert( std::pair<std::string,size_t>(label,index) );
+    UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription()
+                   << ": Label set file " << m_labelSetFile << " should contain each label only once.");
+    // keep the index -> label table large enough for this index
+    if (index >= m_labelsByIndex.size()) {
+      m_labelsByIndex.resize(index+1);
+    }
+    m_labelsByIndex[index] = label;
+  }
+
+  inFile.Close();
+
+  std::list<std::string> specialLabels;
+  specialLabels.push_back("GlueTop");
+  for (std::list<std::string>::const_iterator iter=specialLabels.begin();
+       iter!=specialLabels.end(); ++iter) {
+    boost::unordered_map<std::string,size_t>::iterator found = m_labels.find(*iter);
+    UTIL_THROW_IF2(found == m_labels.end(), GetScoreProducerDescription()
+                   << ": Label set file " << m_labelSetFile << " should contain an entry for the special label \"" << *iter << "\".");
+    if (!(found->first).compare("GlueTop")) {
+      m_GlueTopLabel = found->second;
+    }
+  }
+  FEATUREVERBOSE2(2, " Done." << std::endl);
+}
+
+// Reads "<label> <count>" lines for unknown words and turns the counts into probabilities. Make sure to call this method _after_ LoadLabelSet()
+void TargetPreferencesFeature::LoadUnknownLeftHandSideFile()
+{
+  FEATUREVERBOSE(2, "Loading left-hand side labels for unknowns from file " << m_unknownLeftHandSideFile << std::endl);
+  InputFileStream inFile(m_unknownLeftHandSideFile);
+
+  // read left-hand side labels for unknowns
+  std::string line;
+  m_unknownLHSProbabilities.clear();
+  double countsSum = 0.0;
+  while (getline(inFile, line)) {
+    istringstream tokenizer(line);
+    std::string label;
+    double count;
+    // operator>> signals failure via the stream state; without this check a blank or
+    // malformed line would feed an unset count into the distribution.
+    if (!(tokenizer >> label >> count)) {
+      FEATUREVERBOSE(1, "WARNING: skipping malformed line \"" << line << "\" in file " << m_unknownLeftHandSideFile << std::endl);
+      continue;
+    }
+    boost::unordered_map<std::string,size_t>::iterator found = m_labels.find( label );
+    if ( found != m_labels.end() ) {
+      m_unknownLHSProbabilities[found->second] += count; // accumulates if the label occurs on several lines
+      countsSum += count;
+    } else {
+      FEATUREVERBOSE(1, "WARNING: undefined label \"" << label << "\" in file " << m_unknownLeftHandSideFile << std::endl);
+    }
+  }
+  // compute probabilities from counts (the label set size is added to the denominator)
+  countsSum += (double)m_labels.size();
+  for (std::map<size_t,double>::iterator iter=m_unknownLHSProbabilities.begin();
+       iter!=m_unknownLHSProbabilities.end(); ++iter) {
+    iter->second /= countsSum;
+  }
+
+  IFFEATUREVERBOSE(3) {
+    for (std::map<size_t,double>::iterator iter=m_unknownLHSProbabilities.begin();
+         iter!=m_unknownLHSProbabilities.end(); ++iter) {
+      FEATUREVERBOSE(3, GetScoreProducerDescription() << "::LoadUnknownLeftHandSideFile(): " << iter->first << " " << iter->second << std::endl);
+    }
+  }
+
+  inFile.Close();
+}
+// Chart decoding: builds this hypothesis' LHS label distribution from the rule's TargetPreferences property and the states of its sub-hypotheses; scores log tree probability and a mismatch indicator.
+FFState* TargetPreferencesFeature::EvaluateWhenApplied(
+  const ChartHypothesis& hypo,
+  int featureID, // used to index the state in the previous hypotheses
+  ScoreComponentCollection* accumulator) const
+{
+  streamsize cerr_precision = std::cerr.precision();
+  std::cerr.precision(20); // TODO: remove. just for debug purposes.
+
+  // dense scores
+  std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 2
+
+  // state: used to store tree probabilities of partial hypotheses
+  // and access the respective tree probabilities of subderivations
+  TargetPreferencesFeatureState *state = new TargetPreferencesFeatureState(m_distinguishStates);
+
+  size_t nNTs = 1;
+  double overallTreeProbability = 0.0;
+  bool isGlueGrammarRule = false;
+
+  // read TargetPreferences property
+  const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
+
+  FEATUREVERBOSE(2, "Phrase: " << currTarPhr << std::endl);
+
+  if (const PhraseProperty *property = currTarPhr.GetProperty("TargetPreferences")) {
+
+    const TargetPreferencesPhraseProperty *targetPreferencesPhraseProperty = static_cast<const TargetPreferencesPhraseProperty*>(property);
+
+    nNTs = targetPreferencesPhraseProperty->GetNumberOfNonTerminals();
+    double totalCount = targetPreferencesPhraseProperty->GetTotalCount();
+
+    // the count includes the left-hand side; with 0 the nNTs-1 computations below would wrap around
+    UTIL_THROW_IF2(nNTs == 0, GetScoreProducerDescription()
+                   << ": TargetPreferences property reports zero non-terminals.");
+
+    // get index map for underlying hypotheses
+    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+      currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
+
+    // retrieve states from previous hypotheses, if any
+    std::vector< const TargetPreferencesFeatureState* > prevStatesByNonTerminal(nNTs-1);
+
+    if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+      size_t nonTerminalNumber = 0;
+
+      for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
+        // consult rule for either word or non-terminal
+        const Word &word = currTarPhr.GetWord(phrasePos);
+        if ( word.IsNonTerminal() ) {
+          // non-terminal: consult subderivation
+          UTIL_THROW_IF2(nonTerminalNumber >= nNTs-1, GetScoreProducerDescription()
+                         << ": Target phrase has more non-terminals than its TargetPreferences property.");
+          size_t nonTermIndex = nonTermIndexMap[phrasePos];
+          const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
+          const TargetPreferencesFeatureState* prevState =
+            static_cast<const TargetPreferencesFeatureState*>(prevHypo->GetFFState(featureID));
+          prevStatesByNonTerminal[nonTerminalNumber] = prevState;
+
+          IFFEATUREVERBOSE(2) {
+            // some log output that is not required in any way for the functionality
+            const std::map<size_t,double> &prevHypoTreeProbabilities =
+              prevStatesByNonTerminal[nonTerminalNumber]->GetProbabilitiesForLHSLabels();
+            FEATUREVERBOSE(2, "Previous tree probs:");
+            for (std::map<size_t,double>::const_iterator iter=prevHypoTreeProbabilities.begin();
+                 iter!=prevHypoTreeProbabilities.end(); ++iter) {
+              FEATUREVERBOSE2(2, " " << m_labelsByIndex[iter->first] << " " << iter->second);
+            }
+            FEATUREVERBOSE2(2, std::endl);
+          }
+
+          ++nonTerminalNumber;
+        }
+      }
+      UTIL_THROW_IF2(nonTerminalNumber != nNTs-1, GetScoreProducerDescription()
+                     << ": Target phrase has fewer non-terminals than its TargetPreferences property.");
+    }
+
+    // inspect labelled rule items
+
+    overallTreeProbability = 0.0;
+
+    const std::list<TargetPreferencesPhrasePropertyItem> &targetPreferencesItems = targetPreferencesPhraseProperty->GetTargetPreferencesItems();
+
+    for (std::list<TargetPreferencesPhrasePropertyItem>::const_iterator targetPreferencesItem = targetPreferencesItems.begin();
+         targetPreferencesItem != targetPreferencesItems.end(); ++targetPreferencesItem) {
+
+      const std::list<size_t> &targetPreferencesRHS = targetPreferencesItem->GetTargetPreferencesRHS();
+      const std::list< std::pair<size_t,float> > &targetPreferencesLHSList = targetPreferencesItem->GetTargetPreferencesLHSList();
+
+      UTIL_THROW_IF2(targetPreferencesRHS.size() != nNTs-1, GetScoreProducerDescription() // runtime check: an assert would vanish under NDEBUG
+                     << ": TargetPreferences item has a wrong number of right-hand side labels.");
+
+      size_t currentTargetLabelsMismatches = nNTs - 1;
+      double matchingLabelsProbabilityProduct = 1.0;
+
+      size_t nonTerminalNumber=0;
+      for (std::list<size_t>::const_iterator targetPreferencesRHSIt = targetPreferencesRHS.begin();
+           targetPreferencesRHSIt != targetPreferencesRHS.end(); ++targetPreferencesRHSIt, ++nonTerminalNumber) {
+
+        bool isLabelMatch = false;
+        double matchingLabelsProbability =
+          prevStatesByNonTerminal[nonTerminalNumber]->GetProbabilityForLHSLabel(*targetPreferencesRHSIt,
+              isLabelMatch);
+        matchingLabelsProbabilityProduct *= matchingLabelsProbability;
+
+        if ( isLabelMatch ) {
+          currentTargetLabelsMismatches -= 1;
+        }
+      }
+
+      FEATUREVERBOSE(2, "matchingLabelsProbabilityProduct = " << matchingLabelsProbabilityProduct << std::endl);
+
+      // LHS labels seen with this RHS
+      for (std::list< std::pair<size_t,float> >::const_iterator targetPreferencesLHSIt = targetPreferencesLHSList.begin();
+           targetPreferencesLHSIt != targetPreferencesLHSList.end(); ++targetPreferencesLHSIt) {
+
+        size_t targetPreferenceLHS = targetPreferencesLHSIt->first;
+
+        if ( targetPreferenceLHS == m_GlueTopLabel ) {
+          isGlueGrammarRule = true;
+        }
+
+        // proceed with the actual probability computations
+        double ruleTargetPreferenceCount = targetPreferencesLHSIt->second;
+        double ruleTargetPreferenceProbability = ruleTargetPreferenceCount / totalCount;
+
+        FEATUREVERBOSE(2, " ruleTargetPreferenceProbability = " << ruleTargetPreferenceProbability << std::endl);
+
+        double weightedTargetPreferenceRuleProbability = ruleTargetPreferenceProbability * matchingLabelsProbabilityProduct;
+        if ( weightedTargetPreferenceRuleProbability != 0 ) {
+          state->AddProbabilityForLHSLabel(targetPreferenceLHS, weightedTargetPreferenceRuleProbability);
+        }
+        overallTreeProbability += weightedTargetPreferenceRuleProbability;
+      }
+    }
+
+    IFFEATUREVERBOSE(2) {
+      FEATUREVERBOSE(2, "overallTreeProbability = " << overallTreeProbability);
+      if ( overallTreeProbability > 1.0001 ) { // account for some rounding error
+        FEATUREVERBOSE2(2, " -- WARNING: overallTreeProbability > 1");
+      }
+      FEATUREVERBOSE2(2, std::endl);
+    }
+
+    if ( overallTreeProbability != 0 ) {
+      UTIL_THROW_IF2(!boost::math::isnormal(overallTreeProbability), GetScoreProducerDescription()
+                     << ": Oops. Numerical precision issues.");
+      state->NormalizeProbabilitiesForLHSLabels(overallTreeProbability);
+    }
+
+  } else {
+
+    // abort with error message if the phrase does not translate an unknown word
+    UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
+                   << ": Missing TargetPreferences property. Please check phrase table and glue rules.");
+
+    // unknown word
+    overallTreeProbability = 1.0;
+
+    for (std::map<size_t,double>::const_iterator iter=m_unknownLHSProbabilities.begin();
+         iter!=m_unknownLHSProbabilities.end(); ++iter) {
+      // update state
+      state->AddProbabilityForLHSLabel(iter->first, iter->second);
+    }
+  }
+
+  FEATUREVERBOSE(2, "-> OVERALLTREEPROB = " << overallTreeProbability << std::endl);
+
+  // add scores
+
+  // tree probability (preference grammar style)
+  newScores[0] = (overallTreeProbability == 0 ? 0 : std::log(overallTreeProbability) );
+  if ( m_noMismatches && (overallTreeProbability == 0) && !isGlueGrammarRule ) {
+    newScores[0] = -std::numeric_limits<float>::infinity();
+  }
+  // tree mismatch penalty
+  // TODO: deactivate the tree mismatch penalty score component automatically if feature configuration parameter no-mismatches=true
+  newScores[1] = (overallTreeProbability == 0 ? 1 : 0 );
+
+  accumulator->PlusEquals(this, newScores);
+
+  std::cerr.precision(cerr_precision);
+  return state;
+}
+
+}
+
diff --git a/moses/FF/TargetPreferencesFeature.h b/moses/FF/TargetPreferencesFeature.h
new file mode 100644
index 000000000..3a5b444e9
--- /dev/null
+++ b/moses/FF/TargetPreferencesFeature.h
@@ -0,0 +1,121 @@
+#pragma once
+
+#include <string>
+#include <map>
+#include <iostream>
+#include <boost/unordered_map.hpp>
+#include "StatefulFeatureFunction.h"
+#include "FFState.h"
+#include "util/exception.hh"
+#include <stdint.h>
+
+namespace Moses
+{
+
+class TargetPreferencesFeatureState : public FFState
+{
+ // Feature state: a map from label index to a probability value (m_probabilitiesForLHSLabels).
+public:
+ // distinguishStates: if false, hash() returns 0 and operator== returns true for every state.
+ TargetPreferencesFeatureState(bool distinguishStates)
+ : m_distinguishStates(distinguishStates)
+ {}
+ // Adds cost to the value stored for label; creates the entry if the label is not present yet.
+ void AddProbabilityForLHSLabel(size_t label, double cost);
+ // Divides every stored value by denominator.
+ void NormalizeProbabilitiesForLHSLabels(double denominator);
+ // Read-only access to the whole label -> value map.
+ const std::map<size_t,double> &GetProbabilitiesForLHSLabels() const {
+ return m_probabilitiesForLHSLabels;
+ }
+ // Returns the value stored for label and sets isMatch to true; returns 0 and sets isMatch to false if the label is absent.
+ double GetProbabilityForLHSLabel(size_t label, bool &isMatch) const;
+ // Hashes the map size and the label indices only (not the values); 0 when states are not distinguished.
+ size_t hash() const;
+ // Compares the label indices only (not the values); when states are distinguished, throws if other has a different type.
+ virtual bool operator==(const FFState& other) const;
+
+
+private:
+
+ const bool m_distinguishStates;
+ std::map<size_t,double> m_probabilitiesForLHSLabels;
+
+};
+
+
+class TargetPreferencesFeature : public StatefulFeatureFunction
+{
+ // Stateful feature with two dense scores, implemented for chart decoding only (the phrase-based overload throws).
+public:
+ // line: feature configuration line; it is passed on to the base class.
+ TargetPreferencesFeature(const std::string &line);
+
+ ~TargetPreferencesFeature();
+ // Always returns true, whatever factor mask is given.
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
+ // Returns a newly allocated state without any label entries.
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const {
+ return new TargetPreferencesFeatureState(m_distinguishStates);
+ }
+ // Handles label-set-file, unknown-word-labels-file, variant, distinguish-states, no-mismatches; other keys go to the base class.
+ void SetParameter(const std::string& key, const std::string& value);
+ // Loads the label set first, then the unknown-word label file.
+ void Load(AllOptions::ptr const& opts);
+ // Intentionally empty: this feature does no scoring here.
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+ {};
+ // Intentionally empty: this feature does no scoring here.
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const
+ {};
+ // Intentionally empty: this feature does no scoring here.
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
+ {}
+ // Phrase-based decoding: always throws; the return statement after the throw is not reached.
+ FFState* EvaluateWhenApplied(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const {
+ UTIL_THROW2(GetScoreProducerDescription() << ": feature currently not implemented for phrase-based decoding.");
+ return new TargetPreferencesFeatureState(m_distinguishStates);
+ };
+ // Chart decoding: adds the two scores to accumulator and returns a newly allocated state.
+ FFState* EvaluateWhenApplied(
+ const ChartHypothesis& cur_hypo,
+ int featureID, // used to index the state in the previous hypotheses
+ ScoreComponentCollection* accumulator) const;
+
+
+private:
+ // Configuration values, set in SetParameter().
+ std::string m_labelSetFile;
+ std::string m_unknownLeftHandSideFile;
+ size_t m_featureVariant;
+ bool m_distinguishStates;
+ bool m_noMismatches;
+ // Label tables filled by LoadLabelSet(). m_XRHSLabel and m_XLHSLabel are not referenced in TargetPreferencesFeature.cpp.
+ mutable boost::unordered_map<std::string,size_t> m_labels;
+ mutable std::vector<std::string> m_labelsByIndex;
+ mutable size_t m_XRHSLabel;
+ mutable size_t m_XLHSLabel;
+ mutable size_t m_GlueTopLabel;
+ std::map<size_t,double> m_unknownLHSProbabilities;
+ // File loaders called from Load(); LoadLabelSet() must run first.
+ void LoadLabelSet();
+ void LoadUnknownLeftHandSideFile();
+
+};
+
+}
+
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index e00457803..3650baabb 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -1723,8 +1723,8 @@ OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) c
out << *factor;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
- UTIL_THROW_IF2(factor==NULL,"No factor "<<i<<" at position "<< pos);
- out << fd << *factor;
+ if (factor) out << fd << *factor;
+ else out << fd << UNKNOWN_FACTOR;
}
if(markUnknown && word.IsOOV()) {
diff --git a/moses/PP/TargetPreferencesPhraseProperty.cpp b/moses/PP/TargetPreferencesPhraseProperty.cpp
deleted file mode 100644
index 9358ee4bf..000000000
--- a/moses/PP/TargetPreferencesPhraseProperty.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include "moses/PP/TargetPreferencesPhraseProperty.h"
-#include <iostream>
-#include <cstdio>
-#include <cstdlib>
-#include <sstream>
-#include <string>
-#include <queue>
-#include <assert.h>
-#include <limits>
-
-namespace Moses
-{
-
-void TargetPreferencesPhraseProperty::ProcessValue(const std::string &value)
-{
- std::istringstream tokenizer(value);
-
- if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side)
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of non-terminals. Flawed property?");
- }
- assert( m_nNTs > 0 );
-
- if (! (tokenizer >> m_totalCount)) { // second token: overall rule count
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read overall rule count. Flawed property?");
- }
- assert( m_totalCount > 0.0 );
-
-
- // read labelled rule items
-
- std::priority_queue<float> ruleLabelledCountsPQ;
-
- while (tokenizer.peek() != EOF) {
- try {
-
- TargetPreferencesPhrasePropertyItem item;
- size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
-
- if (m_nNTs == 1) {
-
- item.m_labelsRHSCount = m_totalCount;
-
- } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
-
- for (size_t i=0; i<m_nNTs-1; ++i) { // RHS non-terminal labels
- size_t labelRHS;
- if (! (tokenizer >> labelRHS) ) { // RHS non-terminal label
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side label index. Flawed property?");
- }
- item.m_labelsRHS.push_back(labelRHS);
- }
-
- if (! (tokenizer >> item.m_labelsRHSCount)) {
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side count. Flawed property?");
- }
-
- if (! (tokenizer >> numberOfLHSsGivenRHS)) {
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
- }
- }
-
- for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS non-terminal labels seen with this RHS
- size_t labelLHS;
- if (! (tokenizer >> labelLHS)) { // LHS non-terminal label
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read left-hand side label index. Flawed property?");
- }
- float ruleLabelledCount;
- if (! (tokenizer >> ruleLabelledCount)) {
- UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read count. Flawed property?");
- }
- item.m_labelsLHSList.push_back( std::make_pair(labelLHS,ruleLabelledCount) );
- ruleLabelledCountsPQ.push(ruleLabelledCount);
- }
-
- m_labelItems.push_back(item);
-
- } catch (const std::exception &e) {
- UTIL_THROW2("TargetPreferencesPhraseProperty: Read error. Flawed property?");
- }
- }
-
- // keep only top N label vectors
- const size_t N=50;
-
- if (ruleLabelledCountsPQ.size() > N) {
-
- float topNRuleLabelledCount = std::numeric_limits<int>::max();
- for (size_t i=0; !ruleLabelledCountsPQ.empty() && i<N; ++i) {
- topNRuleLabelledCount = ruleLabelledCountsPQ.top();
- ruleLabelledCountsPQ.pop();
- }
-
- size_t nKept=0;
- std::list<TargetPreferencesPhrasePropertyItem>::iterator itemIter=m_labelItems.begin();
- while (itemIter!=m_labelItems.end()) {
- if (itemIter->m_labelsRHSCount < topNRuleLabelledCount) {
- itemIter = m_labelItems.erase(itemIter);
- } else {
- std::list< std::pair<size_t,float> >::iterator itemLHSIter=(itemIter->m_labelsLHSList).begin();
- while (itemLHSIter!=(itemIter->m_labelsLHSList).end()) {
- if (itemLHSIter->second < topNRuleLabelledCount) {
- itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter);
- } else {
- if (nKept >= N) {
- itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter,(itemIter->m_labelsLHSList).end());
- } else {
- ++nKept;
- ++itemLHSIter;
- }
- }
- }
- if ((itemIter->m_labelsLHSList).empty()) {
- itemIter = m_labelItems.erase(itemIter);
- } else {
- ++itemIter;
- }
- }
- }
- }
-};
-
-} // namespace Moses
-
diff --git a/moses/PP/TargetPreferencesPhraseProperty.h b/moses/PP/TargetPreferencesPhraseProperty.h
deleted file mode 100644
index 84ef9b3c5..000000000
--- a/moses/PP/TargetPreferencesPhraseProperty.h
+++ /dev/null
@@ -1,71 +0,0 @@
-
-#pragma once
-
-#include "moses/PP/PhraseProperty.h"
-#include "util/exception.hh"
-#include <string>
-#include <list>
-
-namespace Moses
-{
-
-class TargetPreferencesPhrasePropertyItem
-{
- friend class TargetPreferencesPhraseProperty;
-
-public:
- TargetPreferencesPhrasePropertyItem() {};
-
- float GetTargetPreferencesRHSCount() const {
- return m_labelsRHSCount;
- };
-
- const std::list<size_t> &GetTargetPreferencesRHS() const {
- return m_labelsRHS;
- };
-
- const std::list< std::pair<size_t,float> > &GetTargetPreferencesLHSList() const {
- return m_labelsLHSList;
- };
-
-private:
- float m_labelsRHSCount;
- std::list<size_t> m_labelsRHS; // should be of size nNTs-1 (empty if initial rule, i.e. no right-hand side non-terminals)
- std::list< std::pair<size_t,float> > m_labelsLHSList; // list of left-hand sides for this right-hand side, with counts
-};
-
-
-class TargetPreferencesPhraseProperty : public PhraseProperty
-{
-public:
- TargetPreferencesPhraseProperty() {};
-
- virtual void ProcessValue(const std::string &value);
-
- size_t GetNumberOfNonTerminals() const {
- return m_nNTs;
- }
-
- float GetTotalCount() const {
- return m_totalCount;
- }
-
- const std::list<TargetPreferencesPhrasePropertyItem> &GetTargetPreferencesItems() const {
- return m_labelItems;
- };
-
- virtual const std::string *GetValueString() const {
- UTIL_THROW2("TargetPreferencesPhraseProperty: value string not available in this phrase property");
- return NULL;
- };
-
-protected:
-
- size_t m_nNTs;
- float m_totalCount;
-
- std::list<TargetPreferencesPhrasePropertyItem> m_labelItems;
-};
-
-} // namespace Moses
-
diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp
index d7d3cc64b..210950a3c 100644
--- a/moses/parameters/ReportingOptions.cpp
+++ b/moses/parameters/ReportingOptions.cpp
@@ -92,15 +92,17 @@ namespace Moses {
}
}
- params= param.GetParam("output-factors");
- if (params) factor_order = Scan<FactorType>(*params);
- if (factor_order.empty()) factor_order.assign(1,0);
if (ReportAllFactors) {
- for (size_t i = 1; i < MAX_NUM_FACTORS; ++i)
+ factor_order.clear();
+ for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
+ } else {
+ params= param.GetParam("output-factors");
+ if (params) factor_order = Scan<FactorType>(*params);
+ if (factor_order.empty()) factor_order.assign(1,0);
}
-
+
param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
diff --git a/regtest b/regtest
-Subproject bbea49d71c5b9835d9a777a82085e57a33a0bcf
+Subproject 0f892797ae03b37f7bf4470b172de83736bce95
diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl
index edf91e0cd..48f29c627 100755
--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@@ -314,7 +314,7 @@ if (!$inverse && defined($partsOfSpeechFile))
# merge target syntactic preferences labels files
if (!$inverse && defined($targetSyntacticPreferencesLabelsFile))
{
- my $cmd = "(echo \"GlueTop 0\"; echo \"GlueX 1\"; cat $TMPDIR/phrase-table.half.*.gz.syntaxLabels.tgtpref | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+3]}/\") > $targetSyntacticPreferencesLabelsFile";
+ my $cmd = "(echo \"GlueTop 0\"; echo \"GlueX 1\"; cat $TMPDIR/phrase-table.half.*.gz.syntaxLabels.tgtpref | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+1]}/\") > $targetSyntacticPreferencesLabelsFile";
print STDERR "Merging target syntactic preferences labels files: $cmd \n";
`$cmd`;
}
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index a2cf8b6f9..9c52235db 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -2378,7 +2378,7 @@ sub create_ini {
print INI "PhraseOrientationFeature";
# find the label of the left-hand side non-terminal in glue rules (target non-terminal set)
my $TOPLABEL = `head -n 1 $___GLUE_GRAMMAR_FILE`;
- $TOPLABEL =~ s/.* \|\|\| .* \[(.*)\] \|\|\| .*/\1/;
+ $TOPLABEL =~ s/.* \|\|\| .* \[(.*)\] \|\|\| .*/$1/;
chomp($TOPLABEL);
print INI " glue-label=$TOPLABEL\n";
}
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt
index 6f7f5e99b..8a544aa07 100644
--- a/util/CMakeLists.txt
+++ b/util/CMakeLists.txt
@@ -58,52 +58,24 @@ add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL
# Only compile and run unit tests if tests should be run
if(BUILD_TESTING)
- # Explicitly list the Boost test files to be compiled
- set(KENLM_BOOST_TESTS_LIST
- bit_packing_test
- file_piece_test
- joint_sort_test
- multi_intersection_test
- probing_hash_table_test
- read_compressed_test
- sorted_uniform_test
- tokenize_piece_test
- )
-
- # Iterate through the Boost tests list
- foreach(test ${KENLM_BOOST_TESTS_LIST})
-
- # Compile the executable, linking against the requisite dependent object files
- add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>)
-
- # Require the following compile flag
- set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-
- # Link the executable against boost
- target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
-
- # file_piece_test requires an extra command line parameter
- if ("${test}" STREQUAL "file_piece_test")
- set(test_params
- ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc
- )
- else()
- set(test_params
- )
- endif()
-
- # Specify command arguments for how to run each unit test
- #
- # Assuming that foo was defined via add_executable(foo ...),
- # the syntax $<TARGET_FILE:foo> gives the full path to the executable.
- #
- add_test(NAME ${test}_test
- COMMAND $<TARGET_FILE:${test}> ${test_params})
-
- # Group unit tests together
- set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
-
- # End for loop
- endforeach(test)
-
+ # Explicitly list the Boost test files to be compiled
+ set(KENLM_BOOST_TESTS_LIST
+ bit_packing_test
+ joint_sort_test
+ multi_intersection_test
+ probing_hash_table_test
+ read_compressed_test
+ sorted_uniform_test
+ tokenize_piece_test
+ )
+
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+
+ # file_piece_test requires an extra command line parameter
+ KenLMAddTest(TEST file_piece_test
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc)
endif()
diff --git a/util/exception.cc b/util/exception.cc
index 5ba06f065..01ff9a672 100644
--- a/util/exception.cc
+++ b/util/exception.cc
@@ -24,25 +24,23 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun
* them down.
*/
std::string old_text;
- std::swap(old_text, what_);
- StringStream stream;
- stream << what_;
- stream << file << ':' << line;
- if (func) stream << " in " << func << " threw ";
+ what_.swap(old_text);
+ what_ << file << ':' << line;
+ if (func) what_ << " in " << func << " threw ";
if (child_name) {
- stream << child_name;
+ what_ << child_name;
} else {
#ifdef __GXX_RTTI
- stream << typeid(this).name();
+ what_ << typeid(this).name();
#else
- stream << "an exception";
+ what_ << "an exception";
#endif
}
if (condition) {
- stream << " because `" << condition << '\'';
+ what_ << " because `" << condition << '\'';
}
- stream << ".\n";
- stream << old_text;
+ what_ << ".\n";
+ what_ << old_text;
}
namespace {
diff --git a/util/exception.hh b/util/exception.hh
index 00207b242..b30183e7f 100644
--- a/util/exception.hh
+++ b/util/exception.hh
@@ -8,7 +8,7 @@
#include <string>
#include <stdint.h>
-// TODO(hieu) delete this
+// TODO(hieu): delete this
#include <sstream>
namespace util {
@@ -20,7 +20,7 @@ class Exception : public std::exception {
Exception() throw();
virtual ~Exception() throw();
- const char *what() const throw() { return what_.c_str(); }
+ const char *what() const throw() { return what_.str().c_str(); }
// For use by the UTIL_THROW macros.
void SetLocation(
@@ -38,7 +38,7 @@ class Exception : public std::exception {
typedef T Identity;
};
- std::string what_;
+ StringStream what_;
};
/* This implements the normal operator<< for Exception and all its children.
@@ -46,12 +46,10 @@ class Exception : public std::exception {
* boost::enable_if.
*/
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
- // TODO(hieu): change this to
- // StringStream(e.what_) << data;
-
+ // TODO(hieu): delete this.
std::stringstream moses_hack;
moses_hack << data;
- e.what_ += moses_hack.str();
+ e.what_ << moses_hack.str();
return e;
}
diff --git a/util/stream/CMakeLists.txt b/util/stream/CMakeLists.txt
index 3e47f73e6..0c4c115dd 100644
--- a/util/stream/CMakeLists.txt
+++ b/util/stream/CMakeLists.txt
@@ -37,38 +37,14 @@ set(KENLM_UTIL_STREAM_SOURCE
if(BUILD_TESTING)
-
- # Explicitly list the Boost test files to be compiled
- set(KENLM_BOOST_TESTS_LIST
- io_test
- sort_test
- stream_test
- )
-
- # Iterate through the Boost tests list
- foreach(test ${KENLM_BOOST_TESTS_LIST})
-
- # Compile the executable, linking against the requisite dependent object files
- add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>)
-
- # Require the following compile flag
- set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-
- # Link the executable against boost
- target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
-
- # Specify command arguments for how to run each unit test
- #
- # Assuming that foo was defined via add_executable(foo ...),
- # the syntax $<TARGET_FILE:foo> gives the full path to the executable.
- #
- add_test(NAME ${test}_test
- COMMAND $<TARGET_FILE:${test}>)
-
- # Group unit tests together
- set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
-
- # End for loop
- endforeach(test)
-
+ # Explicitly list the Boost test files to be compiled
+ set(KENLM_BOOST_TESTS_LIST
+ io_test
+ sort_test
+ stream_test
+ )
+
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
endif()
diff --git a/util/string_stream.hh b/util/string_stream.hh
index ee76a7a57..28fdd4219 100644
--- a/util/string_stream.hh
+++ b/util/string_stream.hh
@@ -10,14 +10,8 @@ namespace util {
class StringStream : public FakeOStream<StringStream> {
public:
- // Semantics: appends to string. Remember to clear first!
-
- explicit StringStream()
- {}
- /*
- explicit StringStream(std::string &out)
- : out_(out) {}
- */
+ StringStream() {}
+
StringStream &flush() { return *this; }
StringStream &write(const void *data, std::size_t length) {
@@ -25,12 +19,11 @@ class StringStream : public FakeOStream<StringStream> {
return *this;
}
- const std::string &str() const
- { return out_; }
- void str(const std::string &val)
- {
- out_ = val;
- }
+ const std::string &str() const { return out_; }
+
+ void str(const std::string &val) { out_ = val; }
+
+ void swap(std::string &str) { std::swap(out_, str); }
protected:
friend class FakeOStream<StringStream>;