diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2016-01-13 17:57:20 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2016-01-13 17:57:20 +0300 |
commit | 38f999fa3f8f3f91ae8c898681a00ab6b0fdf539 (patch) | |
tree | e98f5456ef875f0f29a04681050475c54ca88325 | |
parent | 74285a4db3dd346a6127ea5375dd03e78ef743c3 (diff) | |
parent | e0fb456dbb3296bb1523d7a48b5f627322e33a4b (diff) |
Merge ../mosesdecoder into perf_moses2
-rw-r--r-- | contrib/other-builds/moses/.project | 20 | ||||
-rw-r--r-- | lm/CMakeLists.txt | 89 | ||||
-rw-r--r-- | lm/builder/CMakeLists.txt | 44 | ||||
-rw-r--r-- | lm/builder/adjust_counts.cc | 2 | ||||
-rw-r--r-- | moses/FF/Factory.cpp | 2 | ||||
-rw-r--r-- | moses/FF/SoftSourceSyntacticConstraintsFeature.cpp | 4 | ||||
-rw-r--r-- | moses/FF/TargetPreferencesFeature.cpp | 408 | ||||
-rw-r--r-- | moses/FF/TargetPreferencesFeature.h | 121 | ||||
-rw-r--r-- | moses/Manager.cpp | 4 | ||||
-rw-r--r-- | moses/PP/TargetPreferencesPhraseProperty.cpp | 123 | ||||
-rw-r--r-- | moses/PP/TargetPreferencesPhraseProperty.h | 71 | ||||
-rw-r--r-- | moses/parameters/ReportingOptions.cpp | 12 | ||||
m--------- | regtest | 0 | ||||
-rwxr-xr-x | scripts/generic/score-parallel.perl | 2 | ||||
-rwxr-xr-x | scripts/training/train-model.perl | 2 | ||||
-rw-r--r-- | util/CMakeLists.txt | 68 | ||||
-rw-r--r-- | util/exception.cc | 20 | ||||
-rw-r--r-- | util/exception.hh | 12 | ||||
-rw-r--r-- | util/stream/CMakeLists.txt | 44 | ||||
-rw-r--r-- | util/string_stream.hh | 21 |
20 files changed, 648 insertions, 421 deletions
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index e8651529d..32bfa1927 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1636,6 +1636,16 @@ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetNgramFeature.h</locationURI> </link> <link> + <name>FF/TargetPreferencesFeature.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.cpp</locationURI> + </link> + <link> + <name>FF/TargetPreferencesFeature.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.h</locationURI> + </link> + <link> <name>FF/TargetWordInsertionFeature.cpp</name> <type>1</type> <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetWordInsertionFeature.cpp</locationURI> @@ -1996,6 +2006,16 @@ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/SpanLengthPhraseProperty.h</locationURI> </link> <link> + <name>PP/TargetPreferencesPhraseProperty.cpp</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.cpp</locationURI> + </link> + <link> + <name>PP/TargetPreferencesPhraseProperty.h</name> + <type>1</type> + <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.h</locationURI> + </link> + <link> <name>PP/TreeStructurePhraseProperty.h</name> <type>1</type> <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TreeStructurePhraseProperty.h</locationURI> diff --git a/lm/CMakeLists.txt b/lm/CMakeLists.txt index 5fca22c71..e3ef06f04 100644 --- a/lm/CMakeLists.txt +++ b/lm/CMakeLists.txt @@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 2.8.8) # This CMake file was created by Lane Schwartz <dowobeha@gmail.com> -set(KENLM_MAX_ORDER 6) +set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order") add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER}) @@ -64,76 +64,27 @@ set(EXE_LIST build_binary ) -# Iterate through the executable list -foreach(exe ${EXE_LIST}) - - # Compile the executable, linking against the requisite dependent object files - add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>) - - # Link the executable against boost - target_link_libraries(${exe} ${Boost_LIBRARIES} pthread) - - # Group executables together - set_target_properties(${exe} PROPERTIES FOLDER executables) - -# End for loop -endforeach(exe) - - -# Install the executable files -install(TARGETS ${EXE_LIST} DESTINATION bin) - +AddExes(EXES ${EXE_LIST} + DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util> + LIBRARIES ${Boost_LIBRARIES} pthread) +# Conditionally build the interpolation code +if(BUILD_INTERPOLATE) + add_subdirectory(interpolate) +endif() if(BUILD_TESTING) - # Explicitly list the Boost test files to be compiled - set(KENLM_BOOST_TESTS_LIST - left_test - model_test - partial_test - ) - - # Iterate through the Boost tests list - foreach(test ${KENLM_BOOST_TESTS_LIST}) - - # Compile the executable, linking against the requisite dependent object files - add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>) - - # Require the following compile flag - set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK) - - # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES} pthread) - - # model_test requires an extra command line parameter - if ("${test}" STREQUAL "model_test") - set(test_params - ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa - ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa - ) - else() - set(test_params - ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa - ) - endif() - - # Specify command arguments for how to run each unit test - # - # Assuming that foo was defined via add_executable(foo ...), - # the syntax $<TARGET_FILE:foo> gives the full path to the executable. - # - add_test(NAME ${test}_test - COMMAND $<TARGET_FILE:${test}> ${test_params}) - - # Group unit tests together - set_target_properties(${test} PROPERTIES FOLDER "unit_tests") - - # End for loop - endforeach(test) - + set(KENLM_BOOST_TESTS_LIST left_test partial_test) + AddTests(TESTS ${KENLM_BOOST_TESTS_LIST} + DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util> + LIBRARIES ${Boost_LIBRARIES} pthread + TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa) + + # model_test requires an extra command line parameter + KenLMAddTest(TEST model_test + DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util> + LIBRARIES ${Boost_LIBRARIES} pthread + TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa + ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa) endif() - - - - diff --git a/lm/builder/CMakeLists.txt b/lm/builder/CMakeLists.txt index 01b415da2..cc0d3ed9f 100644 --- a/lm/builder/CMakeLists.txt +++ b/lm/builder/CMakeLists.txt @@ -52,36 +52,16 @@ set_target_properties(lmplz PROPERTIES FOLDER executables) if(BUILD_TESTING) - # Explicitly list the Boost test files to be compiled - set(KENLM_BOOST_TESTS_LIST - adjust_counts_test - corpus_count_test - ) - - # Iterate through the Boost tests list - foreach(test ${KENLM_BOOST_TESTS_LIST}) - - # Compile the executable, linking against the requisite dependent object files - add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>) - - # Require the following compile flag - set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK") - - # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES} pthread) - - # Specify command arguments for how to run each unit test - # - # Assuming that foo was defined via add_executable(foo ...), - # the syntax $<TARGET_FILE:foo> gives the full path to the executable. - # - add_test(NAME ${test}_test - COMMAND $<TARGET_FILE:${test}>) - - # Group unit tests together - set_target_properties(${test} PROPERTIES FOLDER "unit_tests") - - # End for loop - endforeach(test) - + # Explicitly list the Boost test files to be compiled + set(KENLM_BOOST_TESTS_LIST + adjust_counts_test + corpus_count_test + ) + + AddTests(TESTS ${KENLM_BOOST_TESTS_LIST} + DEPENDS $<TARGET_OBJECTS:kenlm> + $<TARGET_OBJECTS:kenlm_common> + $<TARGET_OBJECTS:kenlm_util> + $<TARGET_OBJECTS:kenlm_builder> + LIBRARIES ${Boost_LIBRARIES} pthread) endif() diff --git a/lm/builder/adjust_counts.cc b/lm/builder/adjust_counts.cc index 3ac3e8d20..b4c5ba8b7 100644 --- a/lm/builder/adjust_counts.cc +++ b/lm/builder/adjust_counts.cc @@ -269,7 +269,7 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) { std::size_t same = full->end() - 1 - different; // STEP 1: Output all the n-grams that changed. - for (; lower_valid >= &streams[same]; --lower_valid) { + for (; lower_valid >= streams.begin() + same; --lower_valid) { uint64_t order_minus_1 = lower_valid - streams_begin; if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1]) (*lower_valid)->Value().Mark(); diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 3435a6374..e44c5c509 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -42,6 +42,7 @@ #include "moses/FF/ControlRecombination.h" #include "moses/FF/ConstrainedDecoding.h" #include "moses/FF/SoftSourceSyntacticConstraintsFeature.h" +#include "moses/FF/TargetPreferencesFeature.h" #include "moses/FF/CoveredReferenceFeature.h" #include "moses/FF/TreeStructureFeature.h" #include "moses/FF/SoftMatchingFeature.h" @@ -254,6 +255,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(CoveredReferenceFeature); MOSES_FNAME(SourceGHKMTreeInputMatchFeature); MOSES_FNAME(SoftSourceSyntacticConstraintsFeature); + MOSES_FNAME(TargetPreferencesFeature); MOSES_FNAME(TreeStructureFeature); MOSES_FNAME(SoftMatchingFeature); MOSES_FNAME(DynamicCacheBasedLanguageModel); diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp index 73575f8b1..afba59b47 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp @@ -193,7 +193,7 @@ void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename, if ( foundSourceLabelIndex != m_sourceLabels.end() ) { labelSet.insert(foundSourceLabelIndex->second); } else { - FEATUREVERBOSE(2, "Ignoring unknown source label \"" << label << "\" " + FEATUREVERBOSE(2, "Ignoring undefined source label \"" << label << "\" " << "from core source label set file " << filename << "." << std::endl); } @@ -232,7 +232,7 @@ void SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCou boost::unordered_map<std::string,size_t>::iterator foundSourceLabelIndex = m_sourceLabels.find( sourceLabel ); UTIL_THROW_IF2(foundSourceLabelIndex == m_sourceLabels.end(), GetScoreProducerDescription() << ": Target/source label joint count file " << m_targetSourceLHSJointCountFile - << " contains unknown source label \"" << sourceLabel << "\"."); + << " contains undefined source label \"" << sourceLabel << "\"."); const Factor* targetLabelFactor = factorCollection.AddFactor(targetLabel,true); diff --git a/moses/FF/TargetPreferencesFeature.cpp b/moses/FF/TargetPreferencesFeature.cpp new file mode 100644 index 000000000..4c79177af --- /dev/null +++ b/moses/FF/TargetPreferencesFeature.cpp @@ -0,0 +1,408 @@ +#include <vector> +#include <limits> +#include <boost/math/special_functions/fpclassify.hpp> +#include <assert.h> +#include "TargetPreferencesFeature.h" +#include "moses/StaticData.h" +#include "moses/InputFileStream.h" +#include "moses/ScoreComponentCollection.h" +#include "moses/Hypothesis.h" +#include "moses/ChartHypothesis.h" +#include "moses/ChartManager.h" +#include "moses/FactorCollection.h" +#include "moses/TreeInput.h" +#include "moses/PP/TargetPreferencesPhraseProperty.h" + + +using namespace std; + +namespace Moses +{ + +void TargetPreferencesFeatureState::AddProbabilityForLHSLabel(size_t label, double cost) +{ + std::pair< std::map<size_t,double>::iterator, bool > inserted = + m_probabilitiesForLHSLabels.insert(std::pair<size_t,double>(label,cost)); + if ( !inserted.second ) { + (inserted.first)->second += cost; + } +} + +void TargetPreferencesFeatureState::NormalizeProbabilitiesForLHSLabels(double denominator) +{ + for ( std::map<size_t,double>::iterator iter=m_probabilitiesForLHSLabels.begin(); + iter!=m_probabilitiesForLHSLabels.end(); ++iter ) { + (iter->second) /= denominator; + } +} + +double TargetPreferencesFeatureState::GetProbabilityForLHSLabel(size_t label, bool &isMatch) const +{ + std::map<size_t,double>::const_iterator iter = m_probabilitiesForLHSLabels.find(label); + if ( iter != m_probabilitiesForLHSLabels.end() ) { + isMatch = true; + return iter->second; + } + isMatch = false; + return 0; +} + +size_t TargetPreferencesFeatureState::hash() const +{ + if (!m_distinguishStates) { + return 0; + } + size_t ret = 0; + boost::hash_combine(ret, m_probabilitiesForLHSLabels.size()); + for (std::map<size_t,double>::const_iterator it=m_probabilitiesForLHSLabels.begin(); + it!=m_probabilitiesForLHSLabels.end(); ++it) { + boost::hash_combine(ret, it->first); + } + return ret; +}; + +bool TargetPreferencesFeatureState::operator==(const FFState& other) const +{ + if (!m_distinguishStates) { + return true; + } + + if (this == &other) { + return true; + } + + const TargetPreferencesFeatureState* otherState = + dynamic_cast<const TargetPreferencesFeatureState*>(&other); + UTIL_THROW_IF2(otherState == NULL, "Wrong state type"); + + if (m_probabilitiesForLHSLabels.size() != (otherState->m_probabilitiesForLHSLabels).size()) { + return false; + } + std::map<size_t,double>::const_iterator thisIt, otherIt; + for (thisIt=m_probabilitiesForLHSLabels.begin(), otherIt=(otherState->m_probabilitiesForLHSLabels).begin(); + thisIt!=m_probabilitiesForLHSLabels.end(); ++thisIt, ++otherIt) { + if (thisIt->first != otherIt->first) { + return false; + } + } + return true; +}; + + +TargetPreferencesFeature::TargetPreferencesFeature(const std::string &line) + : StatefulFeatureFunction(2, line) + , m_featureVariant(0) + , m_distinguishStates(false) + , m_noMismatches(false) +{ + VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ..."); + ReadParameters(); + VERBOSE(1, " Done." << std::endl); + VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl); +} + +TargetPreferencesFeature::~TargetPreferencesFeature() +{} + +void TargetPreferencesFeature::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "label-set-file") { + m_labelSetFile = value; + } else if (key == "unknown-word-labels-file") { + m_unknownLeftHandSideFile = value; + } else if (key == "variant") { + m_featureVariant = Scan<size_t>(value); + } else if (key == "distinguish-states") { + m_distinguishStates = Scan<bool>(value); + } else if (key == "no-mismatches") { + m_noMismatches = Scan<bool>(value); + } else { + StatefulFeatureFunction::SetParameter(key, value); + } +} + + +void TargetPreferencesFeature::Load(AllOptions::ptr const& opts) +{ + // don't change the loading order! + LoadLabelSet(); + LoadUnknownLeftHandSideFile(); +} + +void TargetPreferencesFeature::LoadLabelSet() +{ + FEATUREVERBOSE(2, "Loading label set from file " << m_labelSetFile << " ..."); + InputFileStream inFile(m_labelSetFile); + + // read label set + std::string line; + m_labels.clear(); + m_labelsByIndex.clear(); + while (getline(inFile, line)) { + std::istringstream tokenizer(line); + std::string label; + size_t index; + try { + tokenizer >> label >> index; + } catch (const std::exception &e) { + UTIL_THROW2(GetScoreProducerDescription() + << ": Error reading label set file " << m_labelSetFile << " ."); + } + std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_labels.insert( std::pair<std::string,size_t>(label,index) ); + UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription() + << ": Label set file " << m_labelSetFile << " should contain each label only once."); + + if (index >= m_labelsByIndex.size()) { + m_labelsByIndex.resize(index+1); + } + m_labelsByIndex[index] = label; + } + + inFile.Close(); + + std::list<std::string> specialLabels; + specialLabels.push_back("GlueTop"); + for (std::list<std::string>::const_iterator iter=specialLabels.begin(); + iter!=specialLabels.end(); ++iter) { + boost::unordered_map<std::string,size_t>::iterator found = m_labels.find(*iter); + UTIL_THROW_IF2(found == m_labels.end(), GetScoreProducerDescription() + << ": Label set file " << m_labelSetFile << " should contain an entry for the special label \"" << *iter << "\"."); + if (!(found->first).compare("GlueTop")) { + m_GlueTopLabel = found->second; + } + } + FEATUREVERBOSE2(2, " Done." << std::endl); +} + +// Make sure to call this method _after_ LoadLabelSet() +void TargetPreferencesFeature::LoadUnknownLeftHandSideFile() +{ + FEATUREVERBOSE(2, "Loading left-hand side labels for unknowns from file " << m_unknownLeftHandSideFile << std::endl); + InputFileStream inFile(m_unknownLeftHandSideFile); + + // read left-hand side labels for unknowns + std::string line; + m_unknownLHSProbabilities.clear(); + double countsSum = 0.0; + while (getline(inFile, line)) { + istringstream tokenizer(line); + std::string label; + double count; + tokenizer >> label; + tokenizer >> count; + boost::unordered_map<std::string,size_t>::iterator found = m_labels.find( label ); + if ( found != m_labels.end() ) { + std::pair< std::map<size_t,double>::iterator, bool > inserted = + m_unknownLHSProbabilities.insert( std::pair<size_t,double>(found->second,count) ); + if ( !inserted.second ) { + (inserted.first)->second += count; + } + countsSum += count; + } else { + FEATUREVERBOSE(1, "WARNING: undefined label \"" << label << "\" in file " << m_unknownLeftHandSideFile << std::endl); + } + } + // compute probabilities from counts + countsSum += (double)m_labels.size(); + for (std::map<size_t,double>::iterator iter=m_unknownLHSProbabilities.begin(); + iter!=m_unknownLHSProbabilities.end(); ++iter) { + iter->second /= countsSum; + } + + IFFEATUREVERBOSE(3) { + for (std::map<size_t,double>::iterator iter=m_unknownLHSProbabilities.begin(); + iter!=m_unknownLHSProbabilities.end(); ++iter) { + FEATUREVERBOSE(3, GetScoreProducerDescription() << "::LoadUnknownLeftHandSideFile(): " << iter->first << " " << iter->second << std::endl); + } + } + + inFile.Close(); +} + +FFState* TargetPreferencesFeature::EvaluateWhenApplied( + const ChartHypothesis& hypo, + int featureID, // used to index the state in the previous hypotheses + ScoreComponentCollection* accumulator) const +{ + streamsize cerr_precision = std::cerr.precision(); + std::cerr.precision(20); // TODO: remove. just for debug purposes. + + // dense scores + std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 2 + + // state: used to store tree probabilities of partial hypotheses + // and access the respective tree probabilities of subderivations + TargetPreferencesFeatureState *state = new TargetPreferencesFeatureState(m_distinguishStates); + + size_t nNTs = 1; + double overallTreeProbability = 0.0; + bool isGlueGrammarRule = false; + + // read TargetPreferences property + const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase(); + + FEATUREVERBOSE(2, "Phrase: " << currTarPhr << std::endl); + + if (const PhraseProperty *property = currTarPhr.GetProperty("TargetPreferences")) { + + const TargetPreferencesPhraseProperty *targetPreferencesPhraseProperty = static_cast<const TargetPreferencesPhraseProperty*>(property); + +// IFFEATUREVERBOSE(2) { +// const std::string *targetPreferencesPhrasePropertyValueString = targetPreferencesPhraseProperty->GetValueString(); +// if (targetPreferencesPhrasePropertyValueString) { +// FEATUREVERBOSE(2, "PreferencesPhraseProperty " << *targetPreferencesPhrasePropertyValueString << std::endl); +// } else { +// FEATUREVERBOSE(2, "PreferencesPhraseProperty NULL" << std::endl); +// } +// } + + nNTs = targetPreferencesPhraseProperty->GetNumberOfNonTerminals(); + double totalCount = targetPreferencesPhraseProperty->GetTotalCount(); + + // get index map for underlying hypotheses + const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = + currTarPhr.GetAlignNonTerm().GetNonTermIndexMap(); + + // retrieve states from previous hypotheses, if any + std::vector< const TargetPreferencesFeatureState* > prevStatesByNonTerminal(nNTs-1); + + if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule + size_t nonTerminalNumber = 0; + + for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) { + // consult rule for either word or non-terminal + const Word &word = currTarPhr.GetWord(phrasePos); + if ( word.IsNonTerminal() ) { + // non-terminal: consult subderivation + size_t nonTermIndex = nonTermIndexMap[phrasePos]; + const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex); + const TargetPreferencesFeatureState* prevState = + static_cast<const TargetPreferencesFeatureState*>(prevHypo->GetFFState(featureID)); + prevStatesByNonTerminal[nonTerminalNumber] = prevState; + + IFFEATUREVERBOSE(2) { + // some log output that is not required in any way for the functionality + const std::map<size_t,double> &prevHypoTreeProbabilities = + prevStatesByNonTerminal[nonTerminalNumber]->GetProbabilitiesForLHSLabels(); + FEATUREVERBOSE(2, "Previous tree probs:"); + for (std::map<size_t,double>::const_iterator iter=prevHypoTreeProbabilities.begin(); + iter!=prevHypoTreeProbabilities.end(); ++iter) { + FEATUREVERBOSE2(2, " " << m_labelsByIndex[iter->first] << " " << iter->second); + } + FEATUREVERBOSE2(2, std::endl); + } + + ++nonTerminalNumber; + } + } + } + + // inspect labelled rule items + + overallTreeProbability = 0.0; + + const std::list<TargetPreferencesPhrasePropertyItem> &targetPreferencesItems = targetPreferencesPhraseProperty->GetTargetPreferencesItems(); + + for (std::list<TargetPreferencesPhrasePropertyItem>::const_iterator targetPreferencesItem = targetPreferencesItems.begin(); + targetPreferencesItem != targetPreferencesItems.end(); ++targetPreferencesItem) { + + const std::list<size_t> &targetPreferencesRHS = targetPreferencesItem->GetTargetPreferencesRHS(); + const std::list< std::pair<size_t,float> > &targetPreferencesLHSList = targetPreferencesItem->GetTargetPreferencesLHSList(); + + assert(targetPreferencesRHS.size() == nNTs-1); + + size_t currentTargetLabelsMismatches = nNTs - 1; + double matchingLabelsProbabilityProduct = 1.0; + + size_t nonTerminalNumber=0; + for (std::list<size_t>::const_iterator targetPreferencesRHSIt = targetPreferencesRHS.begin(); + targetPreferencesRHSIt != targetPreferencesRHS.end(); ++targetPreferencesRHSIt, ++nonTerminalNumber) { + + bool isLabelMatch = false; + double matchingLabelsProbability = + prevStatesByNonTerminal[nonTerminalNumber]->GetProbabilityForLHSLabel(*targetPreferencesRHSIt, + isLabelMatch); + matchingLabelsProbabilityProduct *= matchingLabelsProbability; + + if ( isLabelMatch ) { + currentTargetLabelsMismatches -= 1; + } + } + + FEATUREVERBOSE(2, "matchingLabelsProbabilityProduct = " << matchingLabelsProbabilityProduct << std::endl); + + // LHS labels seen with this RHS + for (std::list< std::pair<size_t,float> >::const_iterator targetPreferencesLHSIt = targetPreferencesLHSList.begin(); + targetPreferencesLHSIt != targetPreferencesLHSList.end(); ++targetPreferencesLHSIt) { + + size_t targetPreferenceLHS = targetPreferencesLHSIt->first; + + if ( targetPreferenceLHS == m_GlueTopLabel ) { + isGlueGrammarRule = true; + } + + // proceed with the actual probability computations + double ruleTargetPreferenceCount = targetPreferencesLHSIt->second; + double ruleTargetPreferenceProbability = ruleTargetPreferenceCount / totalCount; + + FEATUREVERBOSE(2, " ruleTargetPreferenceProbability = " << ruleTargetPreferenceProbability << std::endl); + + double weightedTargetPreferenceRuleProbability = ruleTargetPreferenceProbability * matchingLabelsProbabilityProduct; + if ( weightedTargetPreferenceRuleProbability != 0 ) { + state->AddProbabilityForLHSLabel(targetPreferenceLHS, weightedTargetPreferenceRuleProbability); + } + overallTreeProbability += weightedTargetPreferenceRuleProbability; + } + } + + IFFEATUREVERBOSE(2) { + FEATUREVERBOSE(2, "overallTreeProbability = " << overallTreeProbability); + if ( overallTreeProbability > 1.0001 ) { // account for some rounding error + FEATUREVERBOSE2(2, " -- WARNING: overallTreeProbability > 1"); + } + FEATUREVERBOSE2(2, std::endl); + } + + if ( overallTreeProbability != 0 ) { + UTIL_THROW_IF2(!boost::math::isnormal(overallTreeProbability), GetScoreProducerDescription() + << ": Oops. Numerical precision issues."); + state->NormalizeProbabilitiesForLHSLabels(overallTreeProbability); + } + + } else { + + // abort with error message if the phrase does not translate an unknown word + UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription() + << ": Missing TargetPreferences property. Please check phrase table and glue rules."); + + // unknown word + overallTreeProbability = 1.0; + + for (std::map<size_t,double>::const_iterator iter=m_unknownLHSProbabilities.begin(); + iter!=m_unknownLHSProbabilities.end(); ++iter) { + // update state + state->AddProbabilityForLHSLabel(iter->first, iter->second); + } + } + + FEATUREVERBOSE(2, "-> OVERALLTREEPROB = " << overallTreeProbability << std::endl); + + // add scores + + // tree probability (preference grammar style) + newScores[0] = (overallTreeProbability == 0 ? 0 : std::log(overallTreeProbability) ); + if ( m_noMismatches && (overallTreeProbability == 0) && !isGlueGrammarRule ) { + newScores[0] = -std::numeric_limits<float>::infinity(); + } + // tree mismatch penalty + // TODO: deactivate the tree mismatch penalty score component automatically if feature configuration parameter no-mismatches=true + newScores[1] = (overallTreeProbability == 0 ? 1 : 0 ); + + accumulator->PlusEquals(this, newScores); + + std::cerr.precision(cerr_precision); + return state; +} + +} + diff --git a/moses/FF/TargetPreferencesFeature.h b/moses/FF/TargetPreferencesFeature.h new file mode 100644 index 000000000..3a5b444e9 --- /dev/null +++ b/moses/FF/TargetPreferencesFeature.h @@ -0,0 +1,121 @@ +#pragma once + +#include <string> +#include <map> +#include <iostream> +#include <boost/unordered_map.hpp> +#include "StatefulFeatureFunction.h" +#include "FFState.h" +#include "util/exception.hh" +#include <stdint.h> + +namespace Moses +{ + +class TargetPreferencesFeatureState : public FFState +{ + +public: + + TargetPreferencesFeatureState(bool distinguishStates) + : m_distinguishStates(distinguishStates) + {} + + void AddProbabilityForLHSLabel(size_t label, double cost); + + void NormalizeProbabilitiesForLHSLabels(double denominator); + + const std::map<size_t,double> &GetProbabilitiesForLHSLabels() const { + return m_probabilitiesForLHSLabels; + } + + double GetProbabilityForLHSLabel(size_t label, bool &isMatch) const; + + size_t hash() const; + + virtual bool operator==(const FFState& other) const; + + +private: + + const bool m_distinguishStates; + std::map<size_t,double> m_probabilitiesForLHSLabels; + +}; + + +class TargetPreferencesFeature : public StatefulFeatureFunction +{ + +public: + + TargetPreferencesFeature(const std::string &line); + + ~TargetPreferencesFeature(); + + bool IsUseable(const FactorMask &mask) const { + return true; + } + + virtual const FFState* EmptyHypothesisState(const InputType &input) const { + return new TargetPreferencesFeatureState(m_distinguishStates); + } + + void SetParameter(const std::string& key, const std::string& value); + + void Load(AllOptions::ptr const& opts); + + void EvaluateInIsolation(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const + {}; + + void EvaluateWithSourceContext(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore = NULL) const + {}; + + void EvaluateTranslationOptionListWithSourceContext(const InputType &input + , const TranslationOptionList &translationOptionList) const + {} + + FFState* EvaluateWhenApplied( + const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const { + UTIL_THROW2(GetScoreProducerDescription() << ": feature currently not implemented for phrase-based decoding."); + return new TargetPreferencesFeatureState(m_distinguishStates); + }; + + FFState* EvaluateWhenApplied( + const ChartHypothesis& cur_hypo, + int featureID, // used to index the state in the previous hypotheses + ScoreComponentCollection* accumulator) const; + + +private: + + std::string m_labelSetFile; + std::string m_unknownLeftHandSideFile; + size_t m_featureVariant; + bool m_distinguishStates; + bool m_noMismatches; + + mutable boost::unordered_map<std::string,size_t> m_labels; + mutable std::vector<std::string> m_labelsByIndex; + mutable size_t m_XRHSLabel; + mutable size_t m_XLHSLabel; + mutable size_t m_GlueTopLabel; + std::map<size_t,double> m_unknownLHSProbabilities; + + void LoadLabelSet(); + void LoadUnknownLeftHandSideFile(); + +}; + +} + diff --git a/moses/Manager.cpp b/moses/Manager.cpp index e00457803..3650baabb 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -1723,8 +1723,8 @@ OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) c out << *factor; for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); - UTIL_THROW_IF2(factor==NULL,"No factor "<<i<<" at position "<< pos); - out << fd << *factor; + if (factor) out << fd << *factor; + else out << fd << UNKNOWN_FACTOR; } if(markUnknown && word.IsOOV()) { diff --git a/moses/PP/TargetPreferencesPhraseProperty.cpp b/moses/PP/TargetPreferencesPhraseProperty.cpp deleted file mode 100644 index 9358ee4bf..000000000 --- a/moses/PP/TargetPreferencesPhraseProperty.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "moses/PP/TargetPreferencesPhraseProperty.h" -#include <iostream> -#include <cstdio> -#include <cstdlib> -#include <sstream> -#include <string> -#include <queue> -#include <assert.h> -#include <limits> - -namespace Moses -{ - -void TargetPreferencesPhraseProperty::ProcessValue(const std::string &value) -{ - std::istringstream tokenizer(value); - - if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side) - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of non-terminals. Flawed property?"); - } - assert( m_nNTs > 0 ); - - if (! (tokenizer >> m_totalCount)) { // second token: overall rule count - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read overall rule count. Flawed property?"); - } - assert( m_totalCount > 0.0 ); - - - // read labelled rule items - - std::priority_queue<float> ruleLabelledCountsPQ; - - while (tokenizer.peek() != EOF) { - try { - - TargetPreferencesPhrasePropertyItem item; - size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max(); - - if (m_nNTs == 1) { - - item.m_labelsRHSCount = m_totalCount; - - } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule - - for (size_t i=0; i<m_nNTs-1; ++i) { // RHS non-terminal labels - size_t labelRHS; - if (! (tokenizer >> labelRHS) ) { // RHS non-terminal label - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side label index. Flawed property?"); - } - item.m_labelsRHS.push_back(labelRHS); - } - - if (! (tokenizer >> item.m_labelsRHSCount)) { - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side count. Flawed property?"); - } - - if (! (tokenizer >> numberOfLHSsGivenRHS)) { - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of left-hand sides. Flawed property?"); - } - } - - for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS non-terminal labels seen with this RHS - size_t labelLHS; - if (! (tokenizer >> labelLHS)) { // LHS non-terminal label - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read left-hand side label index. Flawed property?"); - } - float ruleLabelledCount; - if (! (tokenizer >> ruleLabelledCount)) { - UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read count. Flawed property?"); - } - item.m_labelsLHSList.push_back( std::make_pair(labelLHS,ruleLabelledCount) ); - ruleLabelledCountsPQ.push(ruleLabelledCount); - } - - m_labelItems.push_back(item); - - } catch (const std::exception &e) { - UTIL_THROW2("TargetPreferencesPhraseProperty: Read error. Flawed property?"); - } - } - - // keep only top N label vectors - const size_t N=50; - - if (ruleLabelledCountsPQ.size() > N) { - - float topNRuleLabelledCount = std::numeric_limits<int>::max(); - for (size_t i=0; !ruleLabelledCountsPQ.empty() && i<N; ++i) { - topNRuleLabelledCount = ruleLabelledCountsPQ.top(); - ruleLabelledCountsPQ.pop(); - } - - size_t nKept=0; - std::list<TargetPreferencesPhrasePropertyItem>::iterator itemIter=m_labelItems.begin(); - while (itemIter!=m_labelItems.end()) { - if (itemIter->m_labelsRHSCount < topNRuleLabelledCount) { - itemIter = m_labelItems.erase(itemIter); - } else { - std::list< std::pair<size_t,float> >::iterator itemLHSIter=(itemIter->m_labelsLHSList).begin(); - while (itemLHSIter!=(itemIter->m_labelsLHSList).end()) { - if (itemLHSIter->second < topNRuleLabelledCount) { - itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter); - } else { - if (nKept >= N) { - itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter,(itemIter->m_labelsLHSList).end()); - } else { - ++nKept; - ++itemLHSIter; - } - } - } - if ((itemIter->m_labelsLHSList).empty()) { - itemIter = m_labelItems.erase(itemIter); - } else { - ++itemIter; - } - } - } - } -}; - -} // namespace Moses - diff --git a/moses/PP/TargetPreferencesPhraseProperty.h b/moses/PP/TargetPreferencesPhraseProperty.h deleted file mode 100644 index 84ef9b3c5..000000000 --- a/moses/PP/TargetPreferencesPhraseProperty.h +++ /dev/null @@ -1,71 +0,0 @@ - -#pragma once - -#include "moses/PP/PhraseProperty.h" -#include "util/exception.hh" -#include <string> -#include <list> - -namespace Moses -{ - -class TargetPreferencesPhrasePropertyItem -{ - friend class TargetPreferencesPhraseProperty; - -public: - TargetPreferencesPhrasePropertyItem() {}; - - float GetTargetPreferencesRHSCount() const { - return m_labelsRHSCount; - }; - - const std::list<size_t> &GetTargetPreferencesRHS() const { - return m_labelsRHS; - }; - - const std::list< std::pair<size_t,float> > &GetTargetPreferencesLHSList() const { - return m_labelsLHSList; - }; - -private: - float m_labelsRHSCount; - std::list<size_t> m_labelsRHS; // should be of size nNTs-1 (empty if initial rule, i.e. no right-hand side non-terminals) - std::list< std::pair<size_t,float> > m_labelsLHSList; // list of left-hand sides for this right-hand side, with counts -}; - - -class TargetPreferencesPhraseProperty : public PhraseProperty -{ -public: - TargetPreferencesPhraseProperty() {}; - - virtual void ProcessValue(const std::string &value); - - size_t GetNumberOfNonTerminals() const { - return m_nNTs; - } - - float GetTotalCount() const { - return m_totalCount; - } - - const std::list<TargetPreferencesPhrasePropertyItem> &GetTargetPreferencesItems() const { - return m_labelItems; - }; - - virtual const std::string *GetValueString() const { - UTIL_THROW2("TargetPreferencesPhraseProperty: value string not available in this phrase property"); - return NULL; - }; - -protected: - - size_t m_nNTs; - float m_totalCount; - - std::list<TargetPreferencesPhrasePropertyItem> m_labelItems; -}; - -} // namespace Moses - diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp index d7d3cc64b..210950a3c 100644 --- a/moses/parameters/ReportingOptions.cpp +++ b/moses/parameters/ReportingOptions.cpp @@ -92,15 +92,17 @@ namespace Moses { } } - params= param.GetParam("output-factors"); - if (params) factor_order = Scan<FactorType>(*params); - if (factor_order.empty()) factor_order.assign(1,0); if (ReportAllFactors) { - for (size_t i = 1; i < MAX_NUM_FACTORS; ++i) + factor_order.clear(); + for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) factor_order.push_back(i); + } else { + params= param.GetParam("output-factors"); + if (params) factor_order = Scan<FactorType>(*params); + if (factor_order.empty()) factor_order.assign(1,0); } - + param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|")); param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter); diff --git a/regtest b/regtest -Subproject bbea49d71c5b9835d9a777a82085e57a33a0bcf +Subproject 0f892797ae03b37f7bf4470b172de83736bce95 diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl index edf91e0cd..48f29c627 100755 --- a/scripts/generic/score-parallel.perl +++ b/scripts/generic/score-parallel.perl @@ -314,7 +314,7 @@ if (!$inverse && defined($partsOfSpeechFile)) # merge target syntactic preferences labels files if (!$inverse && defined($targetSyntacticPreferencesLabelsFile)) { - my $cmd = "(echo \"GlueTop 0\"; echo \"GlueX 1\"; cat $TMPDIR/phrase-table.half.*.gz.syntaxLabels.tgtpref | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+3]}/\") > $targetSyntacticPreferencesLabelsFile"; + my $cmd = "(echo \"GlueTop 0\"; echo \"GlueX 1\"; cat $TMPDIR/phrase-table.half.*.gz.syntaxLabels.tgtpref | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+1]}/\") > $targetSyntacticPreferencesLabelsFile"; print STDERR "Merging target syntactic preferences labels files: $cmd \n"; `$cmd`; } diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index a2cf8b6f9..9c52235db 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -2378,7 +2378,7 @@ sub create_ini { print INI "PhraseOrientationFeature"; # find the label of the left-hand side non-terminal in glue rules (target non-terminal set) my $TOPLABEL = `head -n 1 $___GLUE_GRAMMAR_FILE`; - $TOPLABEL =~ s/.* \|\|\| .* \[(.*)\] \|\|\| .*/\1/; + $TOPLABEL =~ s/.* \|\|\| .* \[(.*)\] \|\|\| .*/$1/; chomp($TOPLABEL); print INI " glue-label=$TOPLABEL\n"; } diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index 6f7f5e99b..8a544aa07 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -58,52 +58,24 @@ add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL # Only compile and run unit tests if tests should be run if(BUILD_TESTING) - # Explicitly list the Boost test files to be compiled - set(KENLM_BOOST_TESTS_LIST - bit_packing_test - file_piece_test - joint_sort_test - multi_intersection_test - probing_hash_table_test - read_compressed_test - sorted_uniform_test - tokenize_piece_test - ) - - # Iterate through the Boost tests list - foreach(test ${KENLM_BOOST_TESTS_LIST}) - - # Compile the executable, linking against the requisite dependent object files - add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>) - - # Require the following compile flag - set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK) - - # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES} pthread) - - # file_piece_test requires an extra command line parameter - if ("${test}" STREQUAL "file_piece_test") - set(test_params - ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc - ) - else() - set(test_params - ) - endif() - - # Specify command arguments for how to run each unit test - # - # Assuming that foo was defined via add_executable(foo ...), - # the syntax $<TARGET_FILE:foo> gives the full path to the executable. - # - add_test(NAME ${test}_test - COMMAND $<TARGET_FILE:${test}> ${test_params}) - - # Group unit tests together - set_target_properties(${test} PROPERTIES FOLDER "unit_tests") - - # End for loop - endforeach(test) - + # Explicitly list the Boost test files to be compiled + set(KENLM_BOOST_TESTS_LIST + bit_packing_test + joint_sort_test + multi_intersection_test + probing_hash_table_test + read_compressed_test + sorted_uniform_test + tokenize_piece_test + ) + + AddTests(TESTS ${KENLM_BOOST_TESTS_LIST} + DEPENDS $<TARGET_OBJECTS:kenlm_util> + LIBRARIES ${Boost_LIBRARIES} pthread) + + # file_piece_test requires an extra command line parameter + KenLMAddTest(TEST file_piece_test + DEPENDS $<TARGET_OBJECTS:kenlm_util> + LIBRARIES ${Boost_LIBRARIES} pthread + TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc) endif() diff --git a/util/exception.cc b/util/exception.cc index 5ba06f065..01ff9a672 100644 --- a/util/exception.cc +++ b/util/exception.cc @@ -24,25 +24,23 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun * them down. */ std::string old_text; - std::swap(old_text, what_); - StringStream stream; - stream << what_; - stream << file << ':' << line; - if (func) stream << " in " << func << " threw "; + what_.swap(old_text); + what_ << file << ':' << line; + if (func) what_ << " in " << func << " threw "; if (child_name) { - stream << child_name; + what_ << child_name; } else { #ifdef __GXX_RTTI - stream << typeid(this).name(); + what_ << typeid(this).name(); #else - stream << "an exception"; + what_ << "an exception"; #endif } if (condition) { - stream << " because `" << condition << '\''; + what_ << " because `" << condition << '\''; } - stream << ".\n"; - stream << old_text; + what_ << ".\n"; + what_ << old_text; } namespace { diff --git a/util/exception.hh b/util/exception.hh index 00207b242..b30183e7f 100644 --- a/util/exception.hh +++ b/util/exception.hh @@ -8,7 +8,7 @@ #include <string> #include <stdint.h> -// TODO(hieu) delete this +// TODO(hieu): delete this #include <sstream> namespace util { @@ -20,7 +20,7 @@ class Exception : public std::exception { Exception() throw(); virtual ~Exception() throw(); - const char *what() const throw() { return what_.c_str(); } + const char *what() const throw() { return what_.str().c_str(); } // For use by the UTIL_THROW macros. void SetLocation( @@ -38,7 +38,7 @@ class Exception : public std::exception { typedef T Identity; }; - std::string what_; + StringStream what_; }; /* This implements the normal operator<< for Exception and all its children. @@ -46,12 +46,10 @@ class Exception : public std::exception { * boost::enable_if. */ template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) { - // TODO(hieu): change this to - // StringStream(e.what_) << data; - + // TODO(hieu): delete this. std::stringstream moses_hack; moses_hack << data; - e.what_ += moses_hack.str(); + e.what_ << moses_hack.str(); return e; } diff --git a/util/stream/CMakeLists.txt b/util/stream/CMakeLists.txt index 3e47f73e6..0c4c115dd 100644 --- a/util/stream/CMakeLists.txt +++ b/util/stream/CMakeLists.txt @@ -37,38 +37,14 @@ set(KENLM_UTIL_STREAM_SOURCE if(BUILD_TESTING) - - # Explicitly list the Boost test files to be compiled - set(KENLM_BOOST_TESTS_LIST - io_test - sort_test - stream_test - ) - - # Iterate through the Boost tests list - foreach(test ${KENLM_BOOST_TESTS_LIST}) - - # Compile the executable, linking against the requisite dependent object files - add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>) - - # Require the following compile flag - set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK) - - # Link the executable against boost - target_link_libraries(${test} ${Boost_LIBRARIES} pthread) - - # Specify command arguments for how to run each unit test - # - # Assuming that foo was defined via add_executable(foo ...), - # the syntax $<TARGET_FILE:foo> gives the full path to the executable. - # - add_test(NAME ${test}_test - COMMAND $<TARGET_FILE:${test}>) - - # Group unit tests together - set_target_properties(${test} PROPERTIES FOLDER "unit_tests") - - # End for loop - endforeach(test) - + # Explicitly list the Boost test files to be compiled + set(KENLM_BOOST_TESTS_LIST + io_test + sort_test + stream_test + ) + + AddTests(TESTS ${KENLM_BOOST_TESTS_LIST} + DEPENDS $<TARGET_OBJECTS:kenlm_util> + LIBRARIES ${Boost_LIBRARIES} pthread) endif() diff --git a/util/string_stream.hh b/util/string_stream.hh index ee76a7a57..28fdd4219 100644 --- a/util/string_stream.hh +++ b/util/string_stream.hh @@ -10,14 +10,8 @@ namespace util { class StringStream : public FakeOStream<StringStream> { public: - // Semantics: appends to string. Remember to clear first! - - explicit StringStream() - {} - /* - explicit StringStream(std::string &out) - : out_(out) {} - */ + StringStream() {} + StringStream &flush() { return *this; } StringStream &write(const void *data, std::size_t length) { @@ -25,12 +19,11 @@ class StringStream : public FakeOStream<StringStream> { return *this; } - const std::string &str() const - { return out_; } - void str(const std::string &val) - { - out_ = val; - } + const std::string &str() const { return out_; } + + void str(const std::string &val) { out_ = val; } + + void swap(std::string &str) { std::swap(out_, str); } protected: friend class FakeOStream<StringStream>; |