#include #include #include #include #include #include "CoveredReferenceFeature.h" #include "moses/ScoreComponentCollection.h" #include "moses/Hypothesis.h" #include "moses/Manager.h" #include "moses/ChartHypothesis.h" #include "moses/ChartManager.h" #include "moses/StaticData.h" #include "moses/InputFileStream.h" #include "moses/Util.h" #include "util/exception.hh" using namespace std; namespace Moses { size_t CoveredReferenceState::hash() const { UTIL_THROW2("TODO:Haven't figure this out yet"); } bool CoveredReferenceState::operator==(const FFState& other) const { UTIL_THROW2("TODO:Haven't figure this out yet"); } ////////////////////////////////////////////////////////////////////////////////////////////////////////////// void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedScores) const { long id = input.GetTranslationId(); boost::unordered_map >::const_iterator refIt = m_refs.find(id); multiset wordsInPhrase = GetWordsInPhrase(targetPhrase); multiset covered; set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(), refIt->second.begin(), refIt->second.end(), inserter(covered, covered.begin())); vector scores; scores.push_back(covered.size()); scoreBreakdown.Assign(this, scores); estimatedScores->Assign(this, scores); } void CoveredReferenceFeature::Load() { InputFileStream refFile(m_path); std::string line; const StaticData &staticData = StaticData::Instance(); long sentenceID = staticData.GetStartTranslationId(); while (getline(refFile, line)) { vector words = Tokenize(line, " "); multiset wordSet; // TODO make Tokenize work with other containers than vector copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin())); m_refs.insert(make_pair(sentenceID++, wordSet)); } } void CoveredReferenceFeature::SetParameter(const std::string& key, const std::string& value) { if (key == "path") { m_path = value; } else { StatefulFeatureFunction::SetParameter(key, value); } } FFState* CoveredReferenceFeature::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const { const CoveredReferenceState &prev = static_cast(*prev_state); CoveredReferenceState *ret = new CoveredReferenceState(prev); const Manager &mgr = cur_hypo.GetManager(); const InputType &input = mgr.GetSource(); long id = input.GetTranslationId(); // which words from the reference remain uncovered multiset remaining; boost::unordered_map >::const_iterator refIt = m_refs.find(id); if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint(id)); set_difference(refIt->second.begin(), refIt->second.end(), ret->m_coveredRef.begin(), ret->m_coveredRef.end(), inserter(remaining, remaining.begin())); // which of the remaining words are present in the current phrase multiset wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase()); multiset newCovered; set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(), remaining.begin(), remaining.end(), inserter(newCovered, newCovered.begin())); vector estimateScore = cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this); vector scores; scores.push_back(newCovered.size() - estimateScore[0]); accumulator->PlusEquals(this, scores); // update feature state multiset::const_iterator newCoveredIt; for (newCoveredIt = newCovered.begin(); newCoveredIt != newCovered.end(); newCoveredIt++) { ret->m_coveredRef.insert(*newCoveredIt); } return ret; } FFState* CoveredReferenceFeature::EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const { UTIL_THROW(util::Exception, "Not implemented"); } }