diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2017-07-22 00:40:42 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-22 00:40:42 +0300 |
commit | 11a2ef4af57b86ed1bff22d604f41ec410b21383 (patch) | |
tree | a7d9bcca6234705f547f3ba387d2b18675a6882f | |
parent | e32b8f580533e2b1cdb5b8496a8658f277409a69 (diff) | |
parent | f07e60aece8c0bbd144b2e4028f2ed5114a521a5 (diff) |
Merge pull request #184 from ebay-hlt/moses-4.0
1. Cache based translation models which supports multiple scores and factors ...
-rw-r--r-- | .gitmodules | 7 | ||||
-rw-r--r-- | mert/CderScorer.h | 4 | ||||
-rw-r--r-- | mert/StatisticsBasedScorer.h | 9 | ||||
-rw-r--r-- | moses/FF/Factory.cpp | 2 | ||||
-rw-r--r-- | moses/Parameter.cpp | 3 | ||||
-rw-r--r-- | moses/ScoreComponentCollection.cpp | 16 | ||||
-rw-r--r-- | moses/ScoreComponentCollection.h | 2 | ||||
-rw-r--r-- | moses/TranslationModel/PhraseDictionaryCache.cpp | 424 | ||||
-rw-r--r-- | moses/TranslationModel/PhraseDictionaryCache.h | 176 | ||||
-rw-r--r-- | moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp | 170 | ||||
-rw-r--r-- | moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h | 17 | ||||
-rw-r--r-- | moses/TranslationTask.cpp | 9 | ||||
-rw-r--r-- | moses/server/TranslationRequest.cpp | 1 | ||||
m--------- | regtest | 0 |
14 files changed, 788 insertions, 52 deletions
diff --git a/.gitmodules b/.gitmodules index 90a9b30ba..8a1ca284f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,10 @@ +[submodule "regtest"] + path = regtest + url = https://github.com/ebay-hlt/moses-regression-tests [submodule "contrib/arrow-pipelines/python/pcl"] path = contrib/arrow-pipelines/python/pcl url = https://github.com/ianj-als/pcl.git [submodule "contrib/omtc/omtc"] path = contrib/omtc/omtc url = https://github.com/ianj-als/omtc.git -[submodule "regtest"] - path = regtest - url = https://github.com/moses-smt/moses-regression-tests + diff --git a/mert/CderScorer.h b/mert/CderScorer.h index 68fa81857..47c03fe86 100644 --- a/mert/CderScorer.h +++ b/mert/CderScorer.h @@ -31,6 +31,10 @@ public: virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const; + virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const{ + return totals[1]; + } + private: bool m_allowed_long_jumps; diff --git a/mert/StatisticsBasedScorer.h b/mert/StatisticsBasedScorer.h index ba45634cc..ebfa4586a 100644 --- a/mert/StatisticsBasedScorer.h +++ b/mert/StatisticsBasedScorer.h @@ -44,10 +44,11 @@ protected: */ virtual statscore_t calculateScore(const std::vector<ScoreStatsType>& totals) const = 0; - virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const { - UTIL_THROW(util::Exception, "getReferenceLength not implemented for this scorer type."); - return 0; - } + virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const{} +// { +// UTIL_THROW(util::Exception, "getReferenceLength not implemented for this scorer type."); +// return 0; +// } // regularisation RegularisationType m_regularization_type; diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 398d6593c..00dcac27d 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -10,6 +10,7 @@ #include "moses/TranslationModel/PhraseDictionaryScope3.h" #include "moses/TranslationModel/PhraseDictionaryTransliteration.h" #include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h" +#include "moses/TranslationModel/PhraseDictionaryCache.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h" @@ -243,6 +244,7 @@ FeatureRegistry::FeatureRegistry() // MOSES_FNAME(PhraseDictionaryDynSuffixArray); MOSES_FNAME(PhraseDictionaryTransliteration); MOSES_FNAME(PhraseDictionaryDynamicCacheBased); + MOSES_FNAME(PhraseDictionaryCache); MOSES_FNAME(PhraseDictionaryFuzzyMatch); MOSES_FNAME(ProbingPT); MOSES_FNAME(PhraseDictionaryMemoryPerSentence); diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index a1430ff61..8ce322ae2 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -793,6 +793,9 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName) case 15: // DCacheBased: ptType = "PhraseDictionaryDynamicCacheBased"; break; + case 16: // CachePT: + ptType = "PhraseDictionaryCache"; + break; default: break; } diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp index 7ce03c608..b1ef3747f 100644 --- a/moses/ScoreComponentCollection.cpp +++ b/moses/ScoreComponentCollection.cpp @@ -257,6 +257,22 @@ Assign(const FeatureFunction* sp, const std::vector<float>& scores) } } +void +ScoreComponentCollection:: +Assign(const FeatureFunction* sp, size_t idx, float sc) +{ + size_t numScores = sp->GetNumScoreComponents(); + size_t offset = sp->GetIndex(); + + if (idx >= numScores) { + UTIL_THROW(util::Exception, "Feature function " + << sp->GetScoreProducerDescription() << " specified index " + << idx << " dense scores or weights. Actually has " + << numScores); + } + + m_scores[idx + offset] = sc; +} void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp) { diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h index 9b7010746..62720512c 100644 --- a/moses/ScoreComponentCollection.h +++ b/moses/ScoreComponentCollection.h @@ -289,6 +289,8 @@ public: void Assign(const FeatureFunction* sp, const std::vector<float>& scores); + void Assign(const FeatureFunction* sp, size_t idx, float sc); + //! Special version Assign(ScoreProducer, vector<float>) //! to add the score from a single ScoreProducer that produces //! a single value diff --git a/moses/TranslationModel/PhraseDictionaryCache.cpp b/moses/TranslationModel/PhraseDictionaryCache.cpp new file mode 100644 index 000000000..604a42ee4 --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryCache.cpp @@ -0,0 +1,424 @@ +// vim:tabstop=2 + +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2006 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ +#include "util/exception.hh" + +#include "moses/TranslationModel/PhraseDictionary.h" +#include "moses/TranslationModel/PhraseDictionaryCache.h" +#include "moses/FactorCollection.h" +#include "moses/InputFileStream.h" +#include "moses/StaticData.h" +#include "moses/TargetPhrase.h" + + +using namespace std; + +namespace Moses +{ +std::map< const std::string, PhraseDictionaryCache * > PhraseDictionaryCache::s_instance_map; +PhraseDictionaryCache *PhraseDictionaryCache::s_instance = NULL; + +//! contructor +PhraseDictionaryCache::PhraseDictionaryCache(const std::string &line) + : PhraseDictionary(line, true) +{ + std::cerr << "Initializing PhraseDictionaryCache feature..." << std::endl; + + //disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0) + m_maxCacheSize = 0; + + m_entries = 0; + m_name = "default"; + m_constant = false; + + ReadParameters(); + + UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryCache feature named " + m_name + " is allowed"); + s_instance_map[m_name] = this; + s_instance = this; //for back compatibility + vector<float> weight = StaticData::Instance().GetWeights(this); + m_numscorecomponent = weight.size(); + m_sentences=0; +} + +PhraseDictionaryCache::~PhraseDictionaryCache() +{ + Clear(); +} + +void PhraseDictionaryCache::SetParameter(const std::string& key, const std::string& value) +{ + VERBOSE(2, "PhraseDictionaryCache::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl); + + if (key == "cache-name") { + m_name = Scan<std::string>(value); + } else if (key == "input-factor") { + m_inputFactorsVec = Tokenize<FactorType>(value,","); + } else if (key == "output-factor") { + m_outputFactorsVec = Tokenize<FactorType>(value,","); + } else { + PhraseDictionary::SetParameter(key, value); + } +} + +void PhraseDictionaryCache::CleanUpAfterSentenceProcessing(const InputType& source) { + Clear(source.GetTranslationId()); +} + +void PhraseDictionaryCache::InitializeForInput(ttasksptr const& ttask) +{ +#ifdef WITH_THREADS + boost::unique_lock<boost::shared_mutex> lock(m_cacheLock); +#endif + long tID = ttask->GetSource()->GetTranslationId(); + TargetPhraseCollection::shared_ptr tpc; + if (m_cacheTM.find(tID) == m_cacheTM.end()) return; + for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) { + tpc.reset(new TargetPhraseCollection(*(it->second).first)); + std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin(); + + while (it2 != tpc->end()) { + ((TargetPhrase*) *it2)->EvaluateInIsolation(it->first, GetFeaturesToApply()); + it2++; + } + } + if (tpc) { + tpc->NthElement(m_tableLimit); // sort the phrases for the decoder + } +} + +void PhraseDictionaryCache::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const +{ +#ifdef WITH_THREADS + boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock); +#endif + InputPathList::const_iterator iter; + for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) { + InputPath &inputPath = **iter; + long tID = inputPath.ttask->GetSource()->GetTranslationId(); + if (m_cacheTM.find(tID) == m_cacheTM.end()) continue; + const Phrase &source = inputPath.GetPhrase(); + TargetPhraseCollection::shared_ptr tpc; + for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) { + if (source.Compare(it->first)!=0) continue; + tpc.reset(new TargetPhraseCollection(*(it->second).first)); + inputPath.SetTargetPhrases(*this, tpc, NULL); + } + } +} + +TargetPhraseCollection::shared_ptr PhraseDictionaryCache::GetTargetPhraseCollection(const Phrase &source, long tID) const +{ +#ifdef WITH_THREADS + boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock); +#endif + TargetPhraseCollection::shared_ptr tpc; + + if(m_cacheTM.find(tID) == m_cacheTM.end()) return tpc; + + cacheMap::const_iterator it = m_cacheTM.at(tID).find(source); + if(it != m_cacheTM.at(tID).end()) { + tpc.reset(new TargetPhraseCollection(*(it->second).first)); + + std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin(); + + while (it2 != tpc->end()) { + ((TargetPhrase*) *it2)->EvaluateInIsolation(source, GetFeaturesToApply()); + it2++; + } + } + if (tpc) { + tpc->NthElement(m_tableLimit); // sort the phrases for the decoder + } + + return tpc; +} + +ChartRuleLookupManager* PhraseDictionaryCache::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/) +{ + UTIL_THROW(util::Exception, "Not implemented for Chart Decoder"); +} + +// friend +ostream& operator<<(ostream& out, const PhraseDictionaryCache& phraseDict) +{ + return out; +} + +void PhraseDictionaryCache::Insert(std::string &entries, long tID) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << " tID | " << tID << std::endl); + std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + Insert(elements, tID); + } +} + +void PhraseDictionaryCache::Insert(std::vector<std::string> entries, long tID) +{ + VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); + Update(tID, entries); + IFVERBOSE(3) Print(); +} + + +void PhraseDictionaryCache::Update(long tID, std::vector<std::string> entries) +{ + std::vector<std::string> pp; + + std::vector<std::string>::iterator it; + for(it = entries.begin(); it!=entries.end(); it++) { + pp.clear(); + pp = TokenizeMultiCharSeparator((*it), "|||"); + VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); + VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); + + if (pp.size() > 3) { + VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); + VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl); + Update(tID,pp[0], pp[1], pp[2], pp[3]); + } else if (pp.size() > 2){ + VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); + Update(tID,pp[0], pp[1], pp[2]); + } else { + Update(tID,pp[0], pp[1]); + } + } +} + +Scores PhraseDictionaryCache::Conv2VecFloats(std::string& s){ + std::vector<float> n; + if (s.empty()) + return n; + std::istringstream iss(s); + std::copy(std::istream_iterator<float>(iss), + std::istream_iterator<float>(), + std::back_inserter(n)); + return n; +} + +void PhraseDictionaryCache::Update(long tID, std::string sourcePhraseString, std::string targetPhraseString, std::string scoreString, std::string waString) +{ + const StaticData &staticData = StaticData::Instance(); + Phrase sourcePhrase(0); + TargetPhrase targetPhrase(0); + + char *err_ind_temp; + Scores scores = Conv2VecFloats(scoreString); + //target + targetPhrase.Clear(); + // change here for factored based CBTM + VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); + targetPhrase.CreateFromString(Output, m_outputFactorsVec, + targetPhraseString, /*factorDelimiter,*/ NULL); + VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl); + + //TODO: Would be better to reuse source phrases, but ownership has to be + //consistent across phrase table implementations + sourcePhrase.Clear(); + VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); + sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL); + VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); + + if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl); + + Update(tID, sourcePhrase, targetPhrase, scores, waString); +} + +void PhraseDictionaryCache::Update(long tID, Phrase sp, TargetPhrase tp, Scores scores, std::string waString) +{ + VERBOSE(3,"PhraseDictionaryCache::Update(Phrase sp, TargetPhrase tp, Scores scores, std::string waString)" << std::endl); +#ifdef WITH_THREADS + boost::unique_lock<boost::shared_mutex> lock(m_cacheLock); +#endif + VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| word-alignment |" << waString << "|" << std::endl); + // if there is no cache for the sentence tID, create one. + cacheMap::const_iterator it = m_cacheTM[tID].find(sp); + VERBOSE(3,"sp:|" << sp << "|" << std::endl); + if(it!=m_cacheTM.at(tID).end()) { + VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); + // sp is found + + TargetCollectionPair TgtCollPair = it->second; + TargetPhraseCollection::shared_ptr tpc = TgtCollPair.first; + Scores* sc = TgtCollPair.second; + const Phrase* p_ptr = NULL; + TargetPhrase* tp_ptr = NULL; + bool found = false; + size_t tp_pos=0; + while (!found && tp_pos < tpc->GetSize()) { + tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); + p_ptr = (const TargetPhrase*) tp_ptr; + if ((Phrase) tp == *p_ptr) { + found = true; + continue; + } + tp_pos++; + } + if (!found) { + VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); + std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp)); + Scores scoreVec; + for (unsigned int i=0; i<scores.size(); i++){ + scoreVec.push_back(scores[i]); + } + if(scoreVec.size() != m_numScoreComponents){ + VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl); + VERBOSE(1, "I am ignoring this..." <<endl); +// std::cin.ignore(); + } + targetPhrase->GetScoreBreakdown().Assign(this, scoreVec); + if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); + + tpc->Add(targetPhrase.release()); + + tp_pos = tpc->GetSize()-1; + sc = &scores; + m_entries++; + VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl); + } else { + Scores scoreVec; + for (unsigned int i=0; i<scores.size(); i++){ + scoreVec.push_back(scores[i]); + } + if(scoreVec.size() != m_numScoreComponents){ + VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl); + VERBOSE(1, "I am ignoring this..." <<endl); +// std::cin.ignore(); + } + tp_ptr->GetScoreBreakdown().Assign(this, scoreVec); + if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString); + VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl); + } + } else { + VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); + // p is not found + // create target collection + + TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection); + Scores* sc = new Scores(); + m_cacheTM[tID].insert(make_pair(sp,std::make_pair(tpc,sc))); + + //tp is not found + std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp)); + // scoreVec is a composition of decay_score and the feature scores + Scores scoreVec; + for (unsigned int i=0; i<scores.size(); i++){ + scoreVec.push_back(scores[i]); + } + if(scoreVec.size() != m_numScoreComponents){ + VERBOSE(1, "Scores do not match number of score components for phrase : "<< sp <<" ||| " << tp <<endl); + VERBOSE(1, "I am ignoring this..." <<endl); +// std::cin.ignore(); + } + targetPhrase->GetScoreBreakdown().Assign(this, scoreVec); + if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); + + tpc->Add(targetPhrase.release()); + sc = &scores; + m_entries++; + VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl); + } +} + +void PhraseDictionaryCache::Execute(std::string command, long tID) +{ + VERBOSE(2,"command:|" << command << "|" << std::endl); + std::vector<std::string> commands = Tokenize(command, "||"); + Execute(commands, tID); +} + +void PhraseDictionaryCache::Execute(std::vector<std::string> commands, long tID) +{ + for (size_t j=0; j<commands.size(); j++) { + Execute_Single_Command(commands[j]); + } + IFVERBOSE(2) Print(); +} + +void PhraseDictionaryCache::Execute_Single_Command(std::string command) +{ + if (command == "clear") { + VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "|. Cache cleared." << std::endl); + Clear(); + } else { + VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "| is unknown. Skipped." << std::endl); + } +} + +void PhraseDictionaryCache::Clear(){ + for(sentCacheMap::iterator it=m_cacheTM.begin(); it!=m_cacheTM.end(); it++){ + Clear(it->first); + } +} + +void PhraseDictionaryCache::Clear(long tID) +{ +#ifdef WITH_THREADS + boost::unique_lock<boost::shared_mutex> lock(m_cacheLock); +#endif + if (m_cacheTM.find(tID) == m_cacheTM.end()) return; + cacheMap::iterator it; + for(it = m_cacheTM.at(tID).begin(); it!=m_cacheTM.at(tID).end(); it++) { + (((*it).second).second)->clear(); + delete ((*it).second).second; + ((*it).second).first.reset(); + } + m_cacheTM.at(tID).clear(); + m_entries = 0; +} + + +void PhraseDictionaryCache::ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID) +{ + if (dlt_meta.find("cbtm") != dlt_meta.end()) { + Insert(dlt_meta["cbtm"], tID); + } + if (dlt_meta.find("cbtm-command") != dlt_meta.end()) { + Execute(dlt_meta["cbtm-command"], tID); + } + if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) { + Clear(); + } +} + +void PhraseDictionaryCache::Print() const +{ + VERBOSE(2,"PhraseDictionaryCache::Print()" << std::endl); +#ifdef WITH_THREADS + boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock); +#endif + for(sentCacheMap::const_iterator itr = m_cacheTM.begin(); itr!=m_cacheTM.end(); itr++) { + cacheMap::const_iterator it; + for(it = (itr->second).begin(); it!=(itr->second).end(); it++) { + std::string source = (it->first).ToString(); + TargetPhraseCollection::shared_ptr tpc = (it->second).first; + TargetPhraseCollection::iterator itr; + for(itr = tpc->begin(); itr != tpc->end(); itr++) { + std::string target = (*itr)->ToString(); + std::cout << source << " ||| " << target << std::endl; + } + source.clear(); + } + } +} + +}// end namespace diff --git a/moses/TranslationModel/PhraseDictionaryCache.h b/moses/TranslationModel/PhraseDictionaryCache.h new file mode 100644 index 000000000..b6e54b8fa --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryCache.h @@ -0,0 +1,176 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once + +#ifndef moses_PhraseDictionaryCache_H +#define moses_PhraseDictionaryCache_H + +#include "moses/TypeDef.h" +#include "moses/TranslationModel/PhraseDictionary.h" +#include "moses/TranslationTask.h" + +#include <boost/tuple/tuple.hpp> +#include <boost/tuple/tuple_io.hpp> + +#ifdef WITH_THREADS +#include <boost/thread/shared_mutex.hpp> +#include <boost/thread/locks.hpp> +#endif + +#define CBTM_SCORE_TYPE_UNDEFINED (-1) +#define CBTM_SCORE_TYPE_HYPERBOLA 0 +#define CBTM_SCORE_TYPE_POWER 1 +#define CBTM_SCORE_TYPE_EXPONENTIAL 2 +#define CBTM_SCORE_TYPE_COSINE 3 +#define CBTM_SCORE_TYPE_HYPERBOLA_REWARD 10 +#define CBTM_SCORE_TYPE_POWER_REWARD 11 +#define CBTM_SCORE_TYPE_EXPONENTIAL_REWARD 12 +#define PI 3.14159265 + + +namespace Moses +{ +class ChartParser; +class ChartCellCollectionBase; +class ChartRuleLookupManager; +class TranslationTask; +class PhraseDictionary; + +/** Implementation of a Cache-based phrase table. + */ +class PhraseDictionaryCache : public PhraseDictionary +{ + + typedef std::pair<TargetPhraseCollection::shared_ptr, Scores*> TargetCollectionPair; + typedef boost::unordered_map<Phrase, TargetCollectionPair> cacheMap; + typedef std::map<long, cacheMap> sentCacheMap; + + // factored translation + std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec; + + // data structure for the cache + sentCacheMap m_cacheTM; + long m_sentences; + unsigned int m_numscorecomponent; + size_t m_score_type; //scoring type of the match + size_t m_entries; //total number of entries in the cache + float m_lower_score; //lower_bound_score for no match + bool m_constant; //flag for setting a non-decaying cache + std::string m_initfiles; // vector of files loaded in the initialization phase + std::string m_name; // internal name to identify this instance of the Cache-based phrase table + +#ifdef WITH_THREADS + //multiple readers - single writer lock + mutable boost::shared_mutex m_cacheLock; +#endif + + friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryCache&); + +public: + PhraseDictionaryCache(const std::string &line); + ~PhraseDictionaryCache(); + + inline const std::string GetName() { + return m_name; + }; + inline void SetName(const std::string name) { + m_name = name; + } + + static const PhraseDictionaryCache* Instance(const std::string& name) { + if (s_instance_map.find(name) == s_instance_map.end()) { + return NULL; + } + return s_instance_map[name]; + } + + static PhraseDictionaryCache* InstanceNonConst(const std::string& name) { + if (s_instance_map.find(name) == s_instance_map.end()) { + return NULL; + } + return s_instance_map[name]; + } + + + static const PhraseDictionaryCache& Instance() { + return *s_instance; + } + + static PhraseDictionaryCache& InstanceNonConst() { + return *s_instance; + } + + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, + Phrase const& src) const{ + GetTargetPhraseCollection(src, ttask->GetSource()->GetTranslationId()); + } + + + // for phrase-based model + void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; + + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection(const Phrase &src, long tID) const; + + void CleanUpAfterSentenceProcessing(const InputType& source); + // for phrase-based model +// virtual void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; + + // for syntax/hiero model (CKY+ decoding) + ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t); + + void SetParameter(const std::string& key, const std::string& value); + + void InitializeForInput(ttasksptr const& ttask); + + void Print() const; // prints the cache + void Clear(); // clears the cache + void Clear(long tID); // clears cache of a sentence + + void Insert(std::string &entries, long tID); + void Execute(std::string command, long tID); + void ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID); + +protected: + + static PhraseDictionaryCache *s_instance; + static std::map< const std::string, PhraseDictionaryCache * > s_instance_map; + + Scores Conv2VecFloats(std::string&); + void Insert(std::vector<std::string> entries, long tID); + + void Update(long tID, std::vector<std::string> entries); + void Update(long tID, std::string sourceString, std::string targetString, std::string ScoreString="", std::string waString=""); + void Update(long tID, Phrase p, TargetPhrase tp, Scores scores, std::string waString=""); + + void Execute(std::vector<std::string> commands, long tID); + void Execute_Single_Command(std::string command); + + + void SetPreComputedScores(const unsigned int numScoreComponent); + Scores GetPreComputedScores(const unsigned int age); + + TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const; +}; + +} // namespace Moses + +#endif /* moses_PhraseDictionaryCache_H_ */ diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp index 80bbd5b3d..e1f29b356 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp @@ -27,6 +27,7 @@ #include "moses/StaticData.h" #include "moses/TargetPhrase.h" + using namespace std; namespace Moses @@ -48,11 +49,14 @@ PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std:: m_entries = 0; m_name = "default"; m_constant = false; + ReadParameters(); UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed"); s_instance_map[m_name] = this; s_instance = this; //for back compatibility + vector<float> weight = StaticData::Instance().GetWeights(this); + m_numscorecomponent = weight.size(); } PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased() @@ -66,8 +70,9 @@ void PhraseDictionaryDynamicCacheBased::Load(AllOptions::ptr const& opts) VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl); SetFeaturesToApply(); - vector<float> weight = StaticData::Instance().GetWeights(this); - SetPreComputedScores(weight.size()); + SetPreComputedScores(1); + // weights.size() doesn't make sense at all.. why would you have multiple ages for a unique phrase pair?? +// SetPreComputedScores(m_numscorecomponent); Load(m_initfiles); } @@ -96,12 +101,12 @@ void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file) //age |||| src_phr2 ||| trg_phr2 |||| src_phr3 ||| trg_phr3 |||| src_phr4 ||| trg_ph4 //.... //or - //age |||| src_phr ||| trg_phr ||| wa_align - //age |||| src_phr2 ||| trg_phr2 ||| wa_align2 |||| src_phr3 ||| trg_phr3 ||| wa_align3 |||| src_phr4 ||| trg_phr4 ||| wa_align4 + //age |||| src_phr ||| trg_phr ||| scores ||| wa_align + //age |||| src_phr2 ||| trg_phr2 ||| scores2 ||| wa_align2 |||| src_phr3 ||| trg_phr3 ||| scores3 ||| wa_align3 |||| src_phr4 ||| trg_phr4 ||| scores4 ||| wa_align4 //.... //each src_phr ad trg_phr are sequences of src and trg words, respectively, of any length //if provided, wa_align is the alignment between src_phr and trg_phr - // + //scores is the feature scores associated to the source phrase and the target phrase //there is no limit on the size of n // //entries can be repeated, but the last entry overwrites the previous @@ -141,6 +146,10 @@ void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, con m_name = Scan<std::string>(value); } else if (key == "cbtm-constant") { m_constant = Scan<bool>(value); + } else if (key == "input-factor") { + m_inputFactorsVec = Tokenize<FactorType>(value,","); + } else if (key == "output-factor") { + m_outputFactorsVec = Tokenize<FactorType>(value,","); } else { PhraseDictionary::SetParameter(key, value); } @@ -159,7 +168,7 @@ TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetP TargetPhraseCollection::shared_ptr tpc; cacheMap::const_iterator it = m_cacheTM.find(source); if(it != m_cacheTM.end()) { - tpc.reset(new TargetPhraseCollection(*(it->second).first)); + tpc.reset(new TargetPhraseCollection(*(boost::get<0>(it->second)))); std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin(); @@ -336,7 +345,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr //target targetPhrase.Clear(); VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); - targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order, + targetPhrase.CreateFromString(Output, m_outputFactorsVec, targetPhraseString, /*factorDelimiter,*/ NULL); VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); @@ -344,7 +353,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr //consistent across phrase table implementations sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, + sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); ClearEntries(sourcePhrase, targetPhrase); @@ -367,9 +376,10 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap // and then add new entry - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; + TargetCollectionPair TgtCollPair = it->second; + TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair); + AgeCollection* ac = boost::get<1>(TgtCollPair); + Scores* sc = boost::get<2>(TgtCollPair); const Phrase* p_ptr = NULL; TargetPhrase* tp_ptr = NULL; bool found = false; @@ -391,16 +401,20 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection + // no need to delete scores here m_entries--; VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl); VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl); + VERBOSE(3,"sc size:|" << sc->size() << "|" << std::endl); VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl); } if (tpc->GetSize() == 0) { // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection ac->clear(); + sc->clear(); tpc.reset(); delete ac; + delete sc; m_cacheTM.erase(sp); } @@ -434,7 +448,7 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> ent sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, + sourcePhrase.CreateFromString(Input, m_inputFactorsVec, *it, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); @@ -452,16 +466,19 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) VERBOSE(3,"found:|" << sp << "|" << std::endl); //sp is found - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; + TargetCollectionPair TgtCollPair = it->second; + TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair); + AgeCollection* ac = boost::get<1>(TgtCollPair); + Scores* sc = boost::get<2>(TgtCollPair); m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection ac->clear(); + sc->clear(); tpc.reset(); delete ac; + delete sc; m_cacheTM.erase(sp); } else { //do nothing @@ -502,7 +519,11 @@ void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); - if (pp.size() > 2) { + if (pp.size() > 3) { + VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); + VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl); + Update(pp[0], pp[1], ageString, pp[2], pp[3]); + } else if (pp.size() > 2){ VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); Update(pp[0], pp[1], ageString, pp[2]); } else { @@ -511,7 +532,18 @@ void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, } } -void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString) +Scores PhraseDictionaryDynamicCacheBased::Conv2VecFloats(std::string& s){ + std::vector<float> n; + if (s.empty()) + return n; + std::istringstream iss(s); + std::copy(std::istream_iterator<float>(iss), + std::istream_iterator<float>(), + std::back_inserter(n)); + return n; +} + +void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string scoreString, std::string waString) { VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl); const StaticData &staticData = StaticData::Instance(); @@ -523,11 +555,12 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s ageString = Trim(ageString); int age = strtod(ageString.c_str(), &err_ind_temp); VERBOSE(3, "age:|" << age << "|" << std::endl); - + Scores scores = Conv2VecFloats(scoreString); //target targetPhrase.Clear(); + // change here for factored based CBTM VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); - targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order, + targetPhrase.CreateFromString(Output, m_outputFactorsVec, targetPhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl); @@ -535,15 +568,15 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s //consistent across phrase table implementations sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, sourcePhraseString, /*factorDelimiter,*/ NULL); + sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl); - Update(sourcePhrase, targetPhrase, age, waString); + Update(sourcePhrase, targetPhrase, age, scores, waString); } -void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString) +void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, Scores scores, std::string waString) { VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)" << std::endl); #ifdef WITH_THREADS @@ -559,9 +592,10 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap // and then add new entry - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; + TargetCollectionPair TgtCollPair = it->second; + TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair); + AgeCollection* ac = boost::get<1>(TgtCollPair); + Scores* sc = boost::get<2>(TgtCollPair); // const TargetPhrase* p_ptr = NULL; const Phrase* p_ptr = NULL; TargetPhrase* tp_ptr = NULL; @@ -579,18 +613,40 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a if (!found) { VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp)); - - targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + Scores scoreVec; + scoreVec.push_back(GetPreComputedScores(age)[0]); + for (unsigned int i=0; i<scores.size(); i++){ + scoreVec.push_back(scores[i]); + } + if(scoreVec.size() != m_numScoreComponents){ + VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl); + VERBOSE(1, "Debugging: Press Enter to continue..." <<endl); + std::cin.ignore(); + } + targetPhrase->GetScoreBreakdown().Assign(this, scoreVec); +// targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); tpc->Add(targetPhrase.release()); tp_pos = tpc->GetSize()-1; ac->push_back(age); + sc = &scores; m_entries++; VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl); } else { - tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + Scores scoreVec; + scoreVec.push_back(GetPreComputedScores(age)[0]); + for (unsigned int i=0; i<scores.size(); i++){ + scoreVec.push_back(scores[i]); + } + if(scoreVec.size() != m_numScoreComponents){ + VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl); + VERBOSE(1, "Debugging: Press Enter to continue..." <<endl); + std::cin.ignore(); + } + tp_ptr->GetScoreBreakdown().Assign(this, scoreVec); +// tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString); ac->at(tp_pos) = age; VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl); @@ -603,15 +659,28 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection); AgeCollection* ac = new AgeCollection(); - m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac))); + Scores* sc = new Scores(); + m_cacheTM.insert(make_pair(sp,boost::make_tuple(tpc,ac,sc))); //tp is not found std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp)); - targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + // scoreVec is a composition of decay_score and the feature scores + Scores scoreVec; + scoreVec.push_back(GetPreComputedScores(age)[0]); + for (unsigned int i=0; i<scores.size(); i++){ + scoreVec.push_back(scores[i]); + } + if(scoreVec.size() != m_numScoreComponents){ + VERBOSE(1, "Scores do not match number of score components for phrase : "<< sp <<" ||| " << tp <<endl); + VERBOSE(1, "Debugging: Press Enter to continue..." <<endl); + std::cin.ignore(); + } + targetPhrase->GetScoreBreakdown().Assign(this, scoreVec); if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); tpc->Add(targetPhrase.release()); ac->push_back(age); + sc = &scores; m_entries++; VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl); } @@ -636,9 +705,10 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) VERBOSE(3,"found:|" << sp << "|" << std::endl); //sp is found - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; + TargetCollectionPair TgtCollPair = it->second; + TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair); + AgeCollection* ac = boost::get<1>(TgtCollPair); + Scores* sc = boost::get<2>(TgtCollPair); //loop in inverted order to allow a correct deletion of std::vectors tpc and ac for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) { @@ -652,18 +722,29 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl); tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection + // no need to change scores here m_entries--; } else { VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl); - tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age)); + // scoreVec is a composition of decay_score and the feature scores + size_t idx=0; + tp_ptr->GetScoreBreakdown().Assign(this, idx, GetPreComputedScores(tp_age)[0]); +// tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age)); ac->at(tp_pos) = tp_age; } } if (tpc->GetSize() == 0) { // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection - (((*it).second).second)->clear(); - delete ((*it).second).second; - ((*it).second).first.reset(); + // clear age collection + ac->clear(); + // clear score collection + sc->clear(); + // delete age collection + delete ac; + // delete score collection + delete sc; + // reset the target phrase collectio + tpc.reset(); m_cacheTM.erase(sp); } } else { @@ -707,9 +788,16 @@ void PhraseDictionaryDynamicCacheBased::Clear() #endif cacheMap::iterator it; for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { - (((*it).second).second)->clear(); - delete ((*it).second).second; - ((*it).second).first.reset(); + // clear age collection + (boost::get<1>((*it).second))->clear(); + // clear score collection + (boost::get<2>((*it).second))->clear(); + // delete age collection + delete boost::get<1>((*it).second); + // delete score collection + delete boost::get<2>((*it).second); + // reset the target phrase collection + (boost::get<0>(it->second)).reset(); } m_cacheTM.clear(); m_entries = 0; @@ -748,7 +836,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const cacheMap::const_iterator it; for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { std::string source = (it->first).ToString(); - TargetPhraseCollection::shared_ptr tpc = (it->second).first; + TargetPhraseCollection::shared_ptr tpc = boost::get<0>(it->second); TargetPhraseCollection::iterator itr; for(itr = tpc->begin(); itr != tpc->end(); itr++) { std::string target = (*itr)->ToString(); diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h index 09527debc..4ca1aef94 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h @@ -25,6 +25,9 @@ #include "moses/TypeDef.h" #include "moses/TranslationModel/PhraseDictionary.h" +#include <boost/tuple/tuple.hpp> +#include <boost/tuple/tuple_io.hpp> + #ifdef WITH_THREADS #include <boost/thread/shared_mutex.hpp> #include <boost/thread/locks.hpp> @@ -52,14 +55,19 @@ class ChartRuleLookupManager; class PhraseDictionaryDynamicCacheBased : public PhraseDictionary { +// typedef std::vector<unsigned int> AgeCollection; typedef std::vector<unsigned int> AgeCollection; - typedef std::pair<TargetPhraseCollection::shared_ptr , AgeCollection*> TargetCollectionAgePair; - typedef std::map<Phrase, TargetCollectionAgePair> cacheMap; + typedef boost::tuple<TargetPhraseCollection::shared_ptr , AgeCollection*, Scores*> TargetCollectionPair; + typedef std::map<Phrase, TargetCollectionPair> cacheMap; + + // factored translation + std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec; // data structure for the cache cacheMap m_cacheTM; std::vector<Scores> precomputedScores; unsigned int m_maxAge; + unsigned int m_numscorecomponent; size_t m_score_type; //scoring type of the match size_t m_entries; //total number of entries in the cache float m_lower_score; //lower_bound_score for no match @@ -151,13 +159,14 @@ protected: static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map; float decaying_score(const int age); // calculates the decay score given the age + Scores Conv2VecFloats(std::string&); void Insert(std::vector<std::string> entries); void Decay(); // traverse through the cache and decay each entry void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase void Update(std::vector<std::string> entries, std::string ageString); - void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString=""); - void Update(Phrase p, TargetPhrase tp, int age, std::string waString=""); + void Update(std::string sourceString, std::string targetString, std::string ageString, std::string ScoreString="", std::string waString=""); + void Update(Phrase p, TargetPhrase tp, int age, Scores scores, std::string waString=""); void ClearEntries(std::vector<std::string> entries); void ClearEntries(std::string sourceString, std::string targetString); diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 75df7443b..d0a44fb83 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -17,6 +17,8 @@ #include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h" #include "moses/Syntax/T2S/RuleMatcherSCFG.h" +#include "moses/TranslationModel/PhraseDictionaryCache.h" + #include "util/exception.hh" using namespace std; @@ -149,6 +151,13 @@ interpret_dlt() typedef std::map<std::string,std::string> dltmap_t; BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) { dltmap_t::const_iterator i = M.find("type"); + if (i->second == "cache") { + map<string, string>::const_iterator k = M.find("id"); + string id = k == M.end() ? "default" : k->second; + PhraseDictionaryCache* cache; + cache = PhraseDictionaryCache::InstanceNonConst(id); + if (cache) cache->ExecuteDlt(M, this->GetSource()->GetTranslationId()); + } if (i == M.end() || i->second != "adaptive-lm") continue; dltmap_t::const_iterator j = M.find("context-weights"); if (j == M.end()) continue; diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 767358e5c..d2118ad8f 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -433,6 +433,7 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params) } else { m_source.reset(new Sentence(m_options,0,m_source_string)); } + interpret_dlt(); } // end of Translationtask::parse_request() diff --git a/regtest b/regtest new file mode 160000 +Subproject 442ac5b2f7d1a29160fcf6b2140e0edc5e11024 |