Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2017-07-22 00:40:42 +0300
committerGitHub <noreply@github.com>2017-07-22 00:40:42 +0300
commit11a2ef4af57b86ed1bff22d604f41ec410b21383 (patch)
treea7d9bcca6234705f547f3ba387d2b18675a6882f
parente32b8f580533e2b1cdb5b8496a8658f277409a69 (diff)
parentf07e60aece8c0bbd144b2e4028f2ed5114a521a5 (diff)
Merge pull request #184 from ebay-hlt/moses-4.0
1. Cache based translation models which supports multiple scores and factors ...
-rw-r--r--.gitmodules7
-rw-r--r--mert/CderScorer.h4
-rw-r--r--mert/StatisticsBasedScorer.h9
-rw-r--r--moses/FF/Factory.cpp2
-rw-r--r--moses/Parameter.cpp3
-rw-r--r--moses/ScoreComponentCollection.cpp16
-rw-r--r--moses/ScoreComponentCollection.h2
-rw-r--r--moses/TranslationModel/PhraseDictionaryCache.cpp424
-rw-r--r--moses/TranslationModel/PhraseDictionaryCache.h176
-rw-r--r--moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp170
-rw-r--r--moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h17
-rw-r--r--moses/TranslationTask.cpp9
-rw-r--r--moses/server/TranslationRequest.cpp1
m---------regtest0
14 files changed, 788 insertions, 52 deletions
diff --git a/.gitmodules b/.gitmodules
index 90a9b30ba..8a1ca284f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,10 @@
+[submodule "regtest"]
+ path = regtest
+ url = https://github.com/ebay-hlt/moses-regression-tests
[submodule "contrib/arrow-pipelines/python/pcl"]
path = contrib/arrow-pipelines/python/pcl
url = https://github.com/ianj-als/pcl.git
[submodule "contrib/omtc/omtc"]
path = contrib/omtc/omtc
url = https://github.com/ianj-als/omtc.git
-[submodule "regtest"]
- path = regtest
- url = https://github.com/moses-smt/moses-regression-tests
+
diff --git a/mert/CderScorer.h b/mert/CderScorer.h
index 68fa81857..47c03fe86 100644
--- a/mert/CderScorer.h
+++ b/mert/CderScorer.h
@@ -31,6 +31,10 @@ public:
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
+  //! Reference length for this scorer: the entry at index 1 of the aggregated statistics.
+  //! NOTE(review): no bounds check is made, so \p totals must hold at least two entries -- confirm with callers.
+  virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
+    const ScoreStatsType &referenceLength = totals[1];
+    return referenceLength;
+  }
+
private:
bool m_allowed_long_jumps;
diff --git a/mert/StatisticsBasedScorer.h b/mert/StatisticsBasedScorer.h
index ba45634cc..ebfa4586a 100644
--- a/mert/StatisticsBasedScorer.h
+++ b/mert/StatisticsBasedScorer.h
@@ -44,10 +44,11 @@ protected:
*/
virtual statscore_t calculateScore(const std::vector<ScoreStatsType>& totals) const = 0;
- virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
- UTIL_THROW(util::Exception, "getReferenceLength not implemented for this scorer type.");
- return 0;
- }
+  //! Default for scorers that do not define a reference length: fail loudly.
+  //! An empty body here would flow off the end of a non-void function, which is
+  //! undefined behaviour as soon as the function is called. Scorers that do have
+  //! a reference length override this method.
+  virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
+    UTIL_THROW(util::Exception, "getReferenceLength not implemented for this scorer type.");
+    return 0;
+  }
// regularisation
RegularisationType m_regularization_type;
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 398d6593c..00dcac27d 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -10,6 +10,7 @@
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
+#include "moses/TranslationModel/PhraseDictionaryCache.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
@@ -243,6 +244,7 @@ FeatureRegistry::FeatureRegistry()
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
+ MOSES_FNAME(PhraseDictionaryCache);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME(ProbingPT);
MOSES_FNAME(PhraseDictionaryMemoryPerSentence);
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index a1430ff61..8ce322ae2 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -793,6 +793,9 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName)
case 15: // DCacheBased:
ptType = "PhraseDictionaryDynamicCacheBased";
break;
+ case 16: // CachePT:
+ ptType = "PhraseDictionaryCache";
+ break;
default:
break;
}
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index 7ce03c608..b1ef3747f 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -257,6 +257,22 @@ Assign(const FeatureFunction* sp, const std::vector<float>& scores)
}
}
+/** Overwrite one dense score of a single feature function.
+ *  \param sp  feature function that owns the score
+ *  \param idx 0-based position of the score among sp's own score components
+ *  \param sc  value to store
+ *  Throws util::Exception when idx is not smaller than sp->GetNumScoreComponents().
+ */
+void
+ScoreComponentCollection::
+Assign(const FeatureFunction* sp, size_t idx, float sc)
+{
+  const size_t numScores = sp->GetNumScoreComponents();
+
+  if (idx >= numScores) {
+    UTIL_THROW(util::Exception, "Feature function "
+               << sp->GetScoreProducerDescription() << " specified score index "
+               << idx << ", but it only has "
+               << numScores << " dense scores or weights");
+  }
+
+  // the slot is addressed as sp's base index plus the position within sp
+  m_scores[sp->GetIndex() + idx] = sc;
+}
void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp)
{
diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h
index 9b7010746..62720512c 100644
--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@@ -289,6 +289,8 @@ public:
void Assign(const FeatureFunction* sp, const std::vector<float>& scores);
+ void Assign(const FeatureFunction* sp, size_t idx, float sc);
+
//! Special version Assign(ScoreProducer, vector<float>)
//! to add the score from a single ScoreProducer that produces
//! a single value
diff --git a/moses/TranslationModel/PhraseDictionaryCache.cpp b/moses/TranslationModel/PhraseDictionaryCache.cpp
new file mode 100644
index 000000000..604a42ee4
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryCache.cpp
@@ -0,0 +1,424 @@
+// vim:tabstop=2
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+#include "util/exception.hh"
+
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationModel/PhraseDictionaryCache.h"
+#include "moses/FactorCollection.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/TargetPhrase.h"
+
+
+using namespace std;
+
+namespace Moses
+{
+std::map< const std::string, PhraseDictionaryCache * > PhraseDictionaryCache::s_instance_map;
+PhraseDictionaryCache *PhraseDictionaryCache::s_instance = NULL;
+
+//! constructor
+//! Sets the defaults, reads the parameters of the feature line and registers
+//! this instance under its cache name (only one instance per name is allowed).
+PhraseDictionaryCache::PhraseDictionaryCache(const std::string &line)
+  : PhraseDictionary(line, true)
+  , m_sentences(0)
+  , m_entries(0)
+  , m_constant(false)
+  , m_name("default")
+{
+  std::cerr << "Initializing PhraseDictionaryCache feature..." << std::endl;
+
+  // disable the internal cache (provided by PhraseDictionary) for translation options
+  m_maxCacheSize = 0;
+
+  // may override the defaults set above, e.g. through cache-name
+  ReadParameters();
+
+  UTIL_THROW_IF2(s_instance_map.count(m_name) != 0, "Only 1 PhraseDictionaryCache feature named " + m_name + " is allowed");
+  s_instance_map[m_name] = this;
+  s_instance = this; //for back compatibility
+
+  // remember how many weights are configured for this feature
+  const vector<float> weight = StaticData::Instance().GetWeights(this);
+  m_numscorecomponent = weight.size();
+}
+
+//! destructor: empties the whole cache before the object goes away
+PhraseDictionaryCache::~PhraseDictionaryCache()
+{
+  this->Clear();
+}
+
+/** Handle one key/value pair of the feature line.
+ *  Recognised here: "cache-name" (name of this instance), "input-factor" and
+ *  "output-factor" (comma-separated factor lists). Every other key is passed
+ *  on to PhraseDictionary::SetParameter.
+ */
+void PhraseDictionaryCache::SetParameter(const std::string& key, const std::string& value)
+{
+  VERBOSE(2, "PhraseDictionaryCache::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
+
+  if (key == "cache-name") {
+    m_name = Scan<std::string>(value);
+    return;
+  }
+  if (key == "input-factor") {
+    m_inputFactorsVec = Tokenize<FactorType>(value,",");
+    return;
+  }
+  if (key == "output-factor") {
+    m_outputFactorsVec = Tokenize<FactorType>(value,",");
+    return;
+  }
+
+  // not one of ours
+  PhraseDictionary::SetParameter(key, value);
+}
+
+//! Drops the cache entries stored under the translation id of \p source.
+void PhraseDictionaryCache::CleanUpAfterSentenceProcessing(const InputType& source)
+{
+  const long finishedId = source.GetTranslationId();
+  Clear(finishedId);
+}
+
+/** Called before a sentence is translated.
+ *  For every source phrase cached under the sentence's translation id, a copy
+ *  of its target phrase collection is made and each phrase of the copy is
+ *  evaluated in isolation; the last copy is then ordered with NthElement.
+ *  NOTE(review): the copies are local to this function and nothing visible
+ *  here stores them -- confirm whether the evaluation is meant to persist.
+ */
+void PhraseDictionaryCache::InitializeForInput(ttasksptr const& ttask)
+{
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+  const long tID = ttask->GetSource()->GetTranslationId();
+
+  sentCacheMap::const_iterator sentIt = m_cacheTM.find(tID);
+  if (sentIt == m_cacheTM.end()) return; // nothing cached for this sentence
+  const cacheMap &sentenceCache = sentIt->second;
+
+  TargetPhraseCollection::shared_ptr copy;
+  for (cacheMap::const_iterator entry = sentenceCache.begin(); entry != sentenceCache.end(); ++entry) {
+    copy.reset(new TargetPhraseCollection(*(entry->second).first));
+
+    std::vector<const TargetPhrase*>::const_iterator phraseIt;
+    for (phraseIt = copy->begin(); phraseIt != copy->end(); ++phraseIt) {
+      ((TargetPhrase*) *phraseIt)->EvaluateInIsolation(entry->first, GetFeaturesToApply());
+    }
+  }
+
+  if (copy) {
+    copy->NthElement(m_tableLimit); // sort the phrases for the decoder
+  }
+}
+
+/** Attach cached target phrases to every input path of the queue.
+ *  For each path, the cache of the path's sentence (looked up by translation
+ *  id) is scanned; whenever a cached source phrase compares equal to the
+ *  path's phrase, a copy of its target phrase collection is set on the path.
+ *  Paths whose sentence has no cache are left untouched.
+ */
+void PhraseDictionaryCache::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
+{
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+  for (InputPathList::const_iterator pathIt = inputPathQueue.begin(); pathIt != inputPathQueue.end(); ++pathIt) {
+    InputPath &inputPath = **pathIt;
+    const long tID = inputPath.ttask->GetSource()->GetTranslationId();
+
+    const sentCacheMap::const_iterator sentIt = m_cacheTM.find(tID);
+    if (sentIt == m_cacheTM.end()) continue;
+    const cacheMap &sentenceCache = sentIt->second;
+
+    const Phrase &source = inputPath.GetPhrase();
+    TargetPhraseCollection::shared_ptr tpc;
+    for (cacheMap::const_iterator entry = sentenceCache.begin(); entry != sentenceCache.end(); ++entry) {
+      if (source.Compare(entry->first) == 0) {
+        tpc.reset(new TargetPhraseCollection(*(entry->second).first));
+        inputPath.SetTargetPhrases(*this, tpc, NULL);
+      }
+    }
+  }
+}
+
+/** Look up the translations of \p source in the cache of sentence \p tID.
+ *  \return a copy of the cached collection whose phrases have been evaluated
+ *          in isolation and ordered with NthElement(m_tableLimit); an empty
+ *          pointer when the sentence or the source phrase is not cached.
+ */
+TargetPhraseCollection::shared_ptr PhraseDictionaryCache::GetTargetPhraseCollection(const Phrase &source, long tID) const
+{
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+  TargetPhraseCollection::shared_ptr tpc;
+
+  const sentCacheMap::const_iterator sentIt = m_cacheTM.find(tID);
+  if (sentIt == m_cacheTM.end()) return tpc;
+  const cacheMap &sentenceCache = sentIt->second;
+
+  const cacheMap::const_iterator hit = sentenceCache.find(source);
+  if (hit == sentenceCache.end()) return tpc;
+
+  tpc.reset(new TargetPhraseCollection(*(hit->second).first));
+
+  std::vector<const TargetPhrase*>::const_iterator phraseIt;
+  for (phraseIt = tpc->begin(); phraseIt != tpc->end(); ++phraseIt) {
+    ((TargetPhrase*) *phraseIt)->EvaluateInIsolation(source, GetFeaturesToApply());
+  }
+
+  tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
+  return tpc;
+}
+
+//! Chart decoding is not supported by this phrase table: always throws util::Exception.
+ChartRuleLookupManager* PhraseDictionaryCache::CreateRuleLookupManager(
+  const ChartParser &parser,
+  const ChartCellCollectionBase &cellCollection,
+  std::size_t /*maxChartSpan*/)
+{
+  UTIL_THROW(util::Exception, "Not implemented for Chart Decoder");
+}
+
+// friend
+// Stream output of a PhraseDictionaryCache: writes nothing and hands the stream back.
+ostream& operator<<(ostream& out, const PhraseDictionaryCache& phraseDict)
+{
+  ostream &unchanged = out;
+  return unchanged;
+}
+
+/** Insert a batch of cache entries given as one string.
+ *  \param entries entries separated by "||||"; an empty string is ignored
+ *  \param tID     translation id of the sentence the entries belong to
+ */
+void PhraseDictionaryCache::Insert(std::string &entries, long tID)
+{
+  if (entries.empty()) return;
+
+  VERBOSE(3,"entries:|" << entries << "|" << " tID | " << tID << std::endl);
+  const std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+  VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+  Insert(elements, tID);
+}
+
+//! Insert already-split cache entries for sentence \p tID; prints the cache at verbosity 3.
+void PhraseDictionaryCache::Insert(std::vector<std::string> entries, long tID)
+{
+  VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
+
+  Update(tID, entries);
+
+  IFVERBOSE(3) Print();
+}
+
+
+/** Parse cache entries and store them for sentence \p tID.
+ *  Each entry is split at "|||" into
+ *    source ||| target [ ||| scores [ ||| word-alignment ] ]
+ *  Entries with fewer than two fields are skipped with a warning; the fields
+ *  were previously indexed without a size check, so such an entry read past
+ *  the end of the field vector.
+ */
+void PhraseDictionaryCache::Update(long tID, std::vector<std::string> entries)
+{
+  for (size_t i = 0; i < entries.size(); ++i) {
+    std::string &entry = entries[i];
+    const std::vector<std::string> pp = TokenizeMultiCharSeparator(entry, "|||");
+
+    if (pp.size() < 2) {
+      // source and target are mandatory
+      VERBOSE(1,"PhraseDictionaryCache: skipping malformed cache entry:|" << entry << "|" << std::endl);
+      continue;
+    }
+
+    VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
+    VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
+
+    if (pp.size() > 3) {
+      VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
+      VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl);
+      Update(tID,pp[0], pp[1], pp[2], pp[3]);
+    } else if (pp.size() > 2) {
+      VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
+      Update(tID,pp[0], pp[1], pp[2]);
+    } else {
+      Update(tID,pp[0], pp[1]);
+    }
+  }
+}
+
+/** Convert a whitespace-separated list of numbers into a score vector.
+ *  Reading stops at the first token that does not parse as a float; an empty
+ *  string yields an empty vector.
+ */
+Scores PhraseDictionaryCache::Conv2VecFloats(std::string& s)
+{
+  std::vector<float> values;
+  if (!s.empty()) {
+    std::istringstream iss(s);
+    float value;
+    while (iss >> value) {
+      values.push_back(value);
+    }
+  }
+  return values;
+}
+
+/** Store one phrase pair given in textual form for sentence \p tID.
+ *  \param sourcePhraseString source words, parsed with the configured input factors
+ *  \param targetPhraseString target words, parsed with the configured output factors
+ *  \param scoreString        whitespace-separated feature scores (may be empty)
+ *  \param waString           word alignment (may be empty)
+ *  The strings are converted and forwarded to the Phrase/TargetPhrase overload.
+ */
+void PhraseDictionaryCache::Update(long tID, std::string sourcePhraseString, std::string targetPhraseString, std::string scoreString, std::string waString)
+{
+  Phrase sourcePhrase(0);
+  TargetPhrase targetPhrase(0);
+
+  Scores scores = Conv2VecFloats(scoreString);
+
+  //target
+  targetPhrase.Clear();
+  // change here for factored based CBTM
+  VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
+  targetPhrase.CreateFromString(Output, m_outputFactorsVec,
+                                targetPhraseString, /*factorDelimiter,*/ NULL);
+  VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
+
+  //TODO: Would be better to reuse source phrases, but ownership has to be
+  //consistent across phrase table implementations
+  sourcePhrase.Clear();
+  VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
+  sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL);
+  VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+
+  if (!waString.empty()) {
+    VERBOSE(3, "waString:|" << waString << "|" << std::endl);
+  }
+
+  Update(tID, sourcePhrase, targetPhrase, scores, waString);
+}
+
+/** Insert or refresh one phrase pair in the cache of sentence \p tID.
+ *
+ *  - source phrase unknown: a new target phrase collection is created for it
+ *    and the target phrase is added;
+ *  - source known, target unknown: the target phrase is added to the existing
+ *    collection;
+ *  - both known: scores and (if given) word alignment of the stored target
+ *    phrase are overwritten.
+ *
+ *  A warning is logged when the number of scores differs from
+ *  m_numScoreComponents; the scores are handed to Assign() regardless.
+ *  Takes the write lock for the whole operation.
+ *
+ *  The Scores object stored next to each collection is only allocated here so
+ *  that Clear() can delete it; it is never filled by this function.
+ */
+void PhraseDictionaryCache::Update(long tID, Phrase sp, TargetPhrase tp, Scores scores, std::string waString)
+{
+  VERBOSE(3,"PhraseDictionaryCache::Update(Phrase sp, TargetPhrase tp, Scores scores, std::string waString)" << std::endl);
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+  VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| word-alignment |" << waString << "|" << std::endl);
+
+  // if there is no cache for the sentence tID, operator[] creates one
+  cacheMap &sentenceCache = m_cacheTM[tID];
+  cacheMap::const_iterator it = sentenceCache.find(sp);
+  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
+
+  if (it != sentenceCache.end()) {
+    VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
+    // sp is found: look for tp among its translations
+    TargetPhraseCollection::shared_ptr tpc = (it->second).first;
+
+    // compare on the Phrase part only, as the words identify the translation
+    const Phrase &wanted = tp;
+    TargetPhrase *existing = NULL;
+    for (size_t pos = 0; pos < tpc->GetSize(); ++pos) {
+      TargetPhrase *candidate = const_cast<TargetPhrase*>(tpc->GetTargetPhrase(pos));
+      if (wanted == static_cast<const Phrase&>(*candidate)) {
+        existing = candidate;
+        break;
+      }
+    }
+
+    if (existing == NULL) {
+      VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
+      // auto_ptr keeps the new phrase safe until the collection owns it
+      std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
+      if (scores.size() != m_numScoreComponents) {
+        VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
+        VERBOSE(1, "I am ignoring this..." <<endl);
+      }
+      targetPhrase->GetScoreBreakdown().Assign(this, scores);
+      if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
+
+      tpc->Add(targetPhrase.release());
+      m_entries++;
+      VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
+    } else {
+      if (scores.size() != m_numScoreComponents) {
+        VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
+        VERBOSE(1, "I am ignoring this..." <<endl);
+      }
+      existing->GetScoreBreakdown().Assign(this, scores);
+      if (!waString.empty()) existing->SetAlignmentInfo(waString);
+      VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
+    }
+  } else {
+    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
+    // sp is not found: create its target collection
+    TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
+    Scores *storedScores = new Scores(); // owned by the map entry, deleted in Clear()
+    sentenceCache.insert(make_pair(sp,std::make_pair(tpc,storedScores)));
+
+    std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
+    if (scores.size() != m_numScoreComponents) {
+      VERBOSE(1, "Scores do not match number of score components for phrase : "<< sp <<" ||| " << tp <<endl);
+      VERBOSE(1, "I am ignoring this..." <<endl);
+    }
+    targetPhrase->GetScoreBreakdown().Assign(this, scores);
+    if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
+
+    tpc->Add(targetPhrase.release());
+    m_entries++;
+    VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
+  }
+}
+
+//! Run one or more cache commands given as a single string; the string is split with Tokenize(command, "||").
+void PhraseDictionaryCache::Execute(std::string command, long tID)
+{
+  VERBOSE(2,"command:|" << command << "|" << std::endl);
+  const std::vector<std::string> commandList = Tokenize(command, "||");
+  Execute(commandList, tID);
+}
+
+//! Run the given commands in order, then print the cache at verbosity 2.
+//! \p tID is accepted for symmetry with the other entry points but is not used here.
+void PhraseDictionaryCache::Execute(std::vector<std::string> commands, long tID)
+{
+  std::vector<std::string>::const_iterator cmd;
+  for (cmd = commands.begin(); cmd != commands.end(); ++cmd) {
+    Execute_Single_Command(*cmd);
+  }
+  IFVERBOSE(2) Print();
+}
+
+//! Run one command. Only "clear" (empty the whole cache) is known; anything else is logged and skipped.
+void PhraseDictionaryCache::Execute_Single_Command(std::string command)
+{
+  if (command != "clear") {
+    VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
+    return;
+  }
+
+  VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "|. Cache cleared." << std::endl);
+  Clear();
+}
+
+/** Empty the cache of every sentence.
+ *  Holds the write lock for the whole operation, frees the Scores object of
+ *  each entry and resets the entry counter.
+ */
+void PhraseDictionaryCache::Clear()
+{
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+  for (sentCacheMap::iterator sentIt = m_cacheTM.begin(); sentIt != m_cacheTM.end(); ++sentIt) {
+    cacheMap &sentenceCache = sentIt->second;
+    for (cacheMap::iterator it = sentenceCache.begin(); it != sentenceCache.end(); ++it) {
+      delete (it->second).second; // allocated with new in Update()
+    }
+  }
+  // the shared_ptr collections are released together with the map nodes
+  m_cacheTM.clear();
+  m_entries = 0;
+}
+
+/** Empty the cache of sentence \p tID only.
+ *  The per-sentence map is erased from m_cacheTM, so finished sentences do not
+ *  leave empty maps behind. m_entries is reduced by the number of target
+ *  phrases removed rather than being reset to zero, since other sentences may
+ *  still be cached.
+ */
+void PhraseDictionaryCache::Clear(long tID)
+{
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+  sentCacheMap::iterator sentIt = m_cacheTM.find(tID);
+  if (sentIt == m_cacheTM.end()) return;
+
+  cacheMap &sentenceCache = sentIt->second;
+  size_t removed = 0;
+  for (cacheMap::iterator it = sentenceCache.begin(); it != sentenceCache.end(); ++it) {
+    const TargetPhraseCollection::shared_ptr &tpc = (it->second).first;
+    if (tpc) removed += tpc->GetSize();
+    delete (it->second).second; // allocated with new in Update()
+  }
+  m_cacheTM.erase(sentIt);
+
+  // guard against underflow should the counter ever be out of sync
+  m_entries = (removed < m_entries) ? (m_entries - removed) : 0;
+}
+
+
+/** Apply the cache-related keys of a dlt meta map, in this order:
+ *    "cbtm"           - entries to insert for sentence \p tID
+ *    "cbtm-command"   - commands to execute
+ *    "cbtm-clear-all" - presence alone empties the whole cache
+ */
+void PhraseDictionaryCache::ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID)
+{
+  typedef std::map<std::string, std::string>::iterator MetaIter;
+
+  const MetaIter entries = dlt_meta.find("cbtm");
+  if (entries != dlt_meta.end()) {
+    Insert(entries->second, tID);
+  }
+
+  const MetaIter commands = dlt_meta.find("cbtm-command");
+  if (commands != dlt_meta.end()) {
+    Execute(commands->second, tID);
+  }
+
+  if (dlt_meta.count("cbtm-clear-all") != 0) {
+    Clear();
+  }
+}
+
+//! Write every cached phrase pair to standard output, one "source ||| target" line each.
+//! Holds the read lock while printing.
+void PhraseDictionaryCache::Print() const
+{
+  VERBOSE(2,"PhraseDictionaryCache::Print()" << std::endl);
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+  for (sentCacheMap::const_iterator sentIt = m_cacheTM.begin(); sentIt != m_cacheTM.end(); ++sentIt) {
+    const cacheMap &sentenceCache = sentIt->second;
+    for (cacheMap::const_iterator entry = sentenceCache.begin(); entry != sentenceCache.end(); ++entry) {
+      const std::string source = (entry->first).ToString();
+      TargetPhraseCollection::shared_ptr tpc = (entry->second).first;
+      for (TargetPhraseCollection::iterator tpIt = tpc->begin(); tpIt != tpc->end(); ++tpIt) {
+        const std::string target = (*tpIt)->ToString();
+        std::cout << source << " ||| " << target << std::endl;
+      }
+    }
+  }
+}
+
+}// end namespace
diff --git a/moses/TranslationModel/PhraseDictionaryCache.h b/moses/TranslationModel/PhraseDictionaryCache.h
new file mode 100644
index 000000000..b6e54b8fa
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryCache.h
@@ -0,0 +1,176 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#ifndef moses_PhraseDictionaryCache_H
+#define moses_PhraseDictionaryCache_H
+
+#include "moses/TypeDef.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationTask.h"
+
+#include <boost/tuple/tuple.hpp>
+#include <boost/tuple/tuple_io.hpp>
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/thread/locks.hpp>
+#endif
+
+#define CBTM_SCORE_TYPE_UNDEFINED (-1)
+#define CBTM_SCORE_TYPE_HYPERBOLA 0
+#define CBTM_SCORE_TYPE_POWER 1
+#define CBTM_SCORE_TYPE_EXPONENTIAL 2
+#define CBTM_SCORE_TYPE_COSINE 3
+#define CBTM_SCORE_TYPE_HYPERBOLA_REWARD 10
+#define CBTM_SCORE_TYPE_POWER_REWARD 11
+#define CBTM_SCORE_TYPE_EXPONENTIAL_REWARD 12
+#define PI 3.14159265
+
+
+namespace Moses
+{
+class ChartParser;
+class ChartCellCollectionBase;
+class ChartRuleLookupManager;
+class TranslationTask;
+class PhraseDictionary;
+
+/** Implementation of a Cache-based phrase table.
+ *
+ *  Phrase pairs are kept per sentence: the cache maps a translation id to a
+ *  map from source phrase to its target phrase collection. Instances register
+ *  themselves by name in a static map so that they can be looked up with
+ *  Instance(name) / InstanceNonConst(name).
+ */
+class PhraseDictionaryCache : public PhraseDictionary
+{
+
+  // target phrases of one source phrase, plus a separately allocated Scores object
+  typedef std::pair<TargetPhraseCollection::shared_ptr, Scores*> TargetCollectionPair;
+  // cache of one sentence: source phrase -> translations
+  typedef boost::unordered_map<Phrase, TargetCollectionPair> cacheMap;
+  // all caches: translation id -> cache of that sentence
+  typedef std::map<long, cacheMap> sentCacheMap;
+
+  // factored translation
+  std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;
+
+  // data structure for the cache
+  sentCacheMap m_cacheTM;
+  long m_sentences;
+  unsigned int m_numscorecomponent;
+  size_t m_score_type; //scoring type of the match
+  size_t m_entries; //total number of entries in the cache
+  float m_lower_score; //lower_bound_score for no match
+  bool m_constant; //flag for setting a non-decaying cache
+  std::string m_initfiles; // vector of files loaded in the initialization phase
+  std::string m_name; // internal name to identify this instance of the Cache-based phrase table
+
+#ifdef WITH_THREADS
+  //multiple readers - single writer lock
+  mutable boost::shared_mutex m_cacheLock;
+#endif
+
+  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryCache&);
+
+public:
+  PhraseDictionaryCache(const std::string &line);
+  ~PhraseDictionaryCache();
+
+  //! name under which this instance is registered
+  inline const std::string GetName() {
+    return m_name;
+  };
+  inline void SetName(const std::string name) {
+    m_name = name;
+  }
+
+  //! instance registered under \p name, or NULL if there is none
+  static const PhraseDictionaryCache* Instance(const std::string& name) {
+    if (s_instance_map.find(name) == s_instance_map.end()) {
+      return NULL;
+    }
+    return s_instance_map[name];
+  }
+
+  //! mutable variant of Instance(name)
+  static PhraseDictionaryCache* InstanceNonConst(const std::string& name) {
+    if (s_instance_map.find(name) == s_instance_map.end()) {
+      return NULL;
+    }
+    return s_instance_map[name];
+  }
+
+
+  //! most recently constructed instance (kept for back compatibility)
+  static const PhraseDictionaryCache& Instance() {
+    return *s_instance;
+  }
+
+  static PhraseDictionaryCache& InstanceNonConst() {
+    return *s_instance;
+  }
+
+  //! Legacy lookup: forwards to GetTargetPhraseCollection() using the
+  //! translation id of the task's source and returns its result.
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
+                                  Phrase const& src) const {
+    return GetTargetPhraseCollection(src, ttask->GetSource()->GetTranslationId());
+  }
+
+
+  // for phrase-based model
+  void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
+
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollection(const Phrase &src, long tID) const;
+
+  void CleanUpAfterSentenceProcessing(const InputType& source);
+  // for phrase-based model
+//  virtual void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
+
+  // for syntax/hiero model (CKY+ decoding)
+  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  void InitializeForInput(ttasksptr const& ttask);
+
+  void Print() const; // prints the cache
+  void Clear(); // clears the cache
+  void Clear(long tID); // clears cache of a sentence
+
+  void Insert(std::string &entries, long tID);
+  void Execute(std::string command, long tID);
+  void ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID);
+
+protected:
+
+  static PhraseDictionaryCache *s_instance;
+  static std::map< const std::string, PhraseDictionaryCache * > s_instance_map;
+
+  Scores Conv2VecFloats(std::string&);
+  void Insert(std::vector<std::string> entries, long tID);
+
+  void Update(long tID, std::vector<std::string> entries);
+  void Update(long tID, std::string sourceString, std::string targetString, std::string ScoreString="", std::string waString="");
+  void Update(long tID, Phrase p, TargetPhrase tp, Scores scores, std::string waString="");
+
+  void Execute(std::vector<std::string> commands, long tID);
+  void Execute_Single_Command(std::string command);
+
+
+  void SetPreComputedScores(const unsigned int numScoreComponent);
+  Scores GetPreComputedScores(const unsigned int age);
+
+  TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
+};
+
+} // namespace Moses
+
+#endif /* moses_PhraseDictionaryCache_H_ */
diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp
index 80bbd5b3d..e1f29b356 100644
--- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp
+++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp
@@ -27,6 +27,7 @@
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
+
using namespace std;
namespace Moses
@@ -48,11 +49,14 @@ PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::
m_entries = 0;
m_name = "default";
m_constant = false;
+
ReadParameters();
UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed");
s_instance_map[m_name] = this;
s_instance = this; //for back compatibility
+ vector<float> weight = StaticData::Instance().GetWeights(this);
+ m_numscorecomponent = weight.size();
}
PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased()
@@ -66,8 +70,9 @@ void PhraseDictionaryDynamicCacheBased::Load(AllOptions::ptr const& opts)
VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl);
SetFeaturesToApply();
- vector<float> weight = StaticData::Instance().GetWeights(this);
- SetPreComputedScores(weight.size());
+ SetPreComputedScores(1);
+ // weights.size() doesn't make sense at all.. why would you have multiple ages for a unique phrase pair??
+// SetPreComputedScores(m_numscorecomponent);
Load(m_initfiles);
}
@@ -96,12 +101,12 @@ void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)
//age |||| src_phr2 ||| trg_phr2 |||| src_phr3 ||| trg_phr3 |||| src_phr4 ||| trg_ph4
//....
//or
- //age |||| src_phr ||| trg_phr ||| wa_align
- //age |||| src_phr2 ||| trg_phr2 ||| wa_align2 |||| src_phr3 ||| trg_phr3 ||| wa_align3 |||| src_phr4 ||| trg_phr4 ||| wa_align4
+ //age |||| src_phr ||| trg_phr ||| scores ||| wa_align
+ //age |||| src_phr2 ||| trg_phr2 ||| scores2 ||| wa_align2 |||| src_phr3 ||| trg_phr3 ||| scores3 ||| wa_align3 |||| src_phr4 ||| trg_phr4 ||| scores4 ||| wa_align4
//....
//each src_phr ad trg_phr are sequences of src and trg words, respectively, of any length
//if provided, wa_align is the alignment between src_phr and trg_phr
- //
+ //scores is the feature scores associated to the source phrase and the target phrase
//there is no limit on the size of n
//
//entries can be repeated, but the last entry overwrites the previous
@@ -141,6 +146,10 @@ void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, con
m_name = Scan<std::string>(value);
} else if (key == "cbtm-constant") {
m_constant = Scan<bool>(value);
+ } else if (key == "input-factor") {
+ m_inputFactorsVec = Tokenize<FactorType>(value,",");
+ } else if (key == "output-factor") {
+ m_outputFactorsVec = Tokenize<FactorType>(value,",");
} else {
PhraseDictionary::SetParameter(key, value);
}
@@ -159,7 +168,7 @@ TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetP
TargetPhraseCollection::shared_ptr tpc;
cacheMap::const_iterator it = m_cacheTM.find(source);
if(it != m_cacheTM.end()) {
- tpc.reset(new TargetPhraseCollection(*(it->second).first));
+ tpc.reset(new TargetPhraseCollection(*(boost::get<0>(it->second))));
std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
@@ -336,7 +345,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr
//target
targetPhrase.Clear();
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
- targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order,
+ targetPhrase.CreateFromString(Output, m_outputFactorsVec,
targetPhraseString, /*factorDelimiter,*/ NULL);
VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl);
@@ -344,7 +353,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr
//consistent across phrase table implementations
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
- sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order,
+ sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
sourcePhraseString, /*factorDelimiter,*/ NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
ClearEntries(sourcePhrase, targetPhrase);
@@ -367,9 +376,10 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
// here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap
// and then add new entry
- TargetCollectionAgePair TgtCollAgePair = it->second;
- TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
- AgeCollection* ac = TgtCollAgePair.second;
+ TargetCollectionPair TgtCollPair = it->second;
+ TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair);
+ AgeCollection* ac = boost::get<1>(TgtCollPair);
+ Scores* sc = boost::get<2>(TgtCollPair);
const Phrase* p_ptr = NULL;
TargetPhrase* tp_ptr = NULL;
bool found = false;
@@ -391,16 +401,20 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
+ // no need to delete scores here
m_entries--;
VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
+ VERBOSE(3,"sc size:|" << sc->size() << "|" << std::endl);
VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
}
if (tpc->GetSize() == 0) {
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
ac->clear();
+ sc->clear();
tpc.reset();
delete ac;
+ delete sc;
m_cacheTM.erase(sp);
}
@@ -434,7 +448,7 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> ent
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
- sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order,
+ sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
*it, /*factorDelimiter,*/ NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
@@ -452,16 +466,19 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
VERBOSE(3,"found:|" << sp << "|" << std::endl);
//sp is found
- TargetCollectionAgePair TgtCollAgePair = it->second;
- TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
- AgeCollection* ac = TgtCollAgePair.second;
+ TargetCollectionPair TgtCollPair = it->second;
+ TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair);
+ AgeCollection* ac = boost::get<1>(TgtCollPair);
+ Scores* sc = boost::get<2>(TgtCollPair);
m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
ac->clear();
+ sc->clear();
tpc.reset();
delete ac;
+ delete sc;
m_cacheTM.erase(sp);
} else {
//do nothing
@@ -502,7 +519,11 @@ void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries,
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
- if (pp.size() > 2) {
+ if (pp.size() > 3) {
+ VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
+ VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl);
+ Update(pp[0], pp[1], ageString, pp[2], pp[3]);
+ } else if (pp.size() > 2){
VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
Update(pp[0], pp[1], ageString, pp[2]);
} else {
@@ -511,7 +532,18 @@ void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries,
}
}
-void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)
+// Parses the whitespace-separated floating-point values in s and returns
+// them in order. Extraction stops at end of input or at the first token
+// that cannot be read as a float; an empty string yields an empty vector.
+Scores PhraseDictionaryDynamicCacheBased::Conv2VecFloats(std::string& s){
+  std::vector<float> parsed;
+  std::istringstream in(s);
+  float value;
+  // operator>> skips leading whitespace and fails on empty or malformed input,
+  // so no separate emptiness check is needed.
+  while (in >> value) {
+    parsed.push_back(value);
+  }
+  return parsed;
+}
+
+void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string scoreString, std::string waString)
{
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
const StaticData &staticData = StaticData::Instance();
@@ -523,11 +555,12 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s
ageString = Trim(ageString);
int age = strtod(ageString.c_str(), &err_ind_temp);
VERBOSE(3, "age:|" << age << "|" << std::endl);
-
+ Scores scores = Conv2VecFloats(scoreString);
//target
targetPhrase.Clear();
+ // change here for factored based CBTM
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
- targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order,
+ targetPhrase.CreateFromString(Output, m_outputFactorsVec,
targetPhraseString, /*factorDelimiter,*/ NULL);
VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
@@ -535,15 +568,15 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s
//consistent across phrase table implementations
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
- sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, sourcePhraseString, /*factorDelimiter,*/ NULL);
+ sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl);
- Update(sourcePhrase, targetPhrase, age, waString);
+ Update(sourcePhrase, targetPhrase, age, scores, waString);
}
-void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)
+void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, Scores scores, std::string waString)
{
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)" << std::endl);
#ifdef WITH_THREADS
@@ -559,9 +592,10 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
// here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap
// and then add new entry
- TargetCollectionAgePair TgtCollAgePair = it->second;
- TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
- AgeCollection* ac = TgtCollAgePair.second;
+ TargetCollectionPair TgtCollPair = it->second;
+ TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair);
+ AgeCollection* ac = boost::get<1>(TgtCollPair);
+ Scores* sc = boost::get<2>(TgtCollPair);
// const TargetPhrase* p_ptr = NULL;
const Phrase* p_ptr = NULL;
TargetPhrase* tp_ptr = NULL;
@@ -579,18 +613,40 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
if (!found) {
VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
-
- targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
+ Scores scoreVec;
+ scoreVec.push_back(GetPreComputedScores(age)[0]);
+ for (unsigned int i=0; i<scores.size(); i++){
+ scoreVec.push_back(scores[i]);
+ }
+ if(scoreVec.size() != m_numScoreComponents){
+ VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
+ VERBOSE(1, "Debugging: Press Enter to continue..." <<endl);
+ std::cin.ignore();
+ }
+ targetPhrase->GetScoreBreakdown().Assign(this, scoreVec);
+// targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
tpc->Add(targetPhrase.release());
tp_pos = tpc->GetSize()-1;
ac->push_back(age);
+ sc = &scores;
m_entries++;
VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
} else {
- tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
+ Scores scoreVec;
+ scoreVec.push_back(GetPreComputedScores(age)[0]);
+ for (unsigned int i=0; i<scores.size(); i++){
+ scoreVec.push_back(scores[i]);
+ }
+ if(scoreVec.size() != m_numScoreComponents){
+ VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
+ VERBOSE(1, "Debugging: Press Enter to continue..." <<endl);
+ std::cin.ignore();
+ }
+ tp_ptr->GetScoreBreakdown().Assign(this, scoreVec);
+// tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
ac->at(tp_pos) = age;
VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
@@ -603,15 +659,28 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
AgeCollection* ac = new AgeCollection();
- m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
+ Scores* sc = new Scores();
+ m_cacheTM.insert(make_pair(sp,boost::make_tuple(tpc,ac,sc)));
//tp is not found
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
- targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
+ // scoreVec is a composition of decay_score and the feature scores
+ Scores scoreVec;
+ scoreVec.push_back(GetPreComputedScores(age)[0]);
+ for (unsigned int i=0; i<scores.size(); i++){
+ scoreVec.push_back(scores[i]);
+ }
+ if(scoreVec.size() != m_numScoreComponents){
+ VERBOSE(1, "Scores do not match number of score components for phrase : "<< sp <<" ||| " << tp <<endl);
+ VERBOSE(1, "Debugging: Press Enter to continue..." <<endl);
+ std::cin.ignore();
+ }
+ targetPhrase->GetScoreBreakdown().Assign(this, scoreVec);
if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
tpc->Add(targetPhrase.release());
ac->push_back(age);
+ sc = &scores;
m_entries++;
VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
}
@@ -636,9 +705,10 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
VERBOSE(3,"found:|" << sp << "|" << std::endl);
//sp is found
- TargetCollectionAgePair TgtCollAgePair = it->second;
- TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
- AgeCollection* ac = TgtCollAgePair.second;
+ TargetCollectionPair TgtCollPair = it->second;
+ TargetPhraseCollection::shared_ptr tpc = boost::get<0>(TgtCollPair);
+ AgeCollection* ac = boost::get<1>(TgtCollPair);
+ Scores* sc = boost::get<2>(TgtCollPair);
//loop in inverted order to allow a correct deletion of std::vectors tpc and ac
for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) {
@@ -652,18 +722,29 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl);
tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
+ // no need to change scores here
m_entries--;
} else {
VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl);
- tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age));
+ // scoreVec is a composition of decay_score and the feature scores
+ size_t idx=0;
+ tp_ptr->GetScoreBreakdown().Assign(this, idx, GetPreComputedScores(tp_age)[0]);
+// tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age));
ac->at(tp_pos) = tp_age;
}
}
if (tpc->GetSize() == 0) {
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
- (((*it).second).second)->clear();
- delete ((*it).second).second;
- ((*it).second).first.reset();
+ // clear age collection
+ ac->clear();
+ // clear score collection
+ sc->clear();
+ // delete age collection
+ delete ac;
+ // delete score collection
+ delete sc;
+ // reset the target phrase collection
+ tpc.reset();
m_cacheTM.erase(sp);
}
} else {
@@ -707,9 +788,16 @@ void PhraseDictionaryDynamicCacheBased::Clear()
#endif
cacheMap::iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
- (((*it).second).second)->clear();
- delete ((*it).second).second;
- ((*it).second).first.reset();
+ // clear age collection
+ (boost::get<1>((*it).second))->clear();
+ // clear score collection
+ (boost::get<2>((*it).second))->clear();
+ // delete age collection
+ delete boost::get<1>((*it).second);
+ // delete score collection
+ delete boost::get<2>((*it).second);
+ // reset the target phrase collection
+ (boost::get<0>(it->second)).reset();
}
m_cacheTM.clear();
m_entries = 0;
@@ -748,7 +836,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const
cacheMap::const_iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
std::string source = (it->first).ToString();
- TargetPhraseCollection::shared_ptr tpc = (it->second).first;
+ TargetPhraseCollection::shared_ptr tpc = boost::get<0>(it->second);
TargetPhraseCollection::iterator itr;
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
std::string target = (*itr)->ToString();
diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
index 09527debc..4ca1aef94 100644
--- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
+++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h
@@ -25,6 +25,9 @@
#include "moses/TypeDef.h"
#include "moses/TranslationModel/PhraseDictionary.h"
+#include <boost/tuple/tuple.hpp>
+#include <boost/tuple/tuple_io.hpp>
+
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#include <boost/thread/locks.hpp>
@@ -52,14 +55,19 @@ class ChartRuleLookupManager;
class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{
+// typedef std::vector<unsigned int> AgeCollection;
typedef std::vector<unsigned int> AgeCollection;
- typedef std::pair<TargetPhraseCollection::shared_ptr , AgeCollection*> TargetCollectionAgePair;
- typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;
+ typedef boost::tuple<TargetPhraseCollection::shared_ptr , AgeCollection*, Scores*> TargetCollectionPair;
+ typedef std::map<Phrase, TargetCollectionPair> cacheMap;
+
+ // factored translation
+ std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;
// data structure for the cache
cacheMap m_cacheTM;
std::vector<Scores> precomputedScores;
unsigned int m_maxAge;
+ unsigned int m_numscorecomponent;
size_t m_score_type; //scoring type of the match
size_t m_entries; //total number of entries in the cache
float m_lower_score; //lower_bound_score for no match
@@ -151,13 +159,14 @@ protected:
static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map;
float decaying_score(const int age); // calculates the decay score given the age
+ Scores Conv2VecFloats(std::string&);
void Insert(std::vector<std::string> entries);
void Decay(); // traverse through the cache and decay each entry
void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase
void Update(std::vector<std::string> entries, std::string ageString);
- void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString="");
- void Update(Phrase p, TargetPhrase tp, int age, std::string waString="");
+ void Update(std::string sourceString, std::string targetString, std::string ageString, std::string ScoreString="", std::string waString="");
+ void Update(Phrase p, TargetPhrase tp, int age, Scores scores, std::string waString="");
void ClearEntries(std::vector<std::string> entries);
void ClearEntries(std::string sourceString, std::string targetString);
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 75df7443b..d0a44fb83 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -17,6 +17,8 @@
#include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h"
#include "moses/Syntax/T2S/RuleMatcherSCFG.h"
+#include "moses/TranslationModel/PhraseDictionaryCache.h"
+
#include "util/exception.hh"
using namespace std;
@@ -149,6 +151,13 @@ interpret_dlt()
typedef std::map<std::string,std::string> dltmap_t;
BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) {
dltmap_t::const_iterator i = M.find("type");
+ // Handle DLT metadata entries of type "cache".
+ // M.find("type") returns M.end() when the entry has no "type" key, so the
+ // iterator must be checked before it is dereferenced.
+ if (i != M.end() && i->second == "cache") {
+   // Use the entry's "id" value if present, otherwise fall back to "default".
+   map<string, string>::const_iterator k = M.find("id");
+   string id = k == M.end() ? "default" : k->second;
+   PhraseDictionaryCache* cache = PhraseDictionaryCache::InstanceNonConst(id);
+   // InstanceNonConst may yield a null pointer; only then is the entry ignored.
+   if (cache) cache->ExecuteDlt(M, this->GetSource()->GetTranslationId());
+ }
if (i == M.end() || i->second != "adaptive-lm") continue;
dltmap_t::const_iterator j = M.find("context-weights");
if (j == M.end()) continue;
diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp
index 767358e5c..d2118ad8f 100644
--- a/moses/server/TranslationRequest.cpp
+++ b/moses/server/TranslationRequest.cpp
@@ -433,6 +433,7 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
} else {
m_source.reset(new Sentence(m_options,0,m_source_string));
}
+ interpret_dlt();
} // end of Translationtask::parse_request()
diff --git a/regtest b/regtest
new file mode 160000
+Subproject 442ac5b2f7d1a29160fcf6b2140e0edc5e11024