Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: zens <zens@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-22 01:09:23 +0400
committer: zens <zens@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-22 01:09:23 +0400
commit: 98ef97e803cf5e33af6d7091c0c04f00bab25381 (patch)
tree: 45e8b02697f0b443eaa4214845e5eef31d1f4d5e /moses
parent: a857773715d3f2a1a7443b68ef88802e971762dc (diff)
- translation of confusion nets integrated (works only with binary phrase table)
- removed some obsolete stuff

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@250 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/ConfusionNet.cpp 21
-rw-r--r-- moses/src/ConfusionNet.h 3
-rw-r--r-- moses/src/CreateTranslationOptionCollection.cpp 20
-rw-r--r-- moses/src/CreateTranslationOptionCollection.h 11
-rw-r--r-- moses/src/Input.h 4
-rw-r--r-- moses/src/Makefile.am 3
-rwxr-xr-x moses/src/Manager.cpp 2
-rwxr-xr-x moses/src/Manager.h 2
-rwxr-xr-x moses/src/PhraseDictionary.cpp 8
-rwxr-xr-x moses/src/PhraseDictionary.h 8
-rw-r--r-- moses/src/PhraseDictionaryTree.cpp 8
-rw-r--r-- moses/src/PhraseDictionaryTree.h 2
-rw-r--r-- moses/src/PhraseDictionaryTreeAdaptor.cpp 187
-rw-r--r-- moses/src/PhraseDictionaryTreeAdaptor.h 8
-rwxr-xr-x moses/src/Sentence.cpp 5
-rwxr-xr-x moses/src/Sentence.h 1
-rwxr-xr-x moses/src/StaticData.cpp 5
-rwxr-xr-x moses/src/StaticData.h 3
-rw-r--r-- moses/src/TranslationOptionCollection.cpp 12
-rwxr-xr-x moses/src/TranslationOptionCollection.h 18
-rw-r--r-- moses/src/TranslationOptionCollectionConfusionNet.cpp 105
-rw-r--r-- moses/src/TranslationOptionCollectionConfusionNet.h 8
-rw-r--r-- moses/src/TranslationOptionCollectionText.cpp 2
23 files changed, 284 insertions, 162 deletions
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp
index d5a153c25..235ae87a4 100644
--- a/moses/src/ConfusionNet.cpp
+++ b/moses/src/ConfusionNet.cpp
@@ -4,6 +4,8 @@
#include "FactorCollection.h"
#include "Util.h"
+#include "PhraseDictionaryTreeAdaptor.h"
+#include "TranslationOptionCollectionConfusionNet.h"
ConfusionNet::ConfusionNet(FactorCollection* p) : InputType(),m_factorCollection(p) {}
@@ -55,9 +57,6 @@ bool ConfusionNet::ReadFormat0(std::istream& in,const std::vector<FactorType>& f
}
else break;
}
- std::cerr<<"conf net read: "<<data.size()<<"\n";
-
-
return !data.empty();
}
bool ConfusionNet::ReadFormat1(std::istream& in,const std::vector<FactorType>& factorOrder) {
@@ -76,7 +75,7 @@ bool ConfusionNet::ReadFormat1(std::istream& in,const std::vector<FactorType>& f
data[i].resize(s);
for(size_t j=0;j<s;++j)
if(is>>word>>prob) {
- data[i][j].second=-log(prob);
+ data[i][j].second=log(prob);
if(data[i][j].second<0) {
std::cerr<<"WARN: neg costs: "<<data[i][j].second<<" -> set to 0\n";
data[i][j].second=0.0;}
@@ -111,3 +110,17 @@ std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
cn.Print(out);return out;
}
+TargetPhraseCollection const* ConfusionNet::CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const
+{
+ if(PhraseDictionaryTreeAdaptor const* pdict=dynamic_cast<PhraseDictionaryTreeAdaptor const*>(&d))
+ return pdict->GetTargetPhraseCollection(*this,r);
+ std::cerr<<"ERROR: wrong phrase dictionary type for confusion net decoding!\n"
+ "has to be PhraseDictionaryTreeAdaptor\n";
+ abort();
+}
+TranslationOptionCollection* ConfusionNet::CreateTranslationOptionCollection() const
+{
+ return new TranslationOptionCollectionConfusionNet(*this);
+}
+
+
diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h
index 3581dcc62..8f27f4ccb 100644
--- a/moses/src/ConfusionNet.h
+++ b/moses/src/ConfusionNet.h
@@ -37,6 +37,9 @@ class ConfusionNet : public InputType {
const FactorArray& GetFactorArray(size_t pos) const;
+ TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const;
+ TranslationOptionCollection* CreateTranslationOptionCollection() const;
+
private:
bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
diff --git a/moses/src/CreateTranslationOptionCollection.cpp b/moses/src/CreateTranslationOptionCollection.cpp
deleted file mode 100644
index f76cecde3..000000000
--- a/moses/src/CreateTranslationOptionCollection.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// $Id$
-#include "CreateTranslationOptionCollection.h"
-#include "Sentence.h"
-#include "ConfusionNet.h"
-#include "TranslationOptionCollectionText.h"
-#include "TranslationOptionCollectionConfusionNet.h"
-
-TranslationOptionCollection* CreateTranslationOptionCollection(InputType const* src)
-{
- if(Sentence const * s=dynamic_cast<Sentence const*>(src))
- return new TranslationOptionCollectionText(*s);
- else if(ConfusionNet const* cn=dynamic_cast<ConfusionNet const*>(src))
- return new TranslationOptionCollectionConfusionNet(*cn);
- else
- {
- std::cerr<<"ERROR: unknown InputType in "<<__FILE__<<"\n";
- abort();
- }
-}
-
diff --git a/moses/src/CreateTranslationOptionCollection.h b/moses/src/CreateTranslationOptionCollection.h
deleted file mode 100644
index b191b00c8..000000000
--- a/moses/src/CreateTranslationOptionCollection.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// $Id$
-#ifndef CREATETRANSLATIONOPTIONCOLLECTION_H_
-#define CREATETRANSLATIONOPTIONCOLLECTION_H_
-
-class TranslationOptionCollection;
-class InputType;
-
-TranslationOptionCollection* CreateTranslationOptionCollection(InputType const* src);
-
-#endif
-
diff --git a/moses/src/Input.h b/moses/src/Input.h
index 194487673..bfb6a7a8c 100644
--- a/moses/src/Input.h
+++ b/moses/src/Input.h
@@ -9,7 +9,7 @@
class WordsRange;
class Factor;
class PhraseDictionaryBase;
-
+class TranslationOptionCollection;
// base class for sentences and confusion networks
class InputType
@@ -35,7 +35,7 @@ protected:
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection) =0;
virtual TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const=0;
-
+ virtual TranslationOptionCollection* CreateTranslationOptionCollection() const=0;
// these functions are not (yet) well-defined for confusion networks
virtual Phrase GetSubString(const WordsRange&) const =0;
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index d3bab277e..fc81592de 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -1,9 +1,8 @@
lib_LIBRARIES = libmoses.a
-AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
+AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
libmoses_a_SOURCES = \
Arc.cpp \
ConfusionNet.cpp \
- CreateTranslationOptionCollection.cpp \
Dictionary.cpp \
Factor.cpp \
FactorCollection.cpp \
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index 3da7df431..409a38238 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -38,7 +38,7 @@ Manager::Manager(InputType const& source,
:m_source(source)
,m_hypoStack(source.GetSize() + 1)
,m_staticData(staticData)
-,m_possibleTranslations(toc) //dynamic_cast<Sentence const&>(source))
+,m_possibleTranslations(toc)
{
std::vector < HypothesisCollection >::iterator iterStack;
for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
diff --git a/moses/src/Manager.h b/moses/src/Manager.h
index 4b2fafa48..67f7e2883 100755
--- a/moses/src/Manager.h
+++ b/moses/src/Manager.h
@@ -29,7 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TranslationOption.h"
#include "HypothesisCollection.h"
#include "HypothesisCollectionIntermediate.h"
-#include "TranslationOptionCollectionText.h"
+#include "TranslationOptionCollection.h"
#include "LatticePathList.h"
#include "SquareMatrix.h"
#include "WordsBitmap.h"
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
index 24a72bdf9..b44f7a02f 100755
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -30,6 +30,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "InputFileStream.h"
#include "StaticData.h"
+#include "Input.h"
+#include "WordsRange.h"
using namespace std;
@@ -39,6 +41,12 @@ PhraseDictionaryBase::PhraseDictionaryBase(size_t noScoreComponent)
{
}
PhraseDictionaryBase::~PhraseDictionaryBase() {}
+
+const TargetPhraseCollection *PhraseDictionaryBase::
+GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
+{
+ return GetTargetPhraseCollection(src.GetSubString(range));
+}
void PhraseDictionary::Load(const std::vector<FactorType> &input
diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h
index d24276a3a..9fea1cbfe 100755
--- a/moses/src/PhraseDictionary.h
+++ b/moses/src/PhraseDictionary.h
@@ -32,7 +32,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TargetPhraseCollection.h"
class StaticData;
-
+class InputType;
+class WordsRange;
class PhraseDictionaryBase : public Dictionary {
protected:
@@ -47,13 +48,14 @@ class PhraseDictionaryBase : public Dictionary {
return Translate;
}
+ virtual void InitializeForInput(InputType const&) {}
+
virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
+ virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
- // virtual const TargetPhraseCollection *FindEquivPhrase(const Phrase &source) const=0;
-
};
diff --git a/moses/src/PhraseDictionaryTree.cpp b/moses/src/PhraseDictionaryTree.cpp
index 1458a880e..c5b448867 100644
--- a/moses/src/PhraseDictionaryTree.cpp
+++ b/moses/src/PhraseDictionaryTree.cpp
@@ -530,6 +530,14 @@ PhraseDictionaryTree::GetTargetCandidates(PrefixPtr p,
imp->GetTargetCandidates(p,tcands);
imp->ConvertTgtCand(tcands,rv,m_outFactorType);
}
+void PhraseDictionaryTree::
+GetTargetCandidates(PrefixPtr p,
+ std::vector<StringTgtCand>& rv) const
+{
+ TgtCands tcands;
+ imp->GetTargetCandidates(p,tcands);
+ imp->ConvertTgtCand(tcands,rv);
+}
////////////////////////////////////////////////////////////
//
diff --git a/moses/src/PhraseDictionaryTree.h b/moses/src/PhraseDictionaryTree.h
index 3f4831a26..8f2b0a43c 100644
--- a/moses/src/PhraseDictionaryTree.h
+++ b/moses/src/PhraseDictionaryTree.h
@@ -93,6 +93,8 @@ public:
// requirement: the pointer has to evaluate to true
void GetTargetCandidates(PrefixPtr p,
std::vector<FactorTgtCand>& rv) const;
+ void GetTargetCandidates(PrefixPtr p,
+ std::vector<StringTgtCand>& rv) const;
// print target candidates for a given prefix pointer to a stream, mainly
// for debugging
diff --git a/moses/src/PhraseDictionaryTreeAdaptor.cpp b/moses/src/PhraseDictionaryTreeAdaptor.cpp
index efd6d6ce1..5ff1ebabe 100644
--- a/moses/src/PhraseDictionaryTreeAdaptor.cpp
+++ b/moses/src/PhraseDictionaryTreeAdaptor.cpp
@@ -5,6 +5,8 @@
#include "Phrase.h"
#include "FactorCollection.h"
#include "InputFileStream.h"
+#include "Input.h"
+#include "ConfusionNet.h"
inline bool existsFile(const char* filename) {
struct stat mystat;
@@ -25,10 +27,23 @@ struct PDTAimp {
PhraseDictionaryTreeAdaptor *m_obj;
int useCache;
+ typedef std::vector<TargetPhraseCollection const*> vTPC;
+ std::vector<vTPC> m_rangeCache;
+
+
PDTAimp(PhraseDictionaryTreeAdaptor *p)
: m_languageModels(0),m_weightWP(0.0),m_factorCollection(0),m_dict(0),
m_obj(p),useCache(1) {}
+ void Factors2String(FactorArray const& w,std::string& s) const
+ {
+ for(size_t j=0;j<m_input.size();++j)
+ {
+ if(s.size()) s+="|";
+ s+=w[m_input[j]]->ToString();
+ }
+ }
+
void CleanUp()
{
assert(m_dict);
@@ -36,6 +51,7 @@ struct PDTAimp {
for(size_t i=0;i<m_tgtColls.size();++i) delete m_tgtColls[i];
m_tgtColls.clear();
m_cache.clear();
+ m_rangeCache.clear();
}
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
@@ -75,11 +91,7 @@ struct PDTAimp {
std::vector<std::string> srcString(src.GetSize());
// convert source Phrase into vector of strings
for(size_t i=0;i<srcString.size();++i)
- for(size_t j=0;j<m_input.size();++j)
- {
- if(srcString[i].size()) srcString[i]+="|";
- srcString[i]+=src.GetFactor(i,m_input[j])->ToString();
- }
+ Factors2String(src.GetFactorArray(i),srcString[i]);
// get target phrases in string representation
std::vector<StringTgtCand> cands;
@@ -97,16 +109,7 @@ struct PDTAimp {
StringTgtCand::first_type const& factorStrings=cands[i].first;
StringTgtCand::second_type const& scoreVector=cands[i].second;
- for(size_t k=0;k<factorStrings.size();++k)
- {
- std::vector<std::string> factors=Tokenize(*factorStrings[k],"|");
- FactorArray& fa=targetPhrase.AddWord();
- for(size_t l=0;l<m_output.size();++l)
- fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
- }
-
- targetPhrase.SetScore(scoreVector, m_weights, *m_languageModels, m_weightWP);
-
+ CreateTargetPhrase(targetPhrase,factorStrings,scoreVector);
costs.push_back(std::make_pair(targetPhrase.GetFutureScore(),tCands.size()));
tCands.push_back(targetPhrase);
}
@@ -167,6 +170,145 @@ struct PDTAimp {
TRACE_ERR("reading bin ttable\n");
m_dict->Read(filePath);
}
+
+ typedef PhraseDictionaryTree::PrefixPtr PPtr;
+ typedef std::pair<size_t,size_t> Range;
+ struct State {
+ PPtr ptr;
+ Range range;
+ float score;
+
+ State() : range(0,0),score(0.0) {}
+ State(size_t b,size_t e,const PPtr& v,float sc=0.0) : ptr(v),range(b,e),score(sc) {}
+ State(Range const& r,const PPtr& v,float sc=0.0) : ptr(v),range(r),score(sc) {}
+
+ size_t begin() const {return range.first;}
+ size_t end() const {return range.second;}
+ float GetScore() const {return score;}
+
+ };
+
+ void CreateTargetPhrase(TargetPhrase& targetPhrase,StringTgtCand::first_type const& factorStrings,StringTgtCand::second_type const& scoreVector) const
+ {
+
+ for(size_t k=0;k<factorStrings.size();++k)
+ {
+ std::vector<std::string> factors=Tokenize(*factorStrings[k],"|");
+ FactorArray& fa=targetPhrase.AddWord();
+ for(size_t l=0;l<m_output.size();++l)
+ fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
+ }
+
+ targetPhrase.SetScore(scoreVector, m_weights, *m_languageModels, m_weightWP);
+
+ }
+
+
+ void CacheSource(ConfusionNet const& src)
+ {
+ assert(m_dict);
+ std::vector<State> stack;
+ for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,m_dict->GetRoot()));
+
+ typedef StringTgtCand::first_type sPhrase;
+ typedef std::map<StringTgtCand::first_type,std::pair<float,StringTgtCand::second_type> > E2Costs;
+
+ std::map<Range,E2Costs> cov2cand;
+
+ while(!stack.empty())
+ {
+ State curr(stack.back());
+ stack.pop_back();
+
+ //std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
+
+ assert(curr.end()<src.GetSize());
+ const ConfusionNet::Column &currCol=src[curr.end()];
+ for(size_t colidx=0;colidx<currCol.size();++colidx)
+ {
+ const Word& w=currCol[colidx].first;
+ std::string s;
+ Factors2String(w.GetFactorArray(),s);
+ PPtr nextP=m_dict->Extend(curr.ptr,s);
+
+ if(nextP)
+ {
+ Range newRange(curr.begin(),curr.end()+1);
+ if(newRange.second<src.GetSize())
+ stack.push_back(State(newRange,nextP,
+ curr.GetScore()+currCol[colidx].second));
+
+ std::vector<StringTgtCand> tcands;
+ m_dict->GetTargetCandidates(nextP,tcands);
+
+ if(tcands.size())
+ {
+ E2Costs& e2costs=cov2cand[newRange];
+
+ for(size_t i=0;i<tcands.size();++i)
+ {
+ float costs=CalcTranslationScore(tcands[i].second,m_weights);
+ costs-=tcands[i].first.size() * m_weightWP;
+ std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].first,std::make_pair(costs,tcands[i].second)));
+ if(!p.second)
+ {
+ if(p.first->second.first>costs)
+ p.first->second=std::make_pair(costs,tcands[i].second);
+ }
+ }
+
+
+ }
+ }
+ }
+ } // end while(!stack.empty())
+
+ m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
+
+
+ for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin();i!=cov2cand.end();++i)
+ {
+ assert(i->first.first<m_rangeCache.size());
+ assert(i->first.second>0);
+ assert(i->first.second-1<m_rangeCache[i->first.first].size());
+ assert(m_rangeCache[i->first.first][i->first.second-1]==0);
+
+ std::vector<TargetPhrase> tCands;tCands.reserve(i->second.size());
+ std::vector<std::pair<float,size_t> > costs;costs.reserve(i->second.size());
+
+ for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
+ {
+ TargetPhrase targetPhrase(Output, m_obj);
+ CreateTargetPhrase(targetPhrase,j->first,j->second.second);
+ costs.push_back(std::make_pair(targetPhrase.GetFutureScore(),tCands.size()));
+ tCands.push_back(targetPhrase);
+ }
+
+ // prune target candidates and sort according to score
+ std::vector<std::pair<float,size_t> >::iterator nth=costs.end();
+ if(m_obj->m_maxTargetPhrase>0 && costs.size()>m_obj->m_maxTargetPhrase) {
+ nth=costs.begin()+m_obj->m_maxTargetPhrase;
+ std::nth_element(costs.begin(),nth,costs.end(),std::greater<std::pair<float,size_t> >());
+ }
+ std::sort(costs.begin(),nth,std::greater<std::pair<float,size_t> >());
+
+ // convert into TargerPhraseCollection
+ TargetPhraseCollection *rv=new TargetPhraseCollection;
+ for(std::vector<std::pair<float,size_t> >::iterator it=costs.begin();it!=nth;++it)
+ rv->push_back(tCands[it->second]);
+
+ if(rv->empty())
+ delete rv;
+ else
+ {
+ m_rangeCache[i->first.first][i->first.second-1]=rv;
+ m_tgtColls.push_back(rv);
+ }
+
+ }
+
+
+ }
};
@@ -192,6 +334,13 @@ void PhraseDictionaryTreeAdaptor::CleanUp()
MyBase::CleanUp();
}
+void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
+{
+ // only required for confusion net
+ if(ConfusionNet const* cn=dynamic_cast<ConfusionNet const*>(&source))
+ imp->CacheSource(*cn);
+}
+
void PhraseDictionaryTreeAdaptor::Create(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
@@ -224,6 +373,14 @@ PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(Phrase const &src) const
{
return imp->GetTargetPhraseCollection(src);
}
+TargetPhraseCollection const*
+PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(InputType const& src,WordsRange const &range) const
+{
+ if(imp->m_rangeCache.empty())
+ return imp->GetTargetPhraseCollection(src.GetSubString(range));
+ else
+ return imp->m_rangeCache[range.GetStartPos()][range.GetEndPos()];
+}
void PhraseDictionaryTreeAdaptor::
SetWeightTransModel(const std::vector<float> &weightT)
diff --git a/moses/src/PhraseDictionaryTreeAdaptor.h b/moses/src/PhraseDictionaryTreeAdaptor.h
index 7e182a6fe..6ecfa6327 100644
--- a/moses/src/PhraseDictionaryTreeAdaptor.h
+++ b/moses/src/PhraseDictionaryTreeAdaptor.h
@@ -8,6 +8,8 @@
class Phrase;
class PDTAimp;
+class WordsRange;
+class InputType;
class PhraseDictionaryTreeAdaptor : public PhraseDictionaryBase {
typedef PhraseDictionaryBase MyBase;
@@ -40,10 +42,14 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionaryBase {
// get translation candidates for a given source phrase
// returns null pointer if nothing found
TargetPhraseCollection const* GetTargetPhraseCollection(Phrase const &src) const;
+ TargetPhraseCollection const* GetTargetPhraseCollection(InputType const& src,WordsRange const & srcRange) const;
// clean up temporary memory etc.
void CleanUp();
-
+
+
+ void InitializeForInput(InputType const& source);
+
// change model scaling factors
void SetWeightTransModel(const std::vector<float> &weightT);
diff --git a/moses/src/Sentence.cpp b/moses/src/Sentence.cpp
index ce10bc957..4fe36abef 100755
--- a/moses/src/Sentence.cpp
+++ b/moses/src/Sentence.cpp
@@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Sentence.h"
#include <boost/algorithm/string.hpp>
#include "PhraseDictionary.h"
+#include "TranslationOptionCollectionText.h"
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection)
{
@@ -40,3 +41,7 @@ TargetPhraseCollection const* Sentence::CreateTargetPhraseCollection(PhraseDicti
Phrase src=GetSubString(r);
return d.GetTargetPhraseCollection(src);
}
+TranslationOptionCollection* Sentence::CreateTranslationOptionCollection() const
+{
+ return new TranslationOptionCollectionText(*this);
+}
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
index f8bb54985..b0fad09c3 100755
--- a/moses/src/Sentence.h
+++ b/moses/src/Sentence.h
@@ -61,5 +61,6 @@ class Sentence : public Phrase, public InputType
int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection);
TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const;
+ TranslationOptionCollection* CreateTranslationOptionCollection() const;
};
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 3e2adf4c5..8471efd7e 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -502,3 +502,8 @@ void StaticData::CleanUpAfterSentenceProcessing()
for(size_t i=0;i<m_generationDictionary.size();++i)
m_generationDictionary[i]->CleanUp();
}
+void StaticData::InitializeBeforeSentenceProcessing(InputType const& in)
+{
+ for(size_t i=0;i<m_phraseDictionary.size();++i)
+ m_phraseDictionary[i]->InitializeForInput(in);
+}
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index a52e820a6..aabb239be 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -35,6 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "DecodeStep.h"
//#include "UnknownWordHandler.h"
+class InputType;
+
class StaticData
{
protected:
@@ -218,6 +220,7 @@ public:
void SetWeightGeneration(const std::vector<float> &weight);
int GetInputType() const {return m_inputType;}
+ void InitializeBeforeSentenceProcessing(InputType const&);
void CleanUpAfterSentenceProcessing();
};
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index 3f7d7f4ca..a57037cf1 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -63,7 +63,10 @@ CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
for (size_t endPos = startPos ; endPos < m_source.GetSize() ; ++endPos)
{
WordsRange wordsRange(startPos, endPos);
- const TargetPhraseCollection *phraseColl= m_source.CreateTargetPhraseCollection(dictionary,wordsRange);
+
+ const TargetPhraseCollection *phraseColl= dictionary.GetTargetPhraseCollection(m_source,wordsRange);
+ // const TargetPhraseCollection *phraseColl= m_source.CreateTargetPhraseCollection(dictionary,wordsRange);
+
mTPC[startPos][endPos]=phraseColl;
if (phraseColl != NULL)
{
@@ -96,9 +99,12 @@ CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
}
}
+ ComputeFutureScores(verboseLevel);
+}
-
-
+void TranslationOptionCollection::ComputeFutureScores(size_t verboseLevel)
+{
+
#if 1
// create future score matrix
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index f82c381a4..c27c18288 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -46,14 +46,14 @@ protected:
public:
virtual ~TranslationOptionCollection();
- void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
- , const LMList &languageModels
- , const LMList &allLM
- , FactorCollection &factorCollection
- , float weightWordPenalty
- , bool dropUnknown
- , size_t verboseLevel);
-
+ virtual void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
+ , const LMList &languageModels
+ , const LMList &allLM
+ , FactorCollection &factorCollection
+ , float weightWordPenalty
+ , bool dropUnknown
+ , size_t verboseLevel);
+
// get length/size of source input
size_t GetSize() const;
@@ -77,6 +77,8 @@ public:
bool dropUnknown,
float weightWordPenalty
) =0;
+
+ void ComputeFutureScores(size_t verboseLevel);
};
inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll)
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.cpp b/moses/src/TranslationOptionCollectionConfusionNet.cpp
index 0224b7435..8ce8f45f4 100644
--- a/moses/src/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/src/TranslationOptionCollectionConfusionNet.cpp
@@ -9,102 +9,29 @@
TranslationOptionCollectionConfusionNet::
TranslationOptionCollectionConfusionNet(const ConfusionNet &input)
: TranslationOptionCollection(input) {}
-#if 0
-size_t TranslationOptionCollectionConfusionNet::GetSourceSize() const
-{
- return m_source.GetSize();
-}
void TranslationOptionCollectionConfusionNet::
CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
- , const LMList & //languageModels
- , const LMList & //allLM
- , FactorCollection & //factorCollection
- , float //weightWordPenalty
- , bool //dropUnknown
- , size_t verboseLevel)
+ , const LMList &languageModels
+ , const LMList &allLM
+ , FactorCollection &factorCollection
+ , float weightWordPenalty
+ , bool dropUnknown
+ , size_t verboseLevel)
{
- // loop over all substrings of the source sentence, look them up
- // in the phraseDictionary (which is the- possibly filtered-- phrase
- // table loaded on initialization), generate TranslationOption objects
- // for all phrases
- //
- // possible optimization- don't consider phrases longer than the longest
- // phrase in the PhraseDictionary?
-
- Dictionary *dictionary = decodeStepList.front().GetDictionaryPtr();
-
- typedef std::vector<TargetPhraseCollection const*> vTPC;
- std::vector<vTPC> mTPC(m_source.GetSize(),vTPC(m_source.GetSize(),0));
- size_t maxLen=0;
+#if 0
+ ConfusionNet const& source(dynamic_cast<ConfusionNet&>(m_source));
+ assert(dynamic_cast<PhraseDictionaryTreeAdaptor const*>(decodeStepList.front().GetDictionaryPtr()));
+ PhraseDictionaryTreeAdaptor const& pdict
+ =dynamic_cast<PhraseDictionaryTreeAdaptor const&>(*decodeStepList.front().GetDictionaryPtr());
- for (size_t startPos = 0 ; startPos < m_source.GetSize() ; ++startPos)
- for (size_t endPos = startPos ; endPos < m_source.GetSize() ; ++endPos)
- {
- WordsRange wordsRange(startPos, endPos);
- const TargetPhraseCollection *phraseColl=
- CreateTargetPhraseCollection(dictionary,&m_source,wordsRange);
- mTPC[startPos][endPos]=phraseColl;
- if (phraseColl != NULL)
- {
- maxLen=std::max(endPos-startPos+1,maxLen);
- if (verboseLevel >= 3)
- std::cout << "[" << m_source.GetSubString(wordsRange) << "; "
- << startPos << "-" << endPos << "]\n";
-
- for(TargetPhraseCollection::const_iterator iterTargetPhrase=phraseColl->begin();
- iterTargetPhrase != phraseColl->end() ; ++iterTargetPhrase)
- {
- TranslationOption transOpt(wordsRange, *iterTargetPhrase);
- push_back(transOpt);
- if (verboseLevel >= 3) std::cout << "\t" << transOpt << "\n";
- }
- if (verboseLevel >= 3) std::cout << std::endl;
- }
- else if (wordsRange.GetWordsCount() == 1)
- {
- // drop unk
- m_initialCoverage.SetValue(startPos, startPos,1);
- }
- }
-
-
-
-
- // create future score matrix
- // for each span in the source phrase (denoted by start and end)
+ std::vector<State> stack;
+ for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,data.GetRoot()));
-
- // init future costs
- for(size_t endPos=0; endPos<m_source.GetSize(); ++endPos)
- for(size_t startPos=0; startPos<=endPos; ++startPos)
- {
- float currScore=-std::numeric_limits<float>().infinity();
- if(TargetPhraseCollection const *p=mTPC[startPos][endPos])
- for(TargetPhraseCollection::const_iterator i=p->begin();i!=p->end();++i)
- currScore=std::max(currScore,i->GetFutureScore());
- m_futureScore.SetScore(startPos,endPos,currScore);
- }
-
- // solve DP recursion, similar to CYK parsing
- for(size_t len=1;len<maxLen;++len)
- for(size_t startPos=0; startPos<m_source.GetSize()-len; ++startPos)
- {
- size_t endPos=startPos+len;
- float currScore=m_futureScore.GetScore(startPos,endPos);
- for(size_t k=startPos;k<endPos;++k)
- currScore=std::max(currScore,
- m_futureScore.GetScore(startPos,k)
- + m_futureScore.GetScore(k+1,endPos));
- m_futureScore.SetScore(startPos,endPos,currScore);
- if(verboseLevel > 0)
- std::cout<<"future cost from "<<startPos<<" to "<<endPos<<" is "
- <<m_futureScore.GetScore(startPos,endPos)<<std::endl;
- }
-
-
-}
+#else
+ return TranslationOptionCollection::CreateTranslationOptions(decodeStepList,languageModels,allLM,factorCollection,weightWordPenalty,dropUnknown,verboseLevel);
#endif
+}
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.h b/moses/src/TranslationOptionCollectionConfusionNet.h
index af512d953..88d0d761d 100644
--- a/moses/src/TranslationOptionCollectionConfusionNet.h
+++ b/moses/src/TranslationOptionCollectionConfusionNet.h
@@ -17,7 +17,13 @@ class TranslationOptionCollectionConfusionNet : public TranslationOptionCollecti
,float //weightWordPenalty
) {return 0;}
-
+ void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
+ , const LMList &languageModels
+ , const LMList &allLM
+ , FactorCollection &factorCollection
+ , float weightWordPenalty
+ , bool dropUnknown
+ , size_t verboseLevel);
};
#endif
diff --git a/moses/src/TranslationOptionCollectionText.cpp b/moses/src/TranslationOptionCollectionText.cpp
index 5c09e7bc6..e2a61f3dd 100644
--- a/moses/src/TranslationOptionCollectionText.cpp
+++ b/moses/src/TranslationOptionCollectionText.cpp
@@ -65,7 +65,7 @@ int TranslationOptionCollectionText::HandleUnkownWord(PhraseDictionaryBase& phra
targetPhraseOrig.SetScore(allLM, weightWordPenalty);
phraseDictionary.AddEquivPhrase(sourcePhrase, targetPhraseOrig);
- const TargetPhraseCollection *phraseColl = phraseDictionary.GetTargetPhraseCollection(sourcePhrase); //FindEquivPhrase(sourcePhrase);
+ const TargetPhraseCollection *phraseColl = phraseDictionary.GetTargetPhraseCollection(sourcePhrase);
assert(phraseColl);
const TargetPhrase &targetPhrase = *phraseColl->begin();