Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2016-09-30 18:01:54 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2016-09-30 18:01:54 +0300
commit: 792b25cbbd404d49298c25a43d7fdb4c4cdd481f (patch)
tree: 78baf2cfdeb9f1d18519a7488d144fc417c70c04
parent: 9bd727f1d537e25ed44bf0a42ebc6fb653b34d25 (diff)
redo import of PhraseDecoder and TargetPhraseCollectionCache. Use custom classes for phrases and target phrases
-rw-r--r-- contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp 81
-rw-r--r-- contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h 13
-rw-r--r-- contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp 9
-rw-r--r-- contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h 25
4 files changed, 63 insertions, 65 deletions
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
index 8930bf94b..4ff4375cd 100644
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
@@ -22,9 +22,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <deque>
#include "PhraseDecoder.h"
-#include "moses/StaticData.h"
-
-#include "../../ManagerBase.h"
+#include "../../System.h"
+#include "../../SubPhrase.h"
using namespace std;
@@ -248,12 +247,12 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(
}
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
- const ManagerBase &mgr,
- TargetPhraseVectorPtr tpv,
- BitWrapper<> &encodedBitStream,
- const Phrase<Word> &sourcePhrase,
- bool topLevel,
- bool eval)
+ const ManagerBase &mgr,
+ TargetPhraseVectorPtr tpv,
+ BitWrapper<> &encodedBitStream,
+ const Phrase<Word> &sourcePhrase,
+ bool topLevel,
+ bool eval)
{
const System &system = mgr.system;
FactorCollection &vocab = system.GetVocab();
@@ -274,19 +273,23 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
unsigned phraseStopSymbol = 0;
AlignPoint alignStopSymbol(-1, -1);
- TPCompact tpCompact;
+ std::vector<float> scores;
+ std::set<AlignPointSizeT> alignment;
enum DecodeState { New, Symbol, Score, Alignment, Add } state = New;
size_t srcSize = sourcePhrase.GetSize();
+ TPCompact* targetPhrase = NULL;
while(encodedBitStream.TellFromEnd()) {
if(state == New) {
// Creating new TargetPhrase on the heap
- tpCompact.words.clear();
- tpCompact.alignment.clear();
- tpCompact.scores.clear();
+ tpv->push_back(TPCompact());
+ targetPhrase = &tpv->back();
+
+ alignment.clear();
+ scores.clear();
state = Symbol;
}
@@ -312,12 +315,12 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = tpCompact.words.size();
- tpCompact.alignment.insert(AlignPoint(srcPos, trgPos));
+ size_t trgPos = targetPhrase->words.size();
+ alignment.insert(AlignPoint(srcPos, trgPos));
}
} else if(type == 3) {
size_t rank = DecodeREncSymbol3(symbol);
- size_t srcPos = tpCompact.words.size();
+ size_t srcPos = targetPhrase->words.size();
if(srcPos >= sourceWords.size())
return TargetPhraseVectorPtr();
@@ -325,13 +328,13 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(m_phraseDictionary.m_useAlignmentInfo) {
size_t trgPos = srcPos;
- tpCompact.alignment.insert(AlignPoint(srcPos, trgPos));
+ alignment.insert(AlignPoint(srcPos, trgPos));
}
}
Word word;
word.CreateFromString(vocab, system, wordString);
- tpCompact.words.push_back(word);
+ targetPhrase->words.push_back(word);
} else if(m_coding == PREnc) {
// if the symbol is just a word
if(GetPREncType(symbol) == 1) {
@@ -339,7 +342,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
Word word;
word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol));
- tpCompact.words.push_back(word);
+ targetPhrase->words.push_back(word);
}
// if the symbol is a subphrase pointer
else {
@@ -347,7 +350,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
int right = DecodePREncSymbol2Right(symbol);
unsigned rank = DecodePREncSymbol2Rank(symbol);
- int srcStart = left + tpCompact.words.size();
+ int srcStart = left + targetPhrase->words.size();
int srcEnd = srcSize - right - 1;
// false positive consistency check
@@ -374,34 +377,34 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
// false positive consistency check
if(subTpv != NULL && rank < subTpv->size()) {
// insert the subphrase into the main target phrase
- const TPCompact& subTp = subTpv->at(rank);
+ TPCompact& subTp = subTpv->at(rank);
if(m_phraseDictionary.m_useAlignmentInfo) {
// reconstruct the alignment data based on the alignment of the subphrase
for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin();
it != subTp.alignment.end(); it++) {
- tpCompact.alignment.insert(AlignPointSizeT(srcStart + it->first,
- tpCompact.words.size() + it->second));
+ alignment.insert(AlignPointSizeT(srcStart + it->first,
+ targetPhrase->words.size() + it->second));
}
}
- for (size_t i = 0; i < subTp.words.size(); ++i) {
- tpCompact.words.push_back(subTp.words[i]);
- }
+ std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words));
} else
return TargetPhraseVectorPtr();
}
} else {
Word word;
word.CreateFromString(vocab, system, GetTargetSymbol(symbol));
- tpCompact.words.push_back(word);
+ targetPhrase->words.push_back(word);
}
}
} else if(state == Score) {
- size_t idx = m_multipleScoreTrees ? tpCompact.scores.size() : 0;
+ size_t idx = m_multipleScoreTrees ? scores.size() : 0;
float score = m_scoreTrees[idx]->Read(encodedBitStream);
- tpCompact.scores.push_back(score);
+ scores.push_back(score);
+
+ if(scores.size() == m_numScoreComponent) {
+ targetPhrase->scores = scores;
- if(tpCompact.scores.size() == m_numScoreComponent) {
if(m_containsAlignmentInfo)
state = Alignment;
else
@@ -413,29 +416,19 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
state = Add;
} else {
if(m_phraseDictionary.m_useAlignmentInfo)
- tpCompact.alignment.insert(AlignPointSizeT(alignPoint));
+ alignment.insert(AlignPointSizeT(alignPoint));
}
}
if(state == Add) {
- size_t targetSize = tpCompact.words.size();
- TargetPhraseImpl *targetPhrase = new (mgr.GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(mgr.GetPool(), m_phraseDictionary, system, targetSize);
-
- for (size_t i = 0; i < tpCompact.words.size(); ++i) {
- (*targetPhrase)[i] = tpCompact.words[i];
- }
-
if(m_phraseDictionary.m_useAlignmentInfo) {
size_t sourceSize = sourcePhrase.GetSize();
- for(std::set<AlignPointSizeT>::iterator it = tpCompact.alignment.begin(); it != tpCompact.alignment.end(); it++) {
+ size_t targetSize = targetPhrase->words.size();
+ for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) {
if(it->first >= sourceSize || it->second >= targetSize)
return TargetPhraseVectorPtr();
}
- targetPhrase->SetAlignTerm(tpCompact.alignment);
- }
-
- if(eval) {
- mgr.system.featureFunctions.EvaluateInIsolation(mgr.GetPool(), mgr.system, sourcePhrase, *targetPhrase);
+ targetPhrase->alignment = alignment;
}
if(m_coding == PREnc) {
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
index 01a7c23c5..79faa38a6 100644
--- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
+++ b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h
@@ -30,19 +30,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <algorithm>
#include <sys/stat.h>
-#include "moses/TypeDef.h"
-#include "moses/FactorCollection.h"
-#include "moses/Word.h"
-#include "moses/Util.h"
-#include "moses/InputFileStream.h"
-#include "moses/StaticData.h"
-#include "moses/Range.h"
-
#include "PhraseTableCompact.h"
#include "StringVector.h"
#include "CanonicalHuffman.h"
#include "TargetPhraseCollectionCache.h"
+#include "../../Phrase.h"
+#include "../../ManagerBase.h"
+
namespace Moses2
{
@@ -116,7 +111,7 @@ protected:
public:
PhraseDecoder(
- PhraseTableCompact &phraseDictionary,
+ PhraseTableCompact &phraseDictionary,
const std::vector<FactorType>* input,
const std::vector<FactorType>* output,
size_t numScoreComponent
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
index 5f1caad78..07d0469e0 100644
--- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
+++ b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
@@ -24,9 +24,16 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses2
{
-
boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap>
TargetPhraseCollectionCache::m_phraseCache;
+PhraseCompact::PhraseCompact(const Phrase<Word> &copy)
+{
+ for (size_t i = 0; i < copy.GetSize(); ++i) {
+ const Word &word = copy[i];
+ push_back(word);
+ }
+}
+
}
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
index 601ac1bbe..3a9e6f170 100644
--- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
+++ b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
@@ -28,13 +28,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/thread/tss.hpp>
#include <boost/shared_ptr.hpp>
-#include "../../PhraseBased/TargetPhraseImpl.h"
+#include "../../Word.h"
#include "../../Phrase.h"
namespace Moses2
{
typedef std::pair<size_t, size_t> AlignPointSizeT;
+struct PhraseCompact : public std::vector<Word>
+{
+public:
+ PhraseCompact(const Phrase<Word> &copy);
+};
+
struct TPCompact
{
std::vector<Word> words;
@@ -65,10 +71,7 @@ private:
: m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {}
};
- typedef boost::unordered_map<
- const Phrase<Word>*,
- LastUsed, UnorderedComparer< Phrase<Word> >,
- UnorderedComparer< Phrase<Word> > > CacheMap;
+ typedef std::map<PhraseCompact, LastUsed> CacheMap;
static boost::thread_specific_ptr<CacheMap> m_phraseCache;
public:
@@ -110,7 +113,7 @@ public:
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
// check if source phrase is already in cache
- iterator it = m_phraseCache->find(&sourcePhrase);
+ iterator it = m_phraseCache->find(sourcePhrase);
if(it != m_phraseCache->end())
// if found, just update clock
it->second.m_clock = clock();
@@ -120,16 +123,16 @@ public:
TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector());
tpv_temp->resize(maxRank);
std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin());
- (*m_phraseCache)[&sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
+ (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
} else
- (*m_phraseCache)[&sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
+ (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
}
}
std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase<Word> &sourcePhrase) {
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
- iterator it = m_phraseCache->find(&sourcePhrase);
+ iterator it = m_phraseCache->find(sourcePhrase);
if(it != m_phraseCache->end()) {
LastUsed &lu = it->second;
lu.m_clock = clock();
@@ -143,7 +146,7 @@ public:
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
if(m_phraseCache->size() > m_max * (1 + m_tolerance)) {
- typedef boost::unordered_set<std::pair<clock_t, const Phrase<Word>*> > Cands;
+ typedef std::set<std::pair<clock_t, PhraseCompact > > Cands;
Cands cands;
for(CacheMap::iterator it = m_phraseCache->begin();
it != m_phraseCache->end(); it++) {
@@ -152,7 +155,7 @@ public:
}
for(Cands::iterator it = cands.begin(); it != cands.end(); it++) {
- const Phrase<Word> *p = it->second;
+ const PhraseCompact& p = it->second;
m_phraseCache->erase(p);
if(m_phraseCache->size() < (m_max * (1 - m_tolerance)))