Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com>, 2017-02-03 13:01:02 +0300
committer: Hieu Hoang <hieuhoang@gmail.com>, 2017-02-03 13:01:02 +0300
commit: e0dc9364c86adc2085e61d53f5427c3a1228ed95 (patch)
tree: 7ec6165fdb62cdedf39f06ce035f43a03cde9219 /moses2/TranslationModel
parent: aec2d51ce507913c39fb260d266c8353d81351cd (diff)
remove Compact pt from Moses2 (keep compact lex reordering model)
Diffstat (limited to 'moses2/TranslationModel')
-rw-r--r-- moses2/TranslationModel/CompactPT/PhraseDecoder.cpp 466
-rw-r--r-- moses2/TranslationModel/CompactPT/PhraseDecoder.h 142
-rw-r--r-- moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp 222
-rw-r--r-- moses2/TranslationModel/CompactPT/PhraseTableCompact.h 68
4 files changed, 0 insertions, 898 deletions
diff --git a/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
deleted file mode 100644
index 7860fed94..000000000
--- a/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <deque>
-
-#include "PhraseDecoder.h"
-#include "../../System.h"
-#include "../../SubPhrase.h"
-
-using namespace std;
-
-namespace Moses2
-{
-
-// Builds an empty decoder bound to `phraseDictionary`. No data is loaded here;
-// the coding scheme, symbol tables and Huffman trees are filled in by Load().
-//   input / output    - factor lists, stored as given (not copied)
-//   numScoreComponent - number of scores per target phrase; Load() overwrites it
-PhraseDecoder::PhraseDecoder(
-  PhraseTableCompact &phraseDictionary,
-  const std::vector<FactorType>* input,
-  const std::vector<FactorType>* output,
-  size_t numScoreComponent
-  // , const std::vector<float>* weight
-)
-  : m_coding(None), m_numScoreComponent(numScoreComponent),
-    m_containsAlignmentInfo(true), m_maxRank(0),
-    // Previously left uninitialised, so GetMaxSourcePhraseLength() returned
-    // an indeterminate value when called before Load().
-    m_maxPhraseLength(0),
-    m_symbolTree(0), m_multipleScoreTrees(false),
-    // one (null) score-tree slot, so the destructor loop is always valid
-    m_scoreTrees(1), m_alignTree(0),
-    m_phraseDictionary(phraseDictionary), m_input(input), m_output(output),
-    // m_weight(weight),
-    m_separator(" ||| ")
-{ }
-
-// Releases the Huffman trees allocated by Load(). Deleting a null pointer is
-// a no-op, so slots that were never filled need no explicit check.
-PhraseDecoder::~PhraseDecoder()
-{
-  delete m_symbolTree;
-
-  typedef std::vector<CanonicalHuffman<float>*>::iterator ScoreTreeIter;
-  for(ScoreTreeIter tree = m_scoreTrees.begin(); tree != m_scoreTrees.end(); ++tree)
-    delete *tree;
-
-  delete m_alignTree;
-}
-
-// Returns the id of `symbol` as reported by m_sourceSymbols.find(), memoising
-// the answer in m_sourceSymbolsMap so repeated words skip the lookup.
-inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
-{
-  typedef boost::unordered_map<std::string, unsigned> SymbolIdMap;
-
-  const SymbolIdMap::iterator cached = m_sourceSymbolsMap.find(symbol);
-  if(cached == m_sourceSymbolsMap.end()) {
-    // first time this word is seen: look it up and remember the result
-    size_t foundIdx = m_sourceSymbols.find(symbol);
-    m_sourceSymbolsMap[symbol] = foundIdx;
-    return foundIdx;
-  }
-
-  return cached->second;
-}
-
-// Returns the target-side symbol stored at `idx`, or the marker string
-// "##ERROR##" when `idx` is outside m_targetSymbols.
-inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const
-{
-  if(idx >= m_targetSymbols.size())
-    return std::string("##ERROR##");
-
-  return m_targetSymbols[idx];
-}
-
-// REnc symbol type: the top two bits of the encoded symbol, plus one.
-inline size_t PhraseDecoder::GetREncType(unsigned encodedSymbol)
-{
-  const unsigned typeBits = encodedSymbol >> 30;
-  return typeBits + 1;
-}
-
-// PREnc symbol type: the top bit of the encoded symbol, plus one
-// (DecodeCollection treats 1 as a plain word, anything else as a sub-phrase pointer).
-inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol)
-{
-  const unsigned typeBit = encodedSymbol >> 31;
-  return typeBit + 1;
-}
-
-// Returns the `.second` field of the lexical-table entry `rank` places after
-// the first entry for source symbol `srcIdx`.
-// NOTE(review): neither index is range-checked here.
-inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank)
-{
-  const size_t firstEntry = m_lexicalTableIndex[srcIdx];
-  const SrcTrg &entry = m_lexicalTable[firstEntry + rank];
-  return entry.second;
-}
-
-// Accessor for m_maxPhraseLength, the value read from the table file by Load().
-size_t PhraseDecoder::GetMaxSourcePhraseLength()
-{
-  const size_t maxLength = m_maxPhraseLength;
-  return maxLength;
-}
-
-// Strips the two REnc type bits (bits 30-31) and returns the remaining 30 bits.
-inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol)
-{
-  // Unsigned literal: `3 << 30` does not fit in a signed int.
-  const unsigned typeMask = 3u << 30;
-  return encodedSymbol & ~typeMask;
-}
-
-// Returns the low 24 bits of the symbol (the rank), dropping the top byte
-// that holds the type bits and the source position.
-inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol)
-{
-  // Unsigned literal: `255 << 24` does not fit in a signed int.
-  const unsigned topByteMask = 255u << 24;
-  return encodedSymbol & ~topByteMask;
-}
-
-// Returns bits 24-29 of the symbol (the source position): the two type bits
-// are cleared first, then the value is shifted down past the 24-bit rank.
-inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol)
-{
-  // Unsigned literal: `3 << 30` does not fit in a signed int.
-  const unsigned typeMask = 3u << 30;
-  return (encodedSymbol & ~typeMask) >> 24;
-}
-
-// Strips the two REnc type bits (bits 30-31); the remainder is used as a rank
-// by DecodeCollection.
-inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol)
-{
-  // Unsigned literal: `3 << 30` does not fit in a signed int.
-  const unsigned typeMask = 3u << 30;
-  return encodedSymbol & ~typeMask;
-}
-
-// Strips the PREnc type bit (bit 31) and returns the remaining 31 bits.
-inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol)
-{
-  // Unsigned literal: `1 << 31` shifts into the sign bit of a signed int.
-  const unsigned typeMask = 1u << 31;
-  return encodedSymbol & ~typeMask;
-}
-
-// Returns the 6-bit field at bits 25-30, re-centred by subtracting 32, so the
-// result lies in [-32, 31].
-inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol)
-{
-  // Convert to int before subtracting so a negative offset is computed in
-  // signed arithmetic instead of wrapping around as unsigned.
-  const int biased = static_cast<int>((encodedSymbol >> 25) & 63u);
-  return biased - 32;
-}
-
-// Returns the 6-bit field at bits 19-24, re-centred by subtracting 32, so the
-// result lies in [-32, 31].
-inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol)
-{
-  // Convert to int before subtracting so a negative offset is computed in
-  // signed arithmetic instead of wrapping around as unsigned.
-  const int biased = static_cast<int>((encodedSymbol >> 19) & 63u);
-  return biased - 32;
-}
-
-// Returns the low 19 bits of the symbol (524287 == 2^19 - 1).
-inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol)
-{
-  const unsigned rankMask = (1u << 19) - 1u;
-  return encodedSymbol & rankMask;
-}
-
-// Reads the decoder state from `in`, starting at the current file position:
-// header fields, (for REnc) the source symbols and lexical table, the target
-// symbols, and the symbol / score / alignment Huffman trees.
-//
-// Returns the number of bytes consumed (difference of ftell before and after).
-// Returns 0 when ftell fails or a fixed-size read comes up short; the caller,
-// PhraseTableCompact::Load, treats a zero size as "Not successfully loaded".
-size_t PhraseDecoder::Load(std::FILE* in)
-{
-  const long start = std::ftell(in);
-  if(start < 0)
-    return 0;
-
-  // Fixed-size header fields, in on-disk order.
-  size_t read = 0;
-  read += std::fread(&m_coding, sizeof(m_coding), 1, in);
-  read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in);
-  read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in);
-  read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in);
-  read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in);
-  if(read != 5)
-    return 0;
-
-  if(m_coding == REnc) {
-    m_sourceSymbols.load(in);
-
-    size_t size = 0;
-    if(std::fread(&size, sizeof(size_t), 1, in) != 1)
-      return 0;
-    m_lexicalTableIndex.resize(size);
-    // Taking &v[0] of an empty vector is undefined, so only read when size > 0.
-    if(size && std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in) != size)
-      return 0;
-
-    if(std::fread(&size, sizeof(size_t), 1, in) != 1)
-      return 0;
-    m_lexicalTable.resize(size);
-    if(size && std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in) != size)
-      return 0;
-  }
-
-  m_targetSymbols.load(in);
-
-  m_symbolTree = new CanonicalHuffman<unsigned>(in);
-
-  if(std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in) != 1)
-    return 0;
-  if(m_multipleScoreTrees) {
-    // one Huffman tree per score component
-    m_scoreTrees.resize(m_numScoreComponent);
-    for(size_t i = 0; i < m_numScoreComponent; i++)
-      m_scoreTrees[i] = new CanonicalHuffman<float>(in);
-  } else {
-    // a single tree shared by all score components
-    m_scoreTrees.resize(1);
-    m_scoreTrees[0] = new CanonicalHuffman<float>(in);
-  }
-
-  if(m_containsAlignmentInfo)
-    m_alignTree = new CanonicalHuffman<AlignPoint>(in);
-
-  const long end = std::ftell(in);
-  if(end < start)
-    return 0;
-  return static_cast<size_t>(end - start);
-}
-
-// Builds the hash key for a source phrase: the phrase text followed by
-// m_separator.
-std::string PhraseDecoder::MakeSourceKey(std::string &source)
-{
-  std::string key(source);
-  key += m_separator;
-  return key;
-}
-
-// Looks up `sourcePhrase` in the owning phrase table and decodes its target
-// phrases.
-//   topLevel - passed through to DecodeCollection; with PREnc, a cached result
-//              is returned as-is unless topLevel is set and it is incomplete
-//   eval     - passed through to DecodeCollection
-// Returns an empty pointer when the phrase is not in the table's hash;
-// otherwise whatever DecodeCollection returns (or the cached vector).
-TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(
-  const ManagerBase &mgr,
-  const Phrase<Word> &sourcePhrase,
-  bool topLevel,
-  bool eval)
-{
-  // Not using TargetPhraseCollection avoiding "new" operator
-  // which can introduce heavy locking with multiple threads
-  TargetPhraseVectorPtr tpv(new TargetPhraseVector());
-  size_t bitsLeft = 0;
-
-  if(m_coding == PREnc) {
-    std::pair<TargetPhraseVectorPtr, size_t> cached
-      = m_decodingCache.Retrieve(sourcePhrase);
-
-    if(cached.first != NULL) {
-      // Cached and complete, or the caller does not need it completed
-      if(!topLevel || cached.second == 0)
-        return cached.first;
-
-      // Cached but incomplete: start from the cached phrases and resume
-      // decoding at the recorded bit offset
-      bitsLeft = cached.second;
-      tpv->resize(cached.first->size());
-      std::copy(cached.first->begin(), cached.first->end(), tpv->begin());
-    }
-  }
-
-  // Retrieve source phrase identifier
-  std::string sourcePhraseString = sourcePhrase.GetString(*m_input);
-  size_t sourcePhraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)];
-
-  // An id equal to the hash size is handled as "phrase not found"
-  if(sourcePhraseId == m_phraseDictionary.m_hash.GetSize())
-    return TargetPhraseVectorPtr();
-
-  // Retrieve compressed and encoded target phrase collection
-  std::string encodedPhraseCollection;
-  if(m_phraseDictionary.m_inMemory)
-    encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str();
-  else
-    encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str();
-
-  BitWrapper<> encodedBitStream(encodedPhraseCollection);
-  if(m_coding == PREnc && bitsLeft)
-    encodedBitStream.SeekFromEnd(bitsLeft);
-
-  // Decompress and decode target phrase collection
-  return DecodeCollection(mgr, tpv, encodedBitStream, sourcePhrase, topLevel, eval);
-}
-
-TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( // decodes target phrases from the bit stream into tpv; returns tpv, or an empty pointer when a consistency check fails
- const ManagerBase &mgr,
- TargetPhraseVectorPtr tpv, // output vector; may already hold phrases when resuming a cached, incomplete collection
- BitWrapper<> &encodedBitStream, // compressed collection, positioned where decoding should start
- const Phrase<Word> &sourcePhrase,
- bool topLevel, // when false (PREnc), decoding stops once m_maxRank phrases are available
- bool eval) // not read anywhere in this function
-{
- const System &system = mgr.system;
- FactorCollection &vocab = system.GetVocab();
-
- bool extending = tpv->size(); // true when the caller handed in a non-empty vector
- size_t bitsLeft = encodedBitStream.TellFromEnd();
-
- std::vector<int> sourceWords; // REnc only: source-symbol id of every source word
- if(m_coding == REnc) {
- for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
- std::string sourceWord
- = sourcePhrase[i].GetString(*m_input);
- unsigned idx = GetSourceSymbolId(sourceWord);
- sourceWords.push_back(idx);
- }
- }
-
- unsigned phraseStopSymbol = 0; // symbol value that ends the word sequence of a phrase
- AlignPoint alignStopSymbol(-1, -1); // alignment point that ends the alignment list of a phrase
-
- std::vector<float> scores; // scores of the phrase currently being decoded
- std::set<AlignPointSizeT> alignment; // alignment of the phrase currently being decoded
-
- enum DecodeState { New, Symbol, Score, Alignment, Add } state = New; // per-phrase state machine: New -> Symbol -> Score -> (Alignment) -> Add
-
- size_t srcSize = sourcePhrase.GetSize();
-
- TPCompact* targetPhrase = NULL; // points at tpv->back() while a phrase is being filled in
- while(encodedBitStream.TellFromEnd()) { // one step of the state machine per iteration, until the stream is exhausted
-
- if(state == New) {
- // Append a fresh TPCompact to tpv and fill it in place
- tpv->push_back(TPCompact());
- targetPhrase = &tpv->back();
-
- alignment.clear();
- scores.clear();
-
- state = Symbol;
- }
-
- if(state == Symbol) {
- unsigned symbol = m_symbolTree->Read(encodedBitStream);
- if(symbol == phraseStopSymbol) {
- state = Score; // words are complete; scores follow
- } else {
- if(m_coding == REnc) {
- std::string wordString;
- size_t type = GetREncType(symbol); // 1, 2 or 3 are handled below
-
- if(type == 1) { // plain index into the target symbol table
- unsigned decodedSymbol = DecodeREncSymbol1(symbol);
- wordString = GetTargetSymbol(decodedSymbol);
- } else if (type == 2) { // (source position, rank) pair resolved through the lexical table
- size_t rank = DecodeREncSymbol2Rank(symbol);
- size_t srcPos = DecodeREncSymbol2Position(symbol);
-
- if(srcPos >= sourceWords.size()) // position outside the source phrase: give up on the collection
- return TargetPhraseVectorPtr();
-
- wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = targetPhrase->words.size();
- alignment.insert(AlignPoint(srcPos, trgPos));
- }
- } else if(type == 3) { // rank only; the source position is the current target position
- size_t rank = DecodeREncSymbol3(symbol);
- size_t srcPos = targetPhrase->words.size();
-
- if(srcPos >= sourceWords.size()) // position outside the source phrase: give up on the collection
- return TargetPhraseVectorPtr();
-
- wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t trgPos = srcPos;
- alignment.insert(AlignPoint(srcPos, trgPos));
- }
- }
-
- Word word;
- word.CreateFromString(vocab, system, wordString);
- targetPhrase->words.push_back(word);
- } else if(m_coding == PREnc) {
- // if the symbol is just a word
- if(GetPREncType(symbol) == 1) {
- unsigned decodedSymbol = DecodePREncSymbol1(symbol);
-
- Word word;
- word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol));
- targetPhrase->words.push_back(word);
- }
- // if the symbol is a subphrase pointer
- else {
- int left = DecodePREncSymbol2Left(symbol);
- int right = DecodePREncSymbol2Right(symbol);
- unsigned rank = DecodePREncSymbol2Rank(symbol);
-
- int srcStart = left + targetPhrase->words.size(); // start offset is relative to the current target length
- int srcEnd = srcSize - right - 1; // end offset is relative to the end of the source phrase
-
- // false positive consistency check
- if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
- return TargetPhraseVectorPtr();
-
- // false positive consistency check
- if(m_maxRank && rank > m_maxRank)
- return TargetPhraseVectorPtr();
-
- // set subphrase by default to itself
- TargetPhraseVectorPtr subTpv = tpv;
-
- // if range smaller than source phrase retrieve subphrase
- if(unsigned(srcEnd - srcStart + 1) != srcSize) {
- SubPhrase<Word> subPhrase = sourcePhrase.GetSubPhrase(srcStart, srcEnd - srcStart + 1);
- subTpv = CreateTargetPhraseCollection(mgr, subPhrase, false); // recursive lookup of the sub-span, non-top-level
- } else {
- // false positive consistency check
- if(rank >= tpv->size()-1) // a self-reference must name a phrase before the one being built (the last element)
- return TargetPhraseVectorPtr();
- }
-
- // false positive consistency check
- if(subTpv != NULL && rank < subTpv->size()) {
- // insert the subphrase into the main target phrase
- TPCompact& subTp = subTpv->at(rank);
- if(m_phraseDictionary.m_useAlignmentInfo) {
- // reconstruct the alignment data based on the alignment of the subphrase
- for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin();
- it != subTp.alignment.end(); it++) {
- alignment.insert(AlignPointSizeT(srcStart + it->first,
- targetPhrase->words.size() + it->second));
- }
- }
-
- std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words));
- } else
- return TargetPhraseVectorPtr();
- }
- } else { // coding None: the symbol is a direct index into the target symbol table
- Word word;
- word.CreateFromString(vocab, system, GetTargetSymbol(symbol));
- targetPhrase->words.push_back(word);
- }
- }
- } else if(state == Score) {
- size_t idx = m_multipleScoreTrees ? scores.size() : 0; // per-component tree, or the single shared tree
- float score = m_scoreTrees[idx]->Read(encodedBitStream);
- scores.push_back(score);
-
- if(scores.size() == m_numScoreComponent) { // all scores of this phrase have been read
- targetPhrase->scores = scores;
-
- if(m_containsAlignmentInfo)
- state = Alignment;
- else
- state = Add;
- }
- } else if(state == Alignment) {
- AlignPoint alignPoint = m_alignTree->Read(encodedBitStream);
- if(alignPoint == alignStopSymbol) {
- state = Add;
- } else {
- if(m_phraseDictionary.m_useAlignmentInfo) // points are still read from the stream, but only kept when requested
- alignment.insert(AlignPointSizeT(alignPoint));
- }
- }
-
- if(state == Add) { // the current phrase is complete: validate and finalise it
- if(m_phraseDictionary.m_useAlignmentInfo) {
- size_t sourceSize = sourcePhrase.GetSize();
- size_t targetSize = targetPhrase->words.size();
- for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) {
- if(it->first >= sourceSize || it->second >= targetSize) // alignment point outside either phrase: give up on the collection
- return TargetPhraseVectorPtr();
- }
- targetPhrase->alignment = alignment;
- }
-
- if(m_coding == PREnc) {
- if(!m_maxRank || tpv->size() <= m_maxRank) // remember the stream position while within the rank limit
- bitsLeft = encodedBitStream.TellFromEnd();
-
- if(!topLevel && m_maxRank && tpv->size() >= m_maxRank) // sub-phrase lookups stop once m_maxRank phrases are decoded
- break;
- }
-
- if(encodedBitStream.TellFromEnd() <= 8) // presumably trailing padding bits rather than another phrase - TODO confirm
- break;
-
- state = New;
- }
- }
-
- if(m_coding == PREnc && !extending) { // only freshly decoded collections are cached, not extensions of a cached one
- bitsLeft = bitsLeft > 8 ? bitsLeft : 0; // 8 or fewer remaining bits are recorded as nothing left to decode
- m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank);
- }
-
- return tpv;
-}
-
-// Forwards to m_decodingCache.Prune(); called from
-// PhraseTableCompact::CleanUpAfterSentenceProcessing.
-void PhraseDecoder::PruneCache()
-{
-  this->m_decodingCache.Prune();
-}
-
-}
diff --git a/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/moses2/TranslationModel/CompactPT/PhraseDecoder.h
deleted file mode 100644
index 79faa38a6..000000000
--- a/moses2/TranslationModel/CompactPT/PhraseDecoder.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// $Id$
-// vim:tabstop=2
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include <sstream>
-#include <vector>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <string>
-#include <iterator>
-#include <algorithm>
-#include <sys/stat.h>
-
-#include "PhraseTableCompact.h"
-#include "StringVector.h"
-#include "CanonicalHuffman.h"
-#include "TargetPhraseCollectionCache.h"
-
-#include "../../Phrase.h"
-#include "../../ManagerBase.h"
-
-namespace Moses2
-{
-
-class PhraseTableCompact;
-
-// Decoder for the compressed target-phrase collections stored by a
-// PhraseTableCompact. Owns the symbol tables and Huffman trees read by Load()
-// and turns an encoded bit stream into a vector of TPCompact entries.
-class PhraseDecoder
-{
-protected:
-
-  friend class PhraseTableCompact;
-
-  typedef std::pair<unsigned char, unsigned char> AlignPoint;
-  typedef std::pair<unsigned, unsigned> SrcTrg;
-
-  // Symbol coding scheme of the loaded table
-  enum Coding { None, REnc, PREnc } m_coding;
-
-  size_t m_numScoreComponent;   // number of scores stored per target phrase
-  bool m_containsAlignmentInfo; // whether the stream carries alignment points
-  size_t m_maxRank;             // 0 disables the rank limit checks
-  size_t m_maxPhraseLength;     // returned by GetMaxSourcePhraseLength()
-
-  // Cache of ids already looked up in m_sourceSymbols
-  boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
-  StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
-  StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;
-
-  // REnc only: m_lexicalTableIndex[srcId] is the first entry in m_lexicalTable
-  // for that source symbol
-  std::vector<size_t> m_lexicalTableIndex;
-  std::vector<SrcTrg> m_lexicalTable;
-
-  // Huffman trees; allocated in Load(), released in the destructor
-  CanonicalHuffman<unsigned>* m_symbolTree;
-
-  bool m_multipleScoreTrees; // one tree per score component instead of a shared one
-  std::vector<CanonicalHuffman<float>*> m_scoreTrees;
-
-  CanonicalHuffman<AlignPoint>* m_alignTree;
-
-  TargetPhraseCollectionCache m_decodingCache;
-
-  PhraseTableCompact& m_phraseDictionary;
-
-  // ***********************************************
-
-  const std::vector<FactorType>* m_input;
-  const std::vector<FactorType>* m_output;
-
-  std::string m_separator; // appended to the source phrase text to form its hash key
-
-  // ***********************************************
-
-  unsigned GetSourceSymbolId(std::string& s);
-  std::string GetTargetSymbol(unsigned id) const;
-
-  size_t GetREncType(unsigned encodedSymbol);
-  size_t GetPREncType(unsigned encodedSymbol);
-
-  unsigned GetTranslation(unsigned srcIdx, size_t rank);
-
-  size_t GetMaxSourcePhraseLength();
-
-  unsigned DecodeREncSymbol1(unsigned encodedSymbol);
-  unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
-  unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
-  unsigned DecodeREncSymbol3(unsigned encodedSymbol);
-
-  unsigned DecodePREncSymbol1(unsigned encodedSymbol);
-  int DecodePREncSymbol2Left(unsigned encodedSymbol);
-  int DecodePREncSymbol2Right(unsigned encodedSymbol);
-  unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);
-
-  std::string MakeSourceKey(std::string &);
-
-public:
-
-  PhraseDecoder(
-    PhraseTableCompact &phraseDictionary,
-    const std::vector<FactorType>* input,
-    const std::vector<FactorType>* output,
-    size_t numScoreComponent
-  );
-
-  ~PhraseDecoder();
-
-  // Reads the decoder state from `in`; returns the number of bytes consumed.
-  size_t Load(std::FILE* in);
-
-  // Looks up and decodes the target phrases of `sourcePhrase`; returns an
-  // empty pointer when the phrase is unknown.
-  TargetPhraseVectorPtr CreateTargetPhraseCollection(
-    const ManagerBase &mgr,
-    const Phrase<Word> &sourcePhrase,
-    bool topLevel = false,
-    bool eval = true);
-
-  // Decodes `encodedBitStream` into `tpv`; returns an empty pointer when a
-  // consistency check fails.
-  TargetPhraseVectorPtr DecodeCollection(
-    const ManagerBase &mgr,
-    TargetPhraseVectorPtr tpv,
-    BitWrapper<> &encodedBitStream,
-    const Phrase<Word> &sourcePhrase,
-    bool topLevel,
-    bool eval);
-
-  void PruneCache();
-
-private:
-  // Not copyable: the destructor deletes the Huffman trees, so a copy made
-  // by the implicit copy constructor would delete them a second time.
-  // Declared but intentionally left undefined.
-  PhraseDecoder(const PhraseDecoder&);
-  PhraseDecoder& operator=(const PhraseDecoder&);
-};
-
-}
-
diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp b/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp
deleted file mode 100644
index 49244df1b..000000000
--- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-#include <boost/algorithm/string/predicate.hpp>
-#include <boost/thread/tss.hpp>
-#include "PhraseTableCompact.h"
-#include "PhraseDecoder.h"
-#include "../../PhraseBased/InputPath.h"
-#include "../../PhraseBased/Manager.h"
-#include "../../PhraseBased/TargetPhrases.h"
-#include "../../PhraseBased/TargetPhraseImpl.h"
-#include "../../PhraseBased/Sentence.h"
-
-using namespace std;
-using namespace boost::algorithm;
-
-namespace Moses2
-{
-bool PhraseTableCompact::s_inMemoryByDefault = false; // initial value of m_inMemory for every table the constructor builds
-
-// Sets defaults (storage mode from s_inMemoryByDefault, alignment info on,
-// no decoder yet) and then processes the configuration via ReadParameters().
-// The table file itself is read later by Load().
-PhraseTableCompact::PhraseTableCompact(size_t startInd, const std::string &line)
-  : PhraseTable(startInd, line),
-    m_inMemory(s_inMemoryByDefault),
-    m_useAlignmentInfo(true),
-    m_hash(10, 16),
-    m_phraseDecoder(0)
-{
-  ReadParameters();
-}
-
-// Releases the decoder allocated by Load(). The pointer starts out null in
-// the constructor, so this is also safe when Load() was never called.
-PhraseTableCompact::~PhraseTableCompact()
-{
-  delete m_phraseDecoder;
-}
-
-// Loads the compact phrase table from m_path (".minphr" is appended when
-// missing): first the source phrase hash, then the decoder state, then the
-// target phrase collections (into memory or memory-mapped, per m_inMemory).
-// Throws std::runtime_error when the file is missing or cannot be opened,
-// and raises via UTIL_THROW_IF2 when any of the three sections loads as empty.
-void PhraseTableCompact::Load(System &system)
-{
-  std::string tFilePath = m_path;
-
-  std::string suffix = ".minphr";
-  if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
-  if (!FileExists(tFilePath))
-    throw runtime_error("Error: File " + tFilePath + " does not exist.");
-
-  // The table is binary data, so open it in binary mode; text mode can
-  // translate bytes on some platforms.
-  std::FILE* pFile = std::fopen(tFilePath.c_str(), "rb");
-  if (pFile == NULL)
-    throw runtime_error("Error: File " + tFilePath + " could not be opened.");
-  // NOTE(review): pFile is never closed. It is unclear from here whether the
-  // memory-mapped storage still needs the handle after load() - confirm
-  // before adding an fclose.
-
-  m_phraseDecoder
-    = new PhraseDecoder(*this, &m_input, &m_output, GetNumScores());
-
-  size_t indexSize;
-  //if(m_inMemory)
-  // Load source phrase index into memory
-  indexSize = m_hash.Load(pFile);
-  // else
-  // Keep source phrase index on disk
-  //indexSize = m_hash.LoadIndex(pFile);
-
-  size_t coderSize = m_phraseDecoder->Load(pFile);
-
-  size_t phraseSize;
-  if(m_inMemory) {
-    // Load target phrase collections into memory
-    phraseSize = m_targetPhrasesMemory.load(pFile, false);
-  }
-  else {
-    // Keep target phrase collections on disk
-    phraseSize = m_targetPhrasesMapped.load(pFile, true);
-  }
-
-  UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
-                 "Not successfully loaded");
-}
-
-// Handles configuration keys. The placeholder key "blah" is accepted and
-// ignored; every other key is delegated to PhraseTable::SetParameter.
-void PhraseTableCompact::SetParameter(const std::string& key, const std::string& value)
-{
-  if (key != "blah") {
-    PhraseTable::SetParameter(key, value);
-  }
-}
-
-// Per-sentence clean-up hook: prunes the decoder's phrase cache.
-// NOTE(review): dereferences m_phraseDecoder without a null check, so it
-// relies on Load() having been called first.
-void PhraseTableCompact::CleanUpAfterSentenceProcessing() const
-{
-  PhraseDecoder *decoder = m_phraseDecoder;
-  decoder->PruneCache();
-}
-
-
-// pb
-// Phrase-based lookup over a whole sentence: visits every (start, span) cell
-// of the input-path matrix that fits inside the sentence, looks up its target
-// phrases and attaches them to the path. Spans are visited shortest first.
-void PhraseTableCompact::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const
-{
-  const Sentence &sentence = static_cast<const Sentence&>(mgr.GetInput());
-  size_t inputSize = sentence.GetSize();
-  InputPaths &inputPathsCast = static_cast<InputPaths&>(inputPaths);
-
-  // `span` is the offset of the last word from the first (phrase length - 1)
-  for (size_t span = 0; span < inputSize; ++span) {
-    // stop as soon as the phrase would run past the end of the sentence
-    for (size_t startPos = 0; startPos + span < inputSize; ++startPos) {
-      InputPath *path = inputPathsCast.GetMatrix().GetValue(startPos, span);
-      TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
-      path->AddTargetPhrases(*this, tps);
-    }
-  }
-}
-
-// Looks up the target phrases for one input path.
-// Returns NULL when the source phrase is longer than any phrase in the table
-// or when decoding yields nothing; otherwise a pool-allocated TargetPhrases
-// that has been sorted, pruned to m_tableLimit and passed through
-// EvaluateAfterTablePruning.
-TargetPhrases *PhraseTableCompact::Lookup(const Manager &mgr, MemPool &pool,
-    InputPath &inputPath) const
-{
-  TargetPhrases *ret = NULL;
-
-  const Phrase<Word> &sourcePhrase = inputPath.subPhrase;
-  //cerr << "sourcePhrase=" << sourcePhrase.Debug(mgr.system) << endl;
-
-  // There is no such source phrase if source phrase is longer than longest
-  // observed source phrase during compilation
-  if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
-    return ret;
-
-  // Retrieve target phrase collection from phrase table
-  TargetPhraseVectorPtr decodedPhraseColl
-    = m_phraseDecoder->CreateTargetPhraseCollection(mgr, sourcePhrase, true, true);
-
-  if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
-    // (An unused full copy of the decoded vector used to be made here.)
-    const size_t numPhrases = decodedPhraseColl->size();
-    ret = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, numPhrases);
-
-    for (size_t i = 0; i < numPhrases; ++i) {
-      const TPCompact &tpCompact = decodedPhraseColl->at(i);
-      const TargetPhraseImpl *tp = CreateTargetPhrase(mgr, tpCompact, sourcePhrase);
-
-      ret->AddTargetPhrase(*tp);
-    }
-
-    ret->SortAndPrune(m_tableLimit);
-    mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *ret, sourcePhrase);
-
-    //cerr << "RET2=" << ret->Debug(mgr.system) << endl;
-  }
-
-  return ret;
-}
-
-// Converts one decoded TPCompact into a TargetPhraseImpl: copies the words,
-// scores and alignment, then runs EvaluateInIsolation on the result.
-// The returned object lives in the manager's memory pool.
-const TargetPhraseImpl *PhraseTableCompact::CreateTargetPhrase(
-  const Manager &mgr,
-  const TPCompact &tpCompact,
-  const Phrase<Word> &sourcePhrase) const
-{
-  MemPool &pool = mgr.GetPool();
-
-  size_t size = tpCompact.words.size();
-  // Allocate from the pool, as Lookup() does for TargetPhrases: nothing ever
-  // deletes the returned phrase, so a plain `new` here leaked it.
-  TargetPhraseImpl *ret
-    = new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, size);
-
-  // words
-  for (size_t i = 0; i < size; ++i) {
-    (*ret)[i] = tpCompact.words[i];
-  }
-
-  // scores
-  Scores &scores = ret->GetScores();
-  scores.Assign(mgr.system, *this, tpCompact.scores);
-
-  // align
-  ret->SetAlignTerm(tpCompact.alignment);
-
-  // score
-  mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *ret);
-
-  //cerr << "ret=" << ret->Debug(mgr.system) << endl;
-  return ret;
-}
-
-
-// scfg
-// SCFG entry point; the compact table does not support it and always throws.
-void PhraseTableCompact::InitActiveChart(MemPool &pool,
-    const SCFG::Manager &mgr,
-    SCFG::InputPath &path) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-// SCFG chart lookup; the compact table does not support it and always throws.
-void PhraseTableCompact::Lookup(MemPool &pool,
-    const SCFG::Manager &mgr,
-    size_t maxChartSpan,
-    const SCFG::Stacks &stacks,
-    SCFG::InputPath &path) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-// SCFG node lookup; the compact table does not support it and always throws.
-void PhraseTableCompact::LookupGivenNode(MemPool &pool,
-    const SCFG::Manager &mgr,
-    const SCFG::ActiveChartEntry &prevEntry,
-    const SCFG::Word &wordSought,
-    const Moses2::Hypotheses *hypos,
-    const Moses2::Range &subPhraseRange,
-    SCFG::InputPath &outPath) const
-{
-  UTIL_THROW2("Not implemented");
-}
-
-}
diff --git a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h b/moses2/TranslationModel/CompactPT/PhraseTableCompact.h
deleted file mode 100644
index 84ea7e4b2..000000000
--- a/moses2/TranslationModel/CompactPT/PhraseTableCompact.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#pragma once
-#include "../PhraseTable.h"
-#include "BlockHashIndex.h"
-
-namespace Moses2
-{
-class PhraseDecoder;
-class TPCompact;
-
-// Phrase table backed by a compact ".minphr" file: a hash over source
-// phrases plus compressed target phrase collections, decoded on demand by a
-// PhraseDecoder. Only phrase-based lookup is implemented; the SCFG entry
-// points throw.
-class PhraseTableCompact: public PhraseTable
-{
-public:
-  PhraseTableCompact(size_t startInd, const std::string &line);
-  virtual ~PhraseTableCompact();
-
-  // Reads the table file named by the configured path.
-  void Load(System &system);
-  virtual void SetParameter(const std::string& key, const std::string& value);
-
-  // Prunes the decoder's cache.
-  virtual void CleanUpAfterSentenceProcessing() const;
-
-  // Target phrases for a single input path, or NULL when there are none.
-  virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, InputPath &inputPath) const;
-
-  // scfg
-  virtual void InitActiveChart(MemPool &pool, const SCFG::Manager &mgr, SCFG::InputPath &path) const;
-
-  // Target phrases for every input path of the sentence.
-  virtual void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
-
-  virtual void Lookup(MemPool &pool,
-                      const SCFG::Manager &mgr,
-                      size_t maxChartSpan,
-                      const SCFG::Stacks &stacks,
-                      SCFG::InputPath &path) const;
-
-protected:
-  static bool s_inMemoryByDefault; // initial value of m_inMemory
-  bool m_inMemory;                 // selects m_targetPhrasesMemory over m_targetPhrasesMapped
-  bool m_useAlignmentInfo;         // whether decoded alignments are kept
-
-  BlockHashIndex m_hash;           // source phrase key -> collection index
-
-  // Encoded target phrase collections; Load() fills exactly one of the two
-  StringVector<unsigned char, size_t, MmapAllocator> m_targetPhrasesMapped;
-  StringVector<unsigned char, size_t, std::allocator> m_targetPhrasesMemory;
-
-  friend class PhraseDecoder;
-  PhraseDecoder* m_phraseDecoder;  // created in Load()
-
-  // Builds a TargetPhraseImpl from one decoded entry.
-  const TargetPhraseImpl *CreateTargetPhrase(const Manager &mgr,
-                                             const TPCompact &tpCompact,
-                                             const Phrase<Word> &sourcePhrase) const;
-
-  // SCFG
-  virtual void LookupGivenNode(MemPool &pool,
-                               const SCFG::Manager &mgr,
-                               const SCFG::ActiveChartEntry &prevEntry,
-                               const SCFG::Word &wordSought,
-                               const Moses2::Hypotheses *hypos,
-                               const Moses2::Range &subPhraseRange,
-                               SCFG::InputPath &outPath) const;
-
-};
-
-}