Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/src/LM/Rand.cpp')
-rw-r--r-- moses/src/LM/Rand.cpp 164
1 files changed, 164 insertions, 0 deletions
diff --git a/moses/src/LM/Rand.cpp b/moses/src/LM/Rand.cpp
new file mode 100644
index 000000000..93411b8d3
--- /dev/null
+++ b/moses/src/LM/Rand.cpp
@@ -0,0 +1,164 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "util/check.hh"
+#include <limits>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include "Factor.h"
+#include "Util.h"
+#include "LM/SingleFactor.h"
+#include "RandLM.h"
+
+
+#include "LM/Rand.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+
+
+namespace Moses
+{
+namespace
+{
+using namespace std;
+
+/**
+ * Language model implementation that delegates scoring to a randlm::RandLM
+ * instance.
+ *
+ * The object owns the RandLM model created in Load() and deletes it in the
+ * destructor. Moses factor ids are translated to RandLM word ids through
+ * m_randlm_ids_vec, which is filled by CreateFactors().
+ */
+class LanguageModelRandLM : public LanguageModelPointerState
+{
+public:
+  // m_oov_id is value-initialised so that GetLmID(const Factor*) never reads
+  // an indeterminate value if it is called before Load().
+  LanguageModelRandLM()
+    : m_lm(0), m_oov_id() {}
+
+  /** Load the model at filePath; see the definition for details. */
+  bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
+
+  /** Score the n-gram given by contextFactor; see the definition for details. */
+  virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
+
+  /** Releases the owned RandLM model (deleting a null pointer is a no-op). */
+  ~LanguageModelRandLM() {
+    delete m_lm;
+  }
+
+  /** Clears the RandLM caches. Requires a model loaded via Load(). */
+  void CleanUpAfterSentenceProcessing() {
+    m_lm->clearCaches(); // clear caches
+  }
+
+  /** Sets up per-thread RandLM data. Requires a model loaded via Load(). */
+  void InitializeBeforeSentenceProcessing() {
+    m_lm->initThreadSpecificData(); // Creates thread specific data iff
+    // compiled with multithreading.
+  }
+
+protected:
+  std::vector<randlm::WordID> m_randlm_ids_vec; // indexed by Moses factor id
+  randlm::RandLM* m_lm;                         // owned; deleted in destructor
+  randlm::WordID m_oov_id;                      // RandLM id of the OOV word
+
+  void CreateFactors(FactorCollection &factorCollection);
+  randlm::WordID GetLmID( const std::string &str ) const;
+
+  /**
+   * Map a Moses factor to its RandLM word id.
+   * Factor ids beyond the lookup table map to the OOV id.
+   */
+  randlm::WordID GetLmID( const Factor *factor ) const {
+    size_t factorId = factor->GetId();
+    return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
+  }
+
+private:
+  // Not copyable: the destructor deletes m_lm, so an implicit copy would
+  // lead to a double delete. Declared but intentionally left undefined.
+  LanguageModelRandLM(const LanguageModelRandLM &);
+  LanguageModelRandLM &operator=(const LanguageModelRandLM &);
+};
+
+
+/**
+ * Load a RandLM model from disk and prepare it for scoring.
+ *
+ * Records the file path, factor type and n-gram order on the object, creates
+ * the RandLM instance (aborting through CHECK if that yields NULL), looks up
+ * the OOV word id, builds the factor-id lookup table and initialises the
+ * thread-specific RandLM data.
+ *
+ * @param filePath    path handed to randlm::RandLM::initRandLM
+ * @param factorType  factor this language model scores
+ * @param nGramOrder  order handed to randlm::RandLM::initRandLM
+ * @return always true; a NULL model trips the CHECK instead of returning
+ */
+bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType,
+                               size_t nGramOrder)
+{
+  cerr << "Loading LanguageModelRandLM..." << endl;
+  FactorCollection &factors = FactorCollection::Instance();
+
+  // Remember the configuration this model was loaded with.
+  m_filePath = filePath;
+  m_factorType = factorType;
+  m_nGramOrder = nGramOrder;
+
+  // Cache size, in MB, handed to RandLM.
+  int cacheSizeMB = 50;
+  m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cacheSizeMB);
+  CHECK(m_lm != NULL);
+
+  // The OOV id must be known before CreateFactors() fills the lookup table.
+  m_oov_id = m_lm->getWordID(m_lm->getOOV());
+  CreateFactors(factors);
+
+  m_lm->initThreadSpecificData();
+  return true;
+}
+
+/**
+ * Register every RandLM vocabulary word as a Moses factor and build
+ * m_randlm_ids_vec, the factor-id -> RandLM-id lookup table.
+ *
+ * Also registers the sentence-start and sentence-end factors and stores them
+ * in m_sentenceStart / m_sentenceEnd and the corresponding arrays. Table
+ * entries with no vocabulary word behind them hold m_oov_id.
+ *
+ * (Logic mirrors the SRI LM class; a shared template would be preferable.)
+ */
+void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection)
+{
+  typedef std::map<randlm::Word, randlm::WordID>::const_iterator VocabIter;
+  typedef std::map<size_t, randlm::WordID> FactorToLmId;
+
+  FactorToLmId lmIdByFactor;   // factor id -> randlm id
+  size_t highestFactorId = 0;  // determines the size of the lookup vector
+
+  // Walk the RandLM vocabulary, giving each word a (possibly new) factor.
+  VocabIter word = m_lm->vocabStart();
+  while (word != m_lm->vocabEnd()) {
+    const size_t id = factorCollection.AddFactor(Output, m_factorType, word->first)->GetId();
+    lmIdByFactor[id] = word->second;
+    if (id > highestFactorId) {
+      highestFactorId = id;
+    }
+    word++;
+  }
+
+  // Sentence-start factor. It is not inserted into lmIdByFactor here, so it
+  // only gets a non-OOV id if the vocabulary loop above already covered it.
+  // NOTE(review): presumably BOS/EOS are part of the RandLM vocabulary - confirm.
+  m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, m_lm->getBOS());
+  const size_t bosId = m_sentenceStart->GetId();
+  if (bosId > highestFactorId) {
+    highestFactorId = bosId;
+  }
+  m_sentenceStartArray[m_factorType] = m_sentenceStart;
+
+  // Sentence-end factor, handled the same way.
+  m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, m_lm->getEOS());
+  const size_t eosId = m_sentenceEnd->GetId();
+  if (eosId > highestFactorId) {
+    highestFactorId = eosId;
+  }
+  m_sentenceEndArray[m_factorType] = m_sentenceEnd;
+
+  // Size the lookup vector and default every slot to the OOV id ...
+  m_randlm_ids_vec.assign(highestFactorId + 1, m_oov_id);
+
+  // ... then overwrite the slots that correspond to vocabulary words.
+  FactorToLmId::const_iterator entry = lmIdByFactor.begin();
+  while (entry != lmIdByFactor.end()) {
+    m_randlm_ids_vec[entry->first] = entry->second;
+    ++entry;
+  }
+}
+
+/**
+ * Look up the RandLM word id for a string by asking the model's getWordID().
+ * Requires a model loaded via Load().
+ */
+randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const
+{
+  const randlm::WordID lmId = m_lm->getWordID(str);
+  return lmId;
+}
+
+/**
+ * Score the n-gram formed by contextFactor with the RandLM model.
+ *
+ * @param contextFactor  words of the n-gram; must hold at most
+ *                       MAX_NGRAM_SIZE entries (enforced with CHECK)
+ * @param finalState     passed through unchanged to RandLM's getProb()
+ * @return score = FloorScore(TransformLMScore(getProb(...))); unknown is set
+ *         when the context is non-empty and its last word maps to the OOV id
+ */
+LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
+                                       State* finalState) const
+{
+  // ngram below is a fixed-size stack buffer: refuse contexts that would
+  // write past its end instead of silently corrupting the stack.
+  CHECK(contextFactor.size() <= MAX_NGRAM_SIZE);
+
+  const FactorType factorType = GetFactorType();
+
+  // Translate the context into RandLM word ids.
+  randlm::WordID ngram[MAX_NGRAM_SIZE];
+  const int count = static_cast<int>(contextFactor.size());
+  for (int i = 0; i < count; ++i) {
+    ngram[i] = GetLmID((*contextFactor[i])[factorType]);
+  }
+
+  // getProb() takes the address of 'found'; its value is not read afterwards.
+  int found = 0;
+  LMResult ret;
+  ret.score = FloorScore(TransformLMScore(m_lm->getProb(&ngram[0], count, &found, finalState)));
+  ret.unknown = count && (ngram[count - 1] == m_oov_id);
+  return ret;
+}
+
+}
+
+/**
+ * Factory for the RandLM-backed language model.
+ *
+ * Returns a heap-allocated LanguageModelRandLM through its base-class
+ * pointer, so the concrete type stays private to this translation unit.
+ * NOTE(review): the caller is presumably responsible for deleting it - confirm.
+ */
+LanguageModelPointerState *NewRandLM()
+{
+  LanguageModelPointerState *model = new LanguageModelRandLM();
+  return model;
+}
+
+}
+