Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'misc/addLexROtoPT.cpp')
-rw-r--r-- misc/addLexROtoPT.cpp 168
1 files changed, 168 insertions, 0 deletions
diff --git a/misc/addLexROtoPT.cpp b/misc/addLexROtoPT.cpp
new file mode 100644
index 000000000..5bf3ad835
--- /dev/null
+++ b/misc/addLexROtoPT.cpp
@@ -0,0 +1,168 @@
+#include <iostream>
+#include <string>
+
+#include "moses/Phrase.h"
+#include "moses/FactorCollection.h"
+#include "moses/Timer.h"
+#include "moses/InputFileStream.h"
+#include "moses/TranslationModel/CompactPT/BlockHashIndex.h"
+#include "moses/TranslationModel/CompactPT/CanonicalHuffman.h"
+#include "moses/TranslationModel/CompactPT/StringVector.h"
+
+using namespace Moses;
+using namespace std;
+
+// File-level state shared by Load(), MakeKey(), GetScore() and main().
+
+// Not referenced anywhere else in the visible code.
+Timer timer;
+
+// Factor lists for the f / e / c parts of a lookup key. main() pushes factor 0
+// onto m_factorsF and m_factorsE; m_factorsC is left empty in the visible code.
+// MakeKey() only tests m_factorsE / m_factorsC for emptiness.
+FactorList m_factorsF, m_factorsE, m_factorsC;
+
+// Index populated by Load() and queried with m_hash[key] in GetScore(); a
+// result equal to m_hash.GetSize() is treated there as "key not found".
+// NOTE(review): the meaning of the constructor arguments (10, 16) is defined
+// by BlockHashIndex and is not visible here - confirm before changing them.
+BlockHashIndex m_hash(10, 16);
+// Read from the table file in Load(); number of values GetScore() decodes per entry.
+size_t m_numScoreComponent;
+// Read from the table file in Load(); true means one Huffman tree per score
+// component, false means a single tree used for every component.
+bool m_multipleScoreTrees;
+// Chooses between m_scoresMemory (true) and m_scoresMapped (false).
+// Never set to true in the visible code.
+bool m_inMemory = false;
+
+typedef CanonicalHuffman<float> ScoreTree;
+// Trees allocated with new in Load(); they are never deleted in the visible code.
+std::vector<ScoreTree*> m_scoreTrees;
+
+// Encoded score strings, indexed by the position returned from m_hash.
+// Exactly one of the two is loaded, depending on m_inMemory.
+StringVector<unsigned char, unsigned long, MmapAllocator> m_scoresMapped;
+StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
+
+////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Loads the table stored at filePath into the file-level globals.
+ *
+ * Reads, in this order: the hash index (m_hash), the score-component count
+ * (m_numScoreComponent), the multiple-trees flag (m_multipleScoreTrees), the
+ * Huffman tree(s) (m_scoreTrees) and finally the encoded score strings
+ * (m_scoresMemory or m_scoresMapped, selected by m_inMemory).
+ *
+ * @param filePath  path of the table file to read.
+ * @throws via UTIL_THROW_IF2 when the file cannot be opened or when the two
+ *         header fields cannot be read completely.
+ */
+void Load(const string &filePath)
+{
+  // The file is consumed with fread(), so open it in binary mode to rule out
+  // newline translation on platforms that distinguish text and binary streams.
+  std::FILE* pFile = std::fopen(filePath.c_str(), "rb");
+  UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
+
+  //if(m_inMemory)
+  m_hash.Load(pFile);
+  //else
+  //m_hash.LoadIndex(pFile);
+
+  // Each fread() below asks for exactly one item, so the sum must be 2.
+  size_t read = 0;
+  read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
+  read += std::fread(&m_multipleScoreTrees,
+                     sizeof(m_multipleScoreTrees), 1, pFile);
+  // Without this check a truncated file would leave the two globals unset and
+  // the code below would build trees from whatever bytes follow.
+  UTIL_THROW_IF2(read != 2, "File " << filePath
+                 << " is truncated: could not read the score header");
+
+  // Either one tree per score component or a single shared tree.
+  // The trees are never deleted in the visible code.
+  if(m_multipleScoreTrees) {
+    m_scoreTrees.resize(m_numScoreComponent);
+    for(size_t i = 0; i < m_numScoreComponent; i++)
+      m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
+  } else {
+    m_scoreTrees.resize(1);
+    m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
+  }
+
+  if(m_inMemory)
+    m_scoresMemory.load(pFile, false);
+  else
+    m_scoresMapped.load(pFile, true);
+
+  // NOTE(review): pFile is deliberately not closed here. The memory-mapped
+  // path hands the FILE* to StringVector::load(); confirm whether the mapping
+  // needs the stream to stay open before adding an fclose().
+}
+
+////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Builds the lookup key used with m_hash.
+ *
+ * The key starts with f. The e part is appended only when m_factorsE is
+ * non-empty, and the c part only when m_factorsC is non-empty; a " ||| "
+ * separator is placed in front of an appended part whenever the key built so
+ * far is non-empty. A trailing " ||| " is always added at the end.
+ *
+ * @param f  first key part (always used).
+ * @param e  second key part (used when m_factorsE is non-empty).
+ * @param c  third key part (used when m_factorsC is non-empty).
+ * @return the assembled key string.
+ */
+std::string
+MakeKey(const std::string& f,
+        const std::string& e,
+        const std::string& c)
+{
+  const char* const separator = " ||| ";
+
+  // Starting from a copy of f equals appending f to an empty string.
+  std::string result(f);
+
+  const bool useE = !m_factorsE.empty();
+  if (useE) {
+    if (!result.empty()) {
+      result.append(separator);
+    }
+    result.append(e);
+  }
+
+  const bool useC = !m_factorsC.empty();
+  if (useC) {
+    if (!result.empty()) {
+      result.append(separator);
+    }
+    result.append(c);
+  }
+
+  result.append(separator);
+  return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Looks up the scores stored for the key built from (f, e, c).
+ *
+ * The key comes from MakeKey() and is resolved through m_hash. When the
+ * returned position equals m_hash.GetSize() the key is treated as absent.
+ * Otherwise the encoded score string at that position is decoded, one value
+ * per score component, and exp() is applied to every decoded value.
+ *
+ * @return m_numScoreComponent values for a known key, or an empty vector when
+ *         the key is not found.
+ */
+std::vector<float>
+GetScore(const std::string& f, const std::string& e, const std::string& c)
+{
+  std::vector<float> result;
+
+  const std::string lookupKey = MakeKey(f, e, c);
+  const size_t pos = m_hash[lookupKey];
+
+  // Guard clause: unknown key, hand back the empty vector.
+  if (pos == m_hash.GetSize()) {
+    return result;
+  }
+
+  // Fetch the encoded scores from whichever storage was loaded.
+  std::string encoded;
+  if (!m_inMemory) {
+    encoded = m_scoresMapped[pos].str();
+  } else {
+    encoded = m_scoresMemory[pos].str();
+  }
+
+  BitWrapper<> bits(encoded);
+  for (size_t comp = 0; comp < m_numScoreComponent; ++comp) {
+    // With a single shared tree every component is decoded by tree 0.
+    const size_t treeIdx = m_multipleScoreTrees ? comp : 0;
+    float value = m_scoreTrees[treeIdx]->Read(bits);
+    value = exp(value);
+    result.push_back(value);
+  }
+
+  return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Annotates every phrase-table line with scores from a lexical reordering table.
+ *
+ * Usage: <program> <phrase-table> <lex-reordering-table>
+ *
+ * Each input line is split on "|||" and the fields are trimmed. The first two
+ * fields are looked up with GetScore(); when scores are found, a
+ * "{{LexRO s1 s2 ...}}" property is appended to the seventh field. Every line
+ * is written to stdout with its fields joined by " ||| ", padded with empty
+ * fields to at least seven columns.
+ *
+ * @return 0 on success, 1 on a usage error or a malformed input line.
+ */
+int main(int argc, char** argv)
+{
+  // Both paths are required; reading argv[1] / argv[2] unchecked is undefined
+  // behaviour when they are missing.
+  if (argc < 3) {
+    cerr << "Usage: " << (argc > 0 ? argv[0] : "addLexROtoPT")
+         << " <phrase-table> <lex-reordering-table>" << endl;
+    return 1;
+  }
+  const string ptPath(argv[1]);
+  const string roPath(argv[2]);
+
+  // lex reordering model
+  m_factorsF.push_back(0);
+  m_factorsE.push_back(0);
+
+  Load(roPath);
+
+  // phrase table
+  InputFileStream ptStrm(ptPath);
+
+  const size_t minColumns = 7;      // output is padded to this many fields
+  const size_t propertyColumn = 6;  // field that receives the {{LexRO ...}} entry
+
+  string line;
+  size_t lineNum = 0;
+  while (getline(ptStrm, line)) {
+    ++lineNum;
+    //cerr << line << endl;
+    const std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
+
+    // Explicit check instead of assert(), which disappears under NDEBUG.
+    if (toks.size() < 2) {
+      cerr << "Line " << lineNum << " of " << ptPath
+           << " has fewer than 2 fields: " << line << endl;
+      return 1;
+    }
+
+    // Grow beyond the usual seven fields when the line carries more, so the
+    // copy below can never write past the end of the vector.
+    std::vector<std::string> columns(toks.size() > minColumns ? toks.size()
+                                                              : minColumns);
+    for (size_t i = 0; i < toks.size(); ++i) {
+      columns[i] = Trim(toks[i]);
+    }
+
+    const std::vector<float> scores = GetScore(columns[0], columns[1], "");
+    // key-value pairs
+    if (!scores.empty()) {
+      std::string &properties = columns[propertyColumn];
+      if (!properties.empty()) {
+        properties += " ";
+      }
+      properties += "{{LexRO ";
+      for (size_t i = 0; i < scores.size(); ++i) {
+        if (i > 0) {
+          properties += " ";
+        }
+        properties += Moses::SPrint(scores[i]);
+      }
+      properties += "}}";
+    }
+
+    // output
+    for (size_t i = 0; i < columns.size(); ++i) {
+      if (i > 0) {
+        cout << " ||| ";
+      }
+      cout << columns[i];
+    }
+    // '\n' rather than endl: avoids flushing stdout after every line.
+    cout << '\n';
+  }
+
+  return 0;
+}
+
+