From a2fd8d5b2c43f0008a050aa850ed387b2289c9c9 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 6 Oct 2016 13:57:33 +0100 Subject: quering -> querying --- contrib/moses2/Jamfile | 2 +- .../TranslationModel/ProbingPT/ProbingPT.cpp | 2 +- .../moses2/TranslationModel/ProbingPT/quering.cpp | 143 --------------------- .../moses2/TranslationModel/ProbingPT/quering.hh | 65 ---------- .../moses2/TranslationModel/ProbingPT/querying.cpp | 143 +++++++++++++++++++++ .../moses2/TranslationModel/ProbingPT/querying.hh | 65 ++++++++++ contrib/other-builds/moses/.project | 8 +- moses/TranslationModel/ProbingPT/ProbingPT.cpp | 2 +- moses/TranslationModel/ProbingPT/quering.cpp | 142 -------------------- moses/TranslationModel/ProbingPT/quering.hh | 65 ---------- moses/TranslationModel/ProbingPT/querying.cpp | 142 ++++++++++++++++++++ moses/TranslationModel/ProbingPT/querying.hh | 65 ++++++++++ 12 files changed, 422 insertions(+), 422 deletions(-) delete mode 100644 contrib/moses2/TranslationModel/ProbingPT/quering.cpp delete mode 100644 contrib/moses2/TranslationModel/ProbingPT/quering.hh create mode 100644 contrib/moses2/TranslationModel/ProbingPT/querying.cpp create mode 100644 contrib/moses2/TranslationModel/ProbingPT/querying.hh delete mode 100644 moses/TranslationModel/ProbingPT/quering.cpp delete mode 100644 moses/TranslationModel/ProbingPT/quering.hh create mode 100644 moses/TranslationModel/ProbingPT/querying.cpp create mode 100644 moses/TranslationModel/ProbingPT/querying.hh diff --git a/contrib/moses2/Jamfile b/contrib/moses2/Jamfile index 8791e3cf9..850dbcd1f 100644 --- a/contrib/moses2/Jamfile +++ b/contrib/moses2/Jamfile @@ -72,7 +72,7 @@ alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose TranslationModel/ProbingPT/hash.cpp TranslationModel/ProbingPT/line_splitter.cpp TranslationModel/ProbingPT/probing_hash_utils.cpp - TranslationModel/ProbingPT/quering.cpp + TranslationModel/ProbingPT/querying.cpp TranslationModel/ProbingPT/storing.cpp TranslationModel/ProbingPT/StoreVocab.cpp TranslationModel/ProbingPT/StoreTarget.cpp diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index 829906b55..2c9a5f31a 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -6,7 +6,7 @@ */ #include #include "ProbingPT.h" -#include "quering.hh" +#include "querying.hh" #include "probing_hash_utils.hh" #include "util/exception.hh" #include "../../System.h" diff --git a/contrib/moses2/TranslationModel/ProbingPT/quering.cpp b/contrib/moses2/TranslationModel/ProbingPT/quering.cpp deleted file mode 100644 index 36e384e73..000000000 --- a/contrib/moses2/TranslationModel/ProbingPT/quering.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include "quering.hh" -#include "util/exception.hh" -#include "../../legacy/Util2.h" - -using namespace std; - -namespace Moses2 -{ - -QueryEngine::QueryEngine(const char * filepath) -{ - - //Create filepaths - std::string basepath(filepath); - std::string path_to_config = basepath + "/config"; - std::string path_to_hashtable = basepath + "/probing_hash.dat"; - std::string path_to_source_vocabid = basepath + "/source_vocabids"; - std::string alignPath = basepath + "/Alignments.dat"; - - if (!FileExists(path_to_config)) { - UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); - } - - ///Source phrase vocabids - read_map(source_vocabids, path_to_source_vocabid.c_str()); - - // alignments - read_alignments(alignPath); - - //Read config file - boost::unordered_map keyValue; - - std::ifstream config(path_to_config.c_str()); - std::string line; - while (getline(config, line)) { - std::vector toks = Moses2::Tokenize(line, "\t"); - UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line); - keyValue[ toks[0] ] = toks[1]; - } - - bool found; - //Check API version: - int version; - found = Get(keyValue, "API_VERSION", version); - if (!found) { - std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl; - } - else if (version != API_VERSION) { - std::cerr << "The ProbingPT API has changed. " << version << "!=" - << API_VERSION << " Please rebinarize your phrase tables." << std::endl; - exit(EXIT_FAILURE); - } - - //Get tablesize. - int tablesize; - found = Get(keyValue, "uniq_entries", tablesize); - if (!found) { - std::cerr << "uniq_entries not found" << std::endl; - exit(EXIT_FAILURE); - } - - //Number of scores - found = Get(keyValue, "num_scores", num_scores); - if (!found) { - std::cerr << "num_scores not found" << std::endl; - exit(EXIT_FAILURE); - } - - //How may scores from lex reordering models - found = Get(keyValue, "num_lex_scores", num_lex_scores); - if (!found) { - std::cerr << "num_lex_scores not found" << std::endl; - exit(EXIT_FAILURE); - } - - // have the scores been log() and FloorScore()? - found = Get(keyValue, "log_prob", logProb); - if (!found) { - std::cerr << "logProb not found" << std::endl; - exit(EXIT_FAILURE); - } - - config.close(); - - //Read hashtable - table_filesize = Table::Size(tablesize, 1.2); - mem = readTable(path_to_hashtable.c_str(), table_filesize); - Table table_init(mem, table_filesize); - table = table_init; - - std::cerr << "Initialized successfully! " << std::endl; -} - -QueryEngine::~QueryEngine() -{ - //Clear mmap content from memory. - munmap(mem, table_filesize); - -} - -uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const -{ - //TOO SLOW - //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); - return Moses2::getKey(source_phrase, size); -} - -std::pair QueryEngine::query(uint64_t key) -{ - std::pair ret; - - const Entry * entry; - ret.first = table.Find(key, entry); - if (ret.first) { - ret.second = entry->value; - } - return ret; -} - -void QueryEngine::read_alignments(const std::string &alignPath) -{ - std::ifstream strm(alignPath.c_str()); - - string line; - while (getline(strm, line)) { - vector toks = Moses2::Tokenize(line, "\t "); - UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file"); - - uint32_t alignInd = Scan(toks[0]); - if (alignInd >= alignColl.size()) { - alignColl.resize(alignInd + 1); - } - - Alignments &aligns = alignColl[alignInd]; - for (size_t i = 1; i < toks.size(); ++i) { - size_t pos = Scan(toks[i]); - aligns.push_back(pos); - } - } -} - -} - diff --git a/contrib/moses2/TranslationModel/ProbingPT/quering.hh b/contrib/moses2/TranslationModel/ProbingPT/quering.hh deleted file mode 100644 index aae4b4f09..000000000 --- a/contrib/moses2/TranslationModel/ProbingPT/quering.hh +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include -#include //For finding size of file -#include "vocabid.hh" -#include //toLower -#include -#include "probing_hash_utils.hh" -#include "hash.hh" //Includes line splitter -#include "line_splitter.hh" -#include "../../legacy/Util2.h" - -namespace Moses2 -{ - -class QueryEngine -{ - std::map source_vocabids; - - typedef std::vector Alignments; - std::vector alignColl; - - Table table; - char *mem; //Memory for the table, necessary so that we can correctly destroy the object - - size_t table_filesize; - bool is_reordering; - - void read_alignments(const std::string &alignPath); - -public: - int num_scores; - int num_lex_scores; - bool logProb; - - QueryEngine(const char *); - ~QueryEngine(); - - std::pair query(uint64_t key); - - const std::map &getSourceVocab() const - { return source_vocabids; } - - const std::vector &getAlignments() const - { return alignColl; } - - uint64_t getKey(uint64_t source_phrase[], size_t size) const; - - template - inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const - { - boost::unordered_map::const_iterator iter = keyValue.find(sought); - if (iter == keyValue.end()) { - return false; - } - - const std::string &foundStr = iter->second; - found = Scan(foundStr); - return true; - } - -}; - -} - diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp new file mode 100644 index 000000000..fb8ccef9a --- /dev/null +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -0,0 +1,143 @@ +#include "querying.hh" +#include "util/exception.hh" +#include "../../legacy/Util2.h" + +using namespace std; + +namespace Moses2 +{ + +QueryEngine::QueryEngine(const char * filepath) +{ + + //Create filepaths + std::string basepath(filepath); + std::string path_to_config = basepath + "/config"; + std::string path_to_hashtable = basepath + "/probing_hash.dat"; + std::string path_to_source_vocabid = basepath + "/source_vocabids"; + std::string alignPath = basepath + "/Alignments.dat"; + + if (!FileExists(path_to_config)) { + UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); + } + + ///Source phrase vocabids + read_map(source_vocabids, path_to_source_vocabid.c_str()); + + // alignments + read_alignments(alignPath); + + //Read config file + boost::unordered_map keyValue; + + std::ifstream config(path_to_config.c_str()); + std::string line; + while (getline(config, line)) { + std::vector toks = Moses2::Tokenize(line, "\t"); + UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line); + keyValue[ toks[0] ] = toks[1]; + } + + bool found; + //Check API version: + int version; + found = Get(keyValue, "API_VERSION", version); + if (!found) { + std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl; + } + else if (version != API_VERSION) { + std::cerr << "The ProbingPT API has changed. " << version << "!=" + << API_VERSION << " Please rebinarize your phrase tables." << std::endl; + exit(EXIT_FAILURE); + } + + //Get tablesize. + int tablesize; + found = Get(keyValue, "uniq_entries", tablesize); + if (!found) { + std::cerr << "uniq_entries not found" << std::endl; + exit(EXIT_FAILURE); + } + + //Number of scores + found = Get(keyValue, "num_scores", num_scores); + if (!found) { + std::cerr << "num_scores not found" << std::endl; + exit(EXIT_FAILURE); + } + + //How may scores from lex reordering models + found = Get(keyValue, "num_lex_scores", num_lex_scores); + if (!found) { + std::cerr << "num_lex_scores not found" << std::endl; + exit(EXIT_FAILURE); + } + + // have the scores been log() and FloorScore()? + found = Get(keyValue, "log_prob", logProb); + if (!found) { + std::cerr << "logProb not found" << std::endl; + exit(EXIT_FAILURE); + } + + config.close(); + + //Read hashtable + table_filesize = Table::Size(tablesize, 1.2); + mem = readTable(path_to_hashtable.c_str(), table_filesize); + Table table_init(mem, table_filesize); + table = table_init; + + std::cerr << "Initialized successfully! " << std::endl; +} + +QueryEngine::~QueryEngine() +{ + //Clear mmap content from memory. + munmap(mem, table_filesize); + +} + +uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const +{ + //TOO SLOW + //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); + return Moses2::getKey(source_phrase, size); +} + +std::pair QueryEngine::query(uint64_t key) +{ + std::pair ret; + + const Entry * entry; + ret.first = table.Find(key, entry); + if (ret.first) { + ret.second = entry->value; + } + return ret; +} + +void QueryEngine::read_alignments(const std::string &alignPath) +{ + std::ifstream strm(alignPath.c_str()); + + string line; + while (getline(strm, line)) { + vector toks = Moses2::Tokenize(line, "\t "); + UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file"); + + uint32_t alignInd = Scan(toks[0]); + if (alignInd >= alignColl.size()) { + alignColl.resize(alignInd + 1); + } + + Alignments &aligns = alignColl[alignInd]; + for (size_t i = 1; i < toks.size(); ++i) { + size_t pos = Scan(toks[i]); + aligns.push_back(pos); + } + } +} + +} + diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh new file mode 100644 index 000000000..aae4b4f09 --- /dev/null +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.hh @@ -0,0 +1,65 @@ +#pragma once + +#include +#include //For finding size of file +#include "vocabid.hh" +#include //toLower +#include +#include "probing_hash_utils.hh" +#include "hash.hh" //Includes line splitter +#include "line_splitter.hh" +#include "../../legacy/Util2.h" + +namespace Moses2 +{ + +class QueryEngine +{ + std::map source_vocabids; + + typedef std::vector Alignments; + std::vector alignColl; + + Table table; + char *mem; //Memory for the table, necessary so that we can correctly destroy the object + + size_t table_filesize; + bool is_reordering; + + void read_alignments(const std::string &alignPath); + +public: + int num_scores; + int num_lex_scores; + bool logProb; + + QueryEngine(const char *); + ~QueryEngine(); + + std::pair query(uint64_t key); + + const std::map &getSourceVocab() const + { return source_vocabids; } + + const std::vector &getAlignments() const + { return alignColl; } + + uint64_t getKey(uint64_t source_phrase[], size_t size) const; + + template + inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const + { + boost::unordered_map::const_iterator iter = keyValue.find(sought); + if (iter == keyValue.end()) { + return false; + } + + const std::string &foundStr = iter->second; + found = Scan(foundStr); + return true; + } + +}; + +} + diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index c25eb5225..c6b7de6f7 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -3391,14 +3391,14 @@ PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/probing_hash_utils.hh - TranslationModel/ProbingPT/quering.cpp + TranslationModel/ProbingPT/querying.cpp 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/quering.cpp + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/querying.cpp - TranslationModel/ProbingPT/quering.hh + TranslationModel/ProbingPT/querying.hh 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/quering.hh + PARENT-3-PROJECT_LOC/moses/TranslationModel/ProbingPT/querying.hh TranslationModel/ProbingPT/storing.cpp diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT/ProbingPT.cpp index 06b1360cd..8b4505985 100644 --- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp @@ -5,7 +5,7 @@ #include "moses/TargetPhraseCollection.h" #include "moses/InputFileStream.h" #include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" -#include "quering.hh" +#include "querying.hh" using namespace std; diff --git a/moses/TranslationModel/ProbingPT/quering.cpp b/moses/TranslationModel/ProbingPT/quering.cpp deleted file mode 100644 index 52cd7f516..000000000 --- a/moses/TranslationModel/ProbingPT/quering.cpp +++ /dev/null @@ -1,142 +0,0 @@ -#include "quering.hh" -#include "util/exception.hh" - -using namespace std; - -namespace Moses -{ - -QueryEngine::QueryEngine(const char * filepath) -{ - - //Create filepaths - std::string basepath(filepath); - std::string path_to_config = basepath + "/config"; - std::string path_to_hashtable = basepath + "/probing_hash.dat"; - std::string path_to_source_vocabid = basepath + "/source_vocabids"; - std::string alignPath = basepath + "/Alignments.dat"; - - if (!FileExists(path_to_config)) { - UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); - } - - ///Source phrase vocabids - read_map(source_vocabids, path_to_source_vocabid.c_str()); - - // alignments - read_alignments(alignPath); - - //Read config file - boost::unordered_map keyValue; - - std::ifstream config(path_to_config.c_str()); - std::string line; - while (getline(config, line)) { - std::vector toks = Tokenize(line, "\t"); - UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line); - keyValue[ toks[0] ] = toks[1]; - } - - bool found; - //Check API version: - int version; - found = Get(keyValue, "API_VERSION", version); - if (!found) { - std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl; - } - else if (version != API_VERSION) { - std::cerr << "The ProbingPT API has changed. " << version << "!=" - << API_VERSION << " Please rebinarize your phrase tables." << std::endl; - exit(EXIT_FAILURE); - } - - //Get tablesize. - int tablesize; - found = Get(keyValue, "uniq_entries", tablesize); - if (!found) { - std::cerr << "uniq_entries not found" << std::endl; - exit(EXIT_FAILURE); - } - - //Number of scores - found = Get(keyValue, "num_scores", num_scores); - if (!found) { - std::cerr << "num_scores not found" << std::endl; - exit(EXIT_FAILURE); - } - - //How may scores from lex reordering models - found = Get(keyValue, "num_lex_scores", num_lex_scores); - if (!found) { - std::cerr << "num_lex_scores not found" << std::endl; - exit(EXIT_FAILURE); - } - - // have the scores been log() and FloorScore()? - found = Get(keyValue, "log_prob", logProb); - if (!found) { - std::cerr << "logProb not found" << std::endl; - exit(EXIT_FAILURE); - } - - config.close(); - - //Read hashtable - table_filesize = Table::Size(tablesize, 1.2); - mem = readTable(path_to_hashtable.c_str(), table_filesize); - Table table_init(mem, table_filesize); - table = table_init; - - std::cerr << "Initialized successfully! " << std::endl; -} - -QueryEngine::~QueryEngine() -{ - //Clear mmap content from memory. - munmap(mem, table_filesize); - -} - -uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const -{ - //TOO SLOW - //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); - return Moses::getKey(source_phrase, size); -} - -std::pair QueryEngine::query(uint64_t key) -{ - std::pair ret; - - const Entry * entry; - ret.first = table.Find(key, entry); - if (ret.first) { - ret.second = entry->value; - } - return ret; -} - -void QueryEngine::read_alignments(const std::string &alignPath) -{ - std::ifstream strm(alignPath.c_str()); - - string line; - while (getline(strm, line)) { - vector toks = Tokenize(line, "\t "); - UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file"); - - uint32_t alignInd = Scan(toks[0]); - if (alignInd >= alignColl.size()) { - alignColl.resize(alignInd + 1); - } - - Alignments &aligns = alignColl[alignInd]; - for (size_t i = 1; i < toks.size(); ++i) { - size_t pos = Scan(toks[i]); - aligns.push_back(pos); - } - } -} - -} - diff --git a/moses/TranslationModel/ProbingPT/quering.hh b/moses/TranslationModel/ProbingPT/quering.hh deleted file mode 100644 index c43c7f3b9..000000000 --- a/moses/TranslationModel/ProbingPT/quering.hh +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include -#include //For finding size of file -#include "vocabid.hh" -#include //toLower -#include -#include "probing_hash_utils.hh" -#include "hash.hh" //Includes line splitter -#include "line_splitter.hh" -#include "moses//Util.h" - -namespace Moses -{ - -class QueryEngine -{ - std::map source_vocabids; - - typedef std::vector Alignments; - std::vector alignColl; - - Table table; - char *mem; //Memory for the table, necessary so that we can correctly destroy the object - - size_t table_filesize; - bool is_reordering; - - void read_alignments(const std::string &alignPath); - -public: - int num_scores; - int num_lex_scores; - bool logProb; - - QueryEngine(const char *); - ~QueryEngine(); - - std::pair query(uint64_t key); - - const std::map &getSourceVocab() const - { return source_vocabids; } - - const std::vector &getAlignments() const - { return alignColl; } - - uint64_t getKey(uint64_t source_phrase[], size_t size) const; - - template - inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const - { - boost::unordered_map::const_iterator iter = keyValue.find(sought); - if (iter == keyValue.end()) { - return false; - } - - const std::string &foundStr = iter->second; - found = Scan(foundStr); - return true; - } - -}; - -} - diff --git a/moses/TranslationModel/ProbingPT/querying.cpp b/moses/TranslationModel/ProbingPT/querying.cpp new file mode 100644 index 000000000..52cd7f516 --- /dev/null +++ b/moses/TranslationModel/ProbingPT/querying.cpp @@ -0,0 +1,142 @@ +#include "quering.hh" +#include "util/exception.hh" + +using namespace std; + +namespace Moses +{ + +QueryEngine::QueryEngine(const char * filepath) +{ + + //Create filepaths + std::string basepath(filepath); + std::string path_to_config = basepath + "/config"; + std::string path_to_hashtable = basepath + "/probing_hash.dat"; + std::string path_to_source_vocabid = basepath + "/source_vocabids"; + std::string alignPath = basepath + "/Alignments.dat"; + + if (!FileExists(path_to_config)) { + UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); + } + + ///Source phrase vocabids + read_map(source_vocabids, path_to_source_vocabid.c_str()); + + // alignments + read_alignments(alignPath); + + //Read config file + boost::unordered_map keyValue; + + std::ifstream config(path_to_config.c_str()); + std::string line; + while (getline(config, line)) { + std::vector toks = Tokenize(line, "\t"); + UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line); + keyValue[ toks[0] ] = toks[1]; + } + + bool found; + //Check API version: + int version; + found = Get(keyValue, "API_VERSION", version); + if (!found) { + std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl; + } + else if (version != API_VERSION) { + std::cerr << "The ProbingPT API has changed. " << version << "!=" + << API_VERSION << " Please rebinarize your phrase tables." << std::endl; + exit(EXIT_FAILURE); + } + + //Get tablesize. + int tablesize; + found = Get(keyValue, "uniq_entries", tablesize); + if (!found) { + std::cerr << "uniq_entries not found" << std::endl; + exit(EXIT_FAILURE); + } + + //Number of scores + found = Get(keyValue, "num_scores", num_scores); + if (!found) { + std::cerr << "num_scores not found" << std::endl; + exit(EXIT_FAILURE); + } + + //How may scores from lex reordering models + found = Get(keyValue, "num_lex_scores", num_lex_scores); + if (!found) { + std::cerr << "num_lex_scores not found" << std::endl; + exit(EXIT_FAILURE); + } + + // have the scores been log() and FloorScore()? + found = Get(keyValue, "log_prob", logProb); + if (!found) { + std::cerr << "logProb not found" << std::endl; + exit(EXIT_FAILURE); + } + + config.close(); + + //Read hashtable + table_filesize = Table::Size(tablesize, 1.2); + mem = readTable(path_to_hashtable.c_str(), table_filesize); + Table table_init(mem, table_filesize); + table = table_init; + + std::cerr << "Initialized successfully! " << std::endl; +} + +QueryEngine::~QueryEngine() +{ + //Clear mmap content from memory. + munmap(mem, table_filesize); + +} + +uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const +{ + //TOO SLOW + //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size()); + return Moses::getKey(source_phrase, size); +} + +std::pair QueryEngine::query(uint64_t key) +{ + std::pair ret; + + const Entry * entry; + ret.first = table.Find(key, entry); + if (ret.first) { + ret.second = entry->value; + } + return ret; +} + +void QueryEngine::read_alignments(const std::string &alignPath) +{ + std::ifstream strm(alignPath.c_str()); + + string line; + while (getline(strm, line)) { + vector toks = Tokenize(line, "\t "); + UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file"); + + uint32_t alignInd = Scan(toks[0]); + if (alignInd >= alignColl.size()) { + alignColl.resize(alignInd + 1); + } + + Alignments &aligns = alignColl[alignInd]; + for (size_t i = 1; i < toks.size(); ++i) { + size_t pos = Scan(toks[i]); + aligns.push_back(pos); + } + } +} + +} + diff --git a/moses/TranslationModel/ProbingPT/querying.hh b/moses/TranslationModel/ProbingPT/querying.hh new file mode 100644 index 000000000..c43c7f3b9 --- /dev/null +++ b/moses/TranslationModel/ProbingPT/querying.hh @@ -0,0 +1,65 @@ +#pragma once + +#include +#include //For finding size of file +#include "vocabid.hh" +#include //toLower +#include +#include "probing_hash_utils.hh" +#include "hash.hh" //Includes line splitter +#include "line_splitter.hh" +#include "moses//Util.h" + +namespace Moses +{ + +class QueryEngine +{ + std::map source_vocabids; + + typedef std::vector Alignments; + std::vector alignColl; + + Table table; + char *mem; //Memory for the table, necessary so that we can correctly destroy the object + + size_t table_filesize; + bool is_reordering; + + void read_alignments(const std::string &alignPath); + +public: + int num_scores; + int num_lex_scores; + bool logProb; + + QueryEngine(const char *); + ~QueryEngine(); + + std::pair query(uint64_t key); + + const std::map &getSourceVocab() const + { return source_vocabids; } + + const std::vector &getAlignments() const + { return alignColl; } + + uint64_t getKey(uint64_t source_phrase[], size_t size) const; + + template + inline bool Get(const boost::unordered_map &keyValue, const std::string &sought, T &found) const + { + boost::unordered_map::const_iterator iter = keyValue.find(sought); + if (iter == keyValue.end()) { + return false; + } + + const std::string &foundStr = iter->second; + found = Scan(foundStr); + return true; + } + +}; + +} + -- cgit v1.2.3