diff options
Diffstat (limited to 'moses/TranslationModel/ProbingPT/hash.cpp')
-rw-r--r-- | moses/TranslationModel/ProbingPT/hash.cpp | 36 |
1 files changed, 25 insertions, 11 deletions
diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/moses/TranslationModel/ProbingPT/hash.cpp
index 8945649ef..27a64b129 100644
--- a/moses/TranslationModel/ProbingPT/hash.cpp
+++ b/moses/TranslationModel/ProbingPT/hash.cpp
@@ -1,5 +1,11 @@
+#include <iostream>
 #include "hash.hh"
 
+using namespace std;
+
+namespace Moses
+{
+
 uint64_t getHash(StringPiece text)
 {
   std::size_t len = text.size();
@@ -7,24 +13,32 @@ uint64_t getHash(StringPiece text)
   return key;
 }
 
-std::vector<uint64_t> getVocabIDs(StringPiece textin)
+std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
 {
   //Tokenize
   std::vector<uint64_t> output;
 
-  util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
+  util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
+
+  while (itWord) {
+    StringPiece word = *itWord;
+    uint64_t id = 0;
+
+    util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
+    while (itFactor) {
+      StringPiece factor = *itFactor;
+      //cerr << "factor=" << factor << endl;
 
-  while(it) {
-    output.push_back(getHash(*it));
-    it++;
+      id += getHash(factor);
+      itFactor++;
+    }
+
+    output.push_back(id);
+    itWord++;
   }
 
   return output;
 }
 
-uint64_t getVocabID(std::string candidate)
-{
-  std::size_t len = candidate.length();
-  uint64_t key = util::MurmurHashNative(candidate.c_str(), len);
-  return key;
-}
\ No newline at end of file
+}
+