Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/ProbingPT/hash.cpp')
-rw-r--r-- moses/TranslationModel/ProbingPT/hash.cpp | 36
1 file changed, 25 insertions, 11 deletions
diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/moses/TranslationModel/ProbingPT/hash.cpp
index 8945649ef..47242e25d 100644
--- a/moses/TranslationModel/ProbingPT/hash.cpp
+++ b/moses/TranslationModel/ProbingPT/hash.cpp
@@ -1,5 +1,11 @@
+#include <iostream>
#include "hash.hh"
+using namespace std;
+
+namespace Moses
+{
+
uint64_t getHash(StringPiece text)
{
std::size_t len = text.size();
@@ -7,24 +13,32 @@ uint64_t getHash(StringPiece text)
return key;
}
-std::vector<uint64_t> getVocabIDs(StringPiece textin)
+std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
{
//Tokenize
std::vector<uint64_t> output;
- util::TokenIter<util::SingleCharacter> it(textin, util::SingleCharacter(' '));
+ util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
+
+ while (itWord) {
+ StringPiece word = *itWord;
+ uint64_t id = 0;
+
+ util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
+ while (itFactor) {
+ StringPiece factor = *itFactor;
+ //cerr << "factor=" << factor << endl;
- while(it) {
- output.push_back(getHash(*it));
- it++;
+ id += getHash(factor);
+ itFactor++;
+ }
+
+ output.push_back(id);
+ itWord++;
}
return output;
}
-uint64_t getVocabID(std::string candidate)
-{
- std::size_t len = candidate.length();
- uint64_t key = util::MurmurHashNative(candidate.c_str(), len);
- return key;
-} \ No newline at end of file
+}
+