Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/moses2/TranslationModel/ProbingPT/storing.hh')
-rw-r--r-- contrib/moses2/TranslationModel/ProbingPT/storing.hh | 95
1 files changed, 95 insertions, 0 deletions
diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.hh b/contrib/moses2/TranslationModel/ProbingPT/storing.hh
new file mode 100644
index 000000000..10d7050d3
--- /dev/null
+++ b/contrib/moses2/TranslationModel/ProbingPT/storing.hh
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <boost/unordered_set.hpp>
+#include <boost/unordered_map.hpp>
+#include <cstdio>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <queue>
+#include <sys/stat.h> //mkdir
+
+#include "hash.hh" //Includes line_splitter
+#include "probing_hash_utils.hh"
+
+#include "util/file_piece.hh"
+#include "util/file.hh"
+#include "vocabid.hh"
+
+namespace Moses2
+{
+typedef std::vector<uint64_t> SourcePhrase; // a source phrase as a sequence of 64-bit ids (presumably vocab ids from vocabid.hh — TODO confirm)
+
+
+class Node // tree node whose children are keyed by 64-bit id; Add/Write are only declared here
+{
+ typedef boost::unordered_map<uint64_t, Node> Children; // child nodes, looked up by uint64_t id
+ Children m_children; // private; children are held by value, so destroying a node destroys its subtree
+
+public:
+ uint64_t key; // NOTE(review): not set by the constructor below — indeterminate until something assigns it
+ bool done; // starts false; what flips it is decided by Add/Write (definitions not visible here)
+
+ Node()
+ :done(false) // only `done` is initialized; `key` is left uninitialized
+ {}
+
+ void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0); // pos defaults to 0; presumably the index into sourcePhrase to start from — TODO confirm
+ void Write(Table &table); // presumably emits this subtree into table — TODO confirm against the definition
+};
+
+
+void createProbingPT(const std::string &phrasetable_path, // declaration only; the definition is not in this header
+ const std::string &basepath, int num_scores, int num_lex_scores, // presumably the input phrase-table path and an output base directory — TODO confirm
+ bool log_prob, int max_cache_size, bool scfg); // the meaning of these flags/limits is not visible from this header
+uint64_t getKey(const std::vector<uint64_t> &source_phrase); // declaration only; presumably reduces the id sequence to a single 64-bit key — TODO confirm
+
+std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos); // declaration only; NOTE(review): whether endPos is inclusive cannot be told from here
+
+template<typename T> // T must be insertable into a std::stringstream with operator<<
+std::string Debug(const std::vector<T> &vec) // joins the elements of vec into one space-separated string
+{
+ std::stringstream strm;
+ for (size_t i = 0; i < vec.size(); ++i) {
+ strm << vec[i] << " "; // a space is written after every element, including the last one
+ }
+ return strm.str(); // an empty vector yields an empty string
+}
+
+size_t countUniqueSource(const std::string &path); // declaration only; presumably counts distinct source phrases in the file at path — TODO confirm
+
+class CacheItem // plain record of (source string, source key, count); every member is public
+{
+public:
+ std::string source; // copy of the constructor's vSource
+ uint64_t sourceKey; // copy of the constructor's vSourceKey
+ float count; // copy of the constructor's vCount; the only field operator< looks at
+ CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
+ :source(vSource)
+ ,sourceKey(vSourceKey)
+ ,count(vCount)
+ {
+ }
+
+ bool operator<(const CacheItem &other) const
+ {
+ return count > other.count; // reversed: the item with the LARGER count compares as "less". NOTE(review): looks intentional so a std::priority_queue keeps the smallest count on top — confirm
+ }
+};
+
+class CacheItemOrderer // comparator over CacheItem pointers that orders by the pointed-to items, not by address
+{
+public:
+ bool operator()(const CacheItem* a, const CacheItem* b) const
+ {
+ return (*a) < (*b); // dereferences both pointers without a null check; via CacheItem::operator< this is true when a->count > b->count
+ }
+};
+
+void serialize_cache( // declaration only; the definition is not in this header
+ std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache, // taken by non-const reference; with CacheItemOrderer, top() is the item with the smallest count
+ const std::string &path, float totalSourceCount); // presumably path is where the cache is written — TODO confirm
+
+}
+