diff options
Diffstat (limited to 'contrib/moses2/TranslationModel/ProbingPT/storing.hh')
-rw-r--r-- | contrib/moses2/TranslationModel/ProbingPT/storing.hh | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/contrib/moses2/TranslationModel/ProbingPT/storing.hh b/contrib/moses2/TranslationModel/ProbingPT/storing.hh new file mode 100644 index 000000000..10d7050d3 --- /dev/null +++ b/contrib/moses2/TranslationModel/ProbingPT/storing.hh @@ -0,0 +1,95 @@ +#pragma once + +#include <boost/unordered_set.hpp> +#include <boost/unordered_map.hpp> +#include <cstdio> +#include <sstream> +#include <fstream> +#include <iostream> +#include <string> +#include <queue> +#include <sys/stat.h> //mkdir + +#include "hash.hh" //Includes line_splitter +#include "probing_hash_utils.hh" + +#include "util/file_piece.hh" +#include "util/file.hh" +#include "vocabid.hh" + +namespace Moses2 +{ +typedef std::vector<uint64_t> SourcePhrase; + + +class Node +{ + typedef boost::unordered_map<uint64_t, Node> Children; + Children m_children; + +public: + uint64_t key; + bool done; + + Node() + :done(false) + {} + + void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0); + void Write(Table &table); +}; + + +void createProbingPT(const std::string &phrasetable_path, + const std::string &basepath, int num_scores, int num_lex_scores, + bool log_prob, int max_cache_size, bool scfg); +uint64_t getKey(const std::vector<uint64_t> &source_phrase); + +std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos); + +template<typename T> +std::string Debug(const std::vector<T> &vec) +{ + std::stringstream strm; + for (size_t i = 0; i < vec.size(); ++i) { + strm << vec[i] << " "; + } + return strm.str(); +} + +size_t countUniqueSource(const std::string &path); + +class CacheItem +{ +public: + std::string source; + uint64_t sourceKey; + float count; + CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount) + :source(vSource) + ,sourceKey(vSourceKey) + ,count(vCount) + { + } + + bool operator<(const CacheItem &other) const + { + return count > other.count; + } +}; + +class CacheItemOrderer +{ +public: + bool operator()(const CacheItem* a, const CacheItem* b) const + { + return (*a) < (*b); + } +}; + +void serialize_cache( + std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache, + const std::string &path, float totalSourceCount); + +} + |