Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h')
-rw-r--r--contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h199
1 files changed, 199 insertions, 0 deletions
diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h
new file mode 100644
index 000000000..5706fca09
--- /dev/null
+++ b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h
@@ -0,0 +1,199 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#ifndef moses_BlockHashIndex_h
+#define moses_BlockHashIndex_h
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <queue>
+#include <cstring>
+#include <cstdio>
+
+#include "MurmurHash3.h"
+#include "StringVector.h"
+#include "PackedArray.h"
+#include "util/exception.hh"
+#include "util/string_stream.hh"
+
+#ifdef WITH_THREADS
+#include "../../legacy/ThreadPool.h"
+#else
+#include <ctime>
+#endif
+
+#include <boost/shared_ptr.hpp>
+
+namespace Moses2
+{
+
+class BlockHashIndex
+{
+private:
+ std::priority_queue<int> m_queue;
+
+ size_t m_orderBits;
+ size_t m_fingerPrintBits;
+
+ std::FILE* m_fileHandle;
+ size_t m_fileHandleStart;
+
+ StringVector<unsigned char, unsigned long> m_landmarks;
+
+ std::vector<void*> m_hashes;
+ std::vector<clock_t> m_clocks;
+ std::vector<PairedPackedArray<>*> m_arrays;
+
+ std::vector<size_t> m_seekIndex;
+
+ size_t m_size;
+ int m_lastSaved;
+ int m_lastDropped;
+ size_t m_numLoadedRanges;
+
+#ifdef WITH_THREADS
+ ThreadPool m_threadPool;
+ boost::mutex m_mutex;
+
+ template<typename Keys>
+ class HashTask: public Task
+ {
+ public:
+ HashTask(int id, BlockHashIndex& hash, Keys& keys) :
+ m_id(id), m_hash(hash), m_keys(new Keys(keys))
+ {
+ }
+
+ virtual void Run()
+ {
+ m_hash.CalcHash(m_id, *m_keys);
+ }
+
+ virtual ~HashTask()
+ {
+ delete m_keys;
+ }
+
+ private:
+ int m_id;
+ BlockHashIndex& m_hash;
+ Keys* m_keys;
+ };
+#endif
+
+ size_t GetFprint(const char* key) const;
+ size_t GetHash(size_t i, const char* key);
+
+public:
+#ifdef WITH_THREADS
+ BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
+ size_t threadsNum = 2);
+#else
+ BlockHashIndex(size_t orderBits, size_t fingerPrintBits);
+#endif
+
+ ~BlockHashIndex();
+
+ size_t GetHash(const char* key);
+ size_t GetHash(std::string key);
+
+ size_t operator[](std::string key);
+ size_t operator[](char* key);
+
+ void BeginSave(std::FILE* mphf);
+ void SaveRange(size_t i);
+ void SaveLastRange();
+ size_t FinalizeSave();
+
+#ifdef WITH_THREADS
+ void WaitAll();
+#endif
+
+ void DropRange(size_t i);
+ void DropLastRange();
+
+ size_t LoadIndex(std::FILE* mphf);
+ void LoadRange(size_t i);
+
+ size_t Save(std::string filename);
+ size_t Save(std::FILE * mphf);
+
+ size_t Load(std::string filename);
+ size_t Load(std::FILE * mphf);
+
+ size_t GetSize() const;
+
+ void KeepNLastRanges(float ratio = 0.1, float tolerance = 0.1);
+
+ template<typename Keys>
+ void AddRange(Keys &keys)
+ {
+ size_t current = m_landmarks.size();
+
+ if (m_landmarks.size() && m_landmarks.back().str() >= keys[0]) {
+ util::StringStream strme;
+ strme
+ << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort\n";
+ strme << "1: " << m_landmarks.back().str() << "\n";
+ strme << "2: " << keys[0] << "\n";
+ UTIL_THROW2(strme.str());
+ }
+
+ m_landmarks.push_back(keys[0]);
+ m_size += keys.size();
+
+ if (keys.size() == 1) {
+ // add dummy key to avoid null hash
+ keys.push_back("###DUMMY_KEY###");
+ }
+
+#ifdef WITH_THREADS
+
+ boost::shared_ptr<HashTask<Keys> > ht(
+ new HashTask<Keys>(current, *this, keys));
+ m_threadPool.Submit(ht);
+#else
+ CalcHash(current, keys);
+#endif
+ }
+
+ template<typename Keys>
+ void CalcHash(size_t current, Keys &keys)
+ {
+#ifdef HAVE_CMPH
+ void* source = vectorAdapter(keys);
+ CalcHash(current, source);
+#endif
+ }
+
+ void CalcHash(size_t current, void* source);
+
+#ifdef HAVE_CMPH
+ void* vectorAdapter(std::vector<std::string>& v);
+ void* vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv);
+ void* vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv);
+#endif
+};
+
+}
+#endif