Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/util
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2012-02-28 22:58:00 +0400
committer Kenneth Heafield <github@kheafield.com> 2012-02-28 22:58:00 +0400
commit e48de47c2381547f78f4dbd89f4fa3e76ba0c6bf (patch)
tree cdcbb888209bee7dd9c02a7d678cce4262c35416 /util
parent 7927979298644923cf02ad6c757c3d7c209e365a (diff)
KenLM 98814b2 including faster malloc-backed building and portability improvements
Diffstat (limited to 'util')
-rw-r--r-- util/bit_packing.hh 14
-rw-r--r-- util/file.cc 7
-rw-r--r-- util/file.hh 2
-rw-r--r-- util/file_piece.cc 4
-rw-r--r-- util/file_piece.hh 2
-rw-r--r-- util/key_value_packing.hh 126
-rw-r--r-- util/key_value_packing_test.cc 75
-rw-r--r-- util/mmap.cc 21
-rw-r--r-- util/mmap.hh 2
-rw-r--r-- util/murmur_hash.cc 39
-rw-r--r-- util/sorted_uniform_test.cc 1
11 files changed, 78 insertions, 215 deletions
diff --git a/util/bit_packing.hh b/util/bit_packing.hh
index f28f71f81..73a5cb226 100644
--- a/util/bit_packing.hh
+++ b/util/bit_packing.hh
@@ -47,7 +47,14 @@ inline uint8_t BitPackShift(uint8_t bit, uint8_t length) {
#endif
inline uint64_t ReadOff(const void *base, uint64_t bit_off) {
+#if defined(__arm) || defined(__arm__)
+ const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3);
+ uint64_t value64;
+ memcpy(&value64, base_off, sizeof(value64));
+ return value64;
+#else
return *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3));
+#endif
}
/* Pack integers up to 57 bits using their least significant digits.
@@ -75,7 +82,14 @@ inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, uint64_t va
/* Same caveats as above, but for a 25 bit limit. */
inline uint32_t ReadInt25(const void *base, uint64_t bit_off, uint8_t length, uint32_t mask) {
+#if defined(__arm) || defined(__arm__)
+ const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3);
+ uint32_t value32;
+ memcpy(&value32, base_off, sizeof(value32));
+ return (value32 >> BitPackShift(bit_off & 7, length)) & mask;
+#else
return (*reinterpret_cast<const uint32_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3)) >> BitPackShift(bit_off & 7, length)) & mask;
+#endif
}
inline void WriteInt25(void *base, uint64_t bit_off, uint8_t length, uint32_t value) {
diff --git a/util/file.cc b/util/file.cc
index 77922cfad..aee7c77ac 100644
--- a/util/file.cc
+++ b/util/file.cc
@@ -99,6 +99,13 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
}
}
+void FSyncOrThrow(int fd) {
+// Apparently windows doesn't have fsync?
+#if !defined(_WIN32) && !defined(_WIN64)
+ UTIL_THROW_IF(-1 == fsync(fd), ErrnoException, "Sync of " << fd << " failed.");
+#endif
+}
+
namespace {
void InternalSeek(int fd, off_t off, int whence) {
UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed");
diff --git a/util/file.hh b/util/file.hh
index 04023dec0..5c57e2a9f 100644
--- a/util/file.hh
+++ b/util/file.hh
@@ -78,6 +78,8 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount);
void WriteOrThrow(int fd, const void *data_void, std::size_t size);
+void FSyncOrThrow(int fd);
+
// Seeking
void SeekOrThrow(int fd, uint64_t off);
void AdvanceOrThrow(int fd, int64_t off);
diff --git a/util/file_piece.cc b/util/file_piece.cc
index 17cd23cbb..081e662ba 100644
--- a/util/file_piece.cc
+++ b/util/file_piece.cc
@@ -24,12 +24,12 @@ ParseNumberException::ParseNumberException(StringPiece value) throw() {
*this << "Could not parse \"" << value << "\" into a number";
}
-GZException::GZException(gzFile file) {
#ifdef HAVE_ZLIB
+GZException::GZException(gzFile file) {
int num;
*this << gzerror( file, &num) << " from zlib";
-#endif // HAVE_ZLIB
}
+#endif // HAVE_ZLIB
// Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
diff --git a/util/file_piece.hh b/util/file_piece.hh
index 2e4a91bbd..af93d8aab 100644
--- a/util/file_piece.hh
+++ b/util/file_piece.hh
@@ -27,7 +27,9 @@ class ParseNumberException : public Exception {
class GZException : public Exception {
public:
+#ifdef HAVE_ZLIB
explicit GZException(gzFile file);
+#endif
GZException() throw() {}
~GZException() throw() {}
};
diff --git a/util/key_value_packing.hh b/util/key_value_packing.hh
deleted file mode 100644
index 8339980b5..000000000
--- a/util/key_value_packing.hh
+++ /dev/null
@@ -1,126 +0,0 @@
-#ifndef UTIL_KEY_VALUE_PACKING__
-#define UTIL_KEY_VALUE_PACKING__
-
-/* Why such a general interface? I'm planning on doing bit-level packing. */
-
-#include <algorithm>
-#include <cstddef>
-#include <cstring>
-
-#include <stdint.h>
-
-namespace util {
-
-template <class Key, class Value> struct Entry {
- Key key;
- Value value;
-
- const Key &GetKey() const { return key; }
- const Value &GetValue() const { return value; }
-
- Value &MutableValue() { return value; }
-
- void Set(const Key &key_in, const Value &value_in) {
- SetKey(key_in);
- SetValue(value_in);
- }
- void SetKey(const Key &key_in) { key = key_in; }
- void SetValue(const Value &value_in) { value = value_in; }
-
- bool operator<(const Entry<Key, Value> &other) const { return GetKey() < other.GetKey(); }
-};
-
-// And now for a brief interlude to specialize std::swap.
-} // namespace util
-namespace std {
-template <class Key, class Value> void swap(util::Entry<Key, Value> &first, util::Entry<Key, Value> &second) {
- swap(first.key, second.key);
- swap(first.value, second.value);
-}
-}// namespace std
-namespace util {
-
-template <class KeyT, class ValueT> class AlignedPacking {
- public:
- typedef KeyT Key;
- typedef ValueT Value;
-
- public:
- static const std::size_t kBytes = sizeof(Entry<Key, Value>);
- static const std::size_t kBits = kBytes * 8;
-
- typedef Entry<Key, Value> * MutableIterator;
- typedef const Entry<Key, Value> * ConstIterator;
- typedef const Entry<Key, Value> & ConstReference;
-
- static MutableIterator FromVoid(void *start) {
- return reinterpret_cast<MutableIterator>(start);
- }
-
- static Entry<Key, Value> Make(const Key &key, const Value &value) {
- Entry<Key, Value> ret;
- ret.Set(key, value);
- return ret;
- }
-};
-
-template <class KeyT, class ValueT> class ByteAlignedPacking {
- public:
- typedef KeyT Key;
- typedef ValueT Value;
-
- private:
-#pragma pack(push)
-#pragma pack(1)
- struct RawEntry {
- Key key;
- Value value;
-
- const Key &GetKey() const { return key; }
- const Value &GetValue() const { return value; }
-
- Value &MutableValue() { return value; }
-
- void Set(const Key &key_in, const Value &value_in) {
- SetKey(key_in);
- SetValue(value_in);
- }
- void SetKey(const Key &key_in) { key = key_in; }
- void SetValue(const Value &value_in) { value = value_in; }
-
- bool operator<(const RawEntry &other) const { return GetKey() < other.GetKey(); }
- };
-#pragma pack(pop)
-
- friend void std::swap<>(RawEntry&, RawEntry&);
-
- public:
- typedef RawEntry *MutableIterator;
- typedef const RawEntry *ConstIterator;
- typedef RawEntry &ConstReference;
-
- static const std::size_t kBytes = sizeof(RawEntry);
- static const std::size_t kBits = kBytes * 8;
-
- static MutableIterator FromVoid(void *start) {
- return MutableIterator(reinterpret_cast<RawEntry*>(start));
- }
-
- static RawEntry Make(const Key &key, const Value &value) {
- RawEntry ret;
- ret.Set(key, value);
- return ret;
- }
-};
-
-} // namespace util
-namespace std {
-template <class Key, class Value> void swap(
- typename util::ByteAlignedPacking<Key, Value>::RawEntry &first,
- typename util::ByteAlignedPacking<Key, Value>::RawEntry &second) {
- swap(first.key, second.key);
- swap(first.value, second.value);
-}
-}// namespace std
-
-#endif // UTIL_KEY_VALUE_PACKING__
diff --git a/util/key_value_packing_test.cc b/util/key_value_packing_test.cc
deleted file mode 100644
index a0d33fd76..000000000
--- a/util/key_value_packing_test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include "util/key_value_packing.hh"
-
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/random/uniform_int.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/scoped_array.hpp>
-#define BOOST_TEST_MODULE KeyValueStoreTest
-#include <boost/test/unit_test.hpp>
-
-#include <limits>
-#include <stdlib.h>
-
-namespace util {
-namespace {
-
-BOOST_AUTO_TEST_CASE(basic_in_out) {
- typedef ByteAlignedPacking<uint64_t, unsigned char> Packing;
- void *backing = malloc(Packing::kBytes * 2);
- Packing::MutableIterator i(Packing::FromVoid(backing));
- i->SetKey(10);
- BOOST_CHECK_EQUAL(10, i->GetKey());
- i->SetValue(3);
- BOOST_CHECK_EQUAL(3, i->GetValue());
- ++i;
- i->SetKey(5);
- BOOST_CHECK_EQUAL(5, i->GetKey());
- i->SetValue(42);
- BOOST_CHECK_EQUAL(42, i->GetValue());
-
- Packing::ConstIterator c(i);
- BOOST_CHECK_EQUAL(5, c->GetKey());
- --c;
- BOOST_CHECK_EQUAL(10, c->GetKey());
- BOOST_CHECK_EQUAL(42, i->GetValue());
-
- BOOST_CHECK_EQUAL(5, i->GetKey());
- free(backing);
-}
-
-BOOST_AUTO_TEST_CASE(simple_sort) {
- typedef ByteAlignedPacking<uint64_t, unsigned char> Packing;
- char foo[Packing::kBytes * 4];
- Packing::MutableIterator begin(Packing::FromVoid(foo));
- Packing::MutableIterator i = begin;
- i->SetKey(0); ++i;
- i->SetKey(2); ++i;
- i->SetKey(3); ++i;
- i->SetKey(1); ++i;
- std::sort(begin, i);
- BOOST_CHECK_EQUAL(0, begin[0].GetKey());
- BOOST_CHECK_EQUAL(1, begin[1].GetKey());
- BOOST_CHECK_EQUAL(2, begin[2].GetKey());
- BOOST_CHECK_EQUAL(3, begin[3].GetKey());
-}
-
-BOOST_AUTO_TEST_CASE(big_sort) {
- typedef ByteAlignedPacking<uint64_t, unsigned char> Packing;
- boost::scoped_array<char> memory(new char[Packing::kBytes * 1000]);
- Packing::MutableIterator begin(Packing::FromVoid(memory.get()));
-
- boost::mt19937 rng;
- boost::uniform_int<uint64_t> range(0, std::numeric_limits<uint64_t>::max());
- boost::variate_generator<boost::mt19937&, boost::uniform_int<uint64_t> > gen(rng, range);
-
- for (size_t i = 0; i < 1000; ++i) {
- (begin + i)->SetKey(gen());
- }
- std::sort(begin, begin + 1000);
- for (size_t i = 0; i < 999; ++i) {
- BOOST_CHECK(begin[i] < begin[i+1]);
- }
-}
-
-} // namespace
-} // namespace util
diff --git a/util/mmap.cc b/util/mmap.cc
index d3a2526fa..a329ce4e3 100644
--- a/util/mmap.cc
+++ b/util/mmap.cc
@@ -101,9 +101,10 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int
#if defined(_WIN32) || defined(_WIN64)
int protectC = for_write ? PAGE_READWRITE : PAGE_READONLY;
int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ;
- HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, 0, size + offset, NULL);
+ uint64_t total_size = size + offset;
+ HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, total_size >> 32, static_cast<DWORD>(total_size), NULL);
UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed");
- LPVOID ret = MapViewOfFile(hMapping, protectM, 0, offset, size);
+ LPVOID ret = MapViewOfFile(hMapping, protectM, offset >> 32, offset, size);
CloseHandle(hMapping);
UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
#else
@@ -147,16 +148,20 @@ void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scope
}
}
-void *MapAnonymous(std::size_t size) {
- return MapOrThrow(size, true,
+// Allocates zeroed memory in to.
+void MapAnonymous(std::size_t size, util::scoped_memory &to) {
+ to.reset();
#if defined(_WIN32) || defined(_WIN64)
- 0 // MapOrThrow ignores the flags anyway.
-#elif defined(MAP_ANONYMOUS)
- MAP_ANONYMOUS | MAP_PRIVATE // Linux
+ to.reset(calloc(1, size), size, scoped_memory::MALLOC_ALLOCATED);
#else
+ to.reset(MapOrThrow(size, true,
+# if defined(MAP_ANONYMOUS)
+ MAP_ANONYMOUS | MAP_PRIVATE // Linux
+# else
MAP_ANON | MAP_PRIVATE // BSD
+# endif
+ , false, -1, 0), size, scoped_memory::MMAP_ALLOCATED);
#endif
- , false, -1, 0);
}
void *MapZeroedWrite(int fd, std::size_t size) {
diff --git a/util/mmap.hh b/util/mmap.hh
index 921147c34..b218c4d19 100644
--- a/util/mmap.hh
+++ b/util/mmap.hh
@@ -100,7 +100,7 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int
void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);
-void *MapAnonymous(std::size_t size);
+void MapAnonymous(std::size_t size, scoped_memory &to);
// Open file name with mmap of size bytes, all of which are initially zero.
void *MapZeroedWrite(int fd, std::size_t size);
diff --git a/util/murmur_hash.cc b/util/murmur_hash.cc
index ef5783fec..6accc21af 100644
--- a/util/murmur_hash.cc
+++ b/util/murmur_hash.cc
@@ -7,9 +7,11 @@
* placed in namespace util
* add MurmurHashNative
* default option = 0 for seed
+ * ARM port from NICT
*/
#include "util/murmur_hash.hh"
+#include <string.h>
namespace util {
@@ -28,12 +30,24 @@ uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
uint64_t h = seed ^ (len * m);
+#if defined(__arm) || defined(__arm__)
+ const size_t ksize = sizeof(uint64_t);
+ const unsigned char * data = (const unsigned char *)key;
+ const unsigned char * end = data + (std::size_t)(len/8) * ksize;
+#else
const uint64_t * data = (const uint64_t *)key;
const uint64_t * end = data + (len/8);
+#endif
while(data != end)
{
+#if defined(__arm) || defined(__arm__)
+ uint64_t k;
+ memcpy(&k, data, ksize);
+ data += ksize;
+#else
uint64_t k = *data++;
+#endif
k *= m;
k ^= k >> r;
@@ -75,16 +89,30 @@ uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
unsigned int h1 = seed ^ len;
unsigned int h2 = 0;
+#if defined(__arm) || defined(__arm__)
+ size_t ksize = sizeof(unsigned int);
+ const unsigned char * data = (const unsigned char *)key;
+#else
const unsigned int * data = (const unsigned int *)key;
+#endif
+ unsigned int k1, k2;
while(len >= 8)
{
- unsigned int k1 = *data++;
+#if defined(__arm) || defined(__arm__)
+ memcpy(&k1, data, ksize);
+ data += ksize;
+ memcpy(&k2, data, ksize);
+ data += ksize;
+#else
+ k1 = *data++;
+ k2 = *data++;
+#endif
+
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
- unsigned int k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
@@ -92,7 +120,12 @@ uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
if(len >= 4)
{
- unsigned int k1 = *data++;
+#if defined(__arm) || defined(__arm__)
+ memcpy(&k1, data, ksize);
+ data += ksize;
+#else
+ k1 = *data++;
+#endif
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
diff --git a/util/sorted_uniform_test.cc b/util/sorted_uniform_test.cc
index ac7a0bfc5..d9f6fad1e 100644
--- a/util/sorted_uniform_test.cc
+++ b/util/sorted_uniform_test.cc
@@ -5,6 +5,7 @@
#include <boost/random/variate_generator.hpp>
#include <boost/scoped_array.hpp>
#include <boost/unordered_map.hpp>
+
#define BOOST_TEST_MODULE SortedUniformTest
#include <boost/test/unit_test.hpp>