Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/util
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2014-01-28 04:51:35 +0400
committer: Kenneth Heafield <github@kheafield.com> 2014-01-28 04:51:35 +0400
commit: 14e02978fcbbe9ec6ed3faa232fcb3f30664f40e (patch)
tree: 0b0d19a9c1ba1020ae6b3f1230b8b5b9e6a945f9 /util
parent: b68a906fdd97c344ce7d6fa90a2db5a79c5853e6 (diff)
KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591
Avoid unspecified behavior of mmap when a file is resized (reported by Christian Hardmeier). Fixes for Mavericks and a workaround for Boost's broken semaphore. Clean clang compile (of kenlm). Merged some of 744376b3fbebc41c4a270bf549826d5eb9219ae0 but also undid some of it, because it was just masking a fundamental problem with pread rather than working around Windows limitations.
Diffstat (limited to 'util')
-rw-r--r-- util/Jamfile 11
-rw-r--r-- util/exception.cc 8
-rw-r--r-- util/file.cc 8
-rw-r--r-- util/joint_sort.hh 26
-rw-r--r-- util/murmur_hash.cc 7
-rw-r--r-- util/murmur_hash.hh 4
-rw-r--r-- util/pcqueue.hh 58
-rw-r--r-- util/pcqueue_test.cc 20
-rw-r--r-- util/probing_hash_table.hh 7
-rw-r--r-- util/proxy_iterator.hh 12
-rw-r--r-- util/read_compressed_test.cc 1
-rw-r--r-- util/sized_iterator.hh 14
-rw-r--r-- util/sized_iterator_test.cc 16
-rw-r--r-- util/usage.cc 8
14 files changed, 150 insertions, 50 deletions
diff --git a/util/Jamfile b/util/Jamfile
index 910b30550..5ee5c1c10 100644
--- a/util/Jamfile
+++ b/util/Jamfile
@@ -23,11 +23,8 @@ fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_pi
import testing ;
-unit-test bit_packing_test : bit_packing_test.cc kenutil /top//boost_unit_test_framework ;
run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
-unit-test read_compressed_test : read_compressed_test.o kenutil /top//boost_unit_test_framework ;
-unit-test joint_sort_test : joint_sort_test.cc kenutil /top//boost_unit_test_framework ;
-unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil /top//boost_unit_test_framework ;
-unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil /top//boost_unit_test_framework ;
-unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil /top//boost_unit_test_framework ;
-unit-test multi_intersection_test : multi_intersection_test.cc kenutil /top//boost_unit_test_framework ;
+for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
+ local name = [ MATCH "(.*)\.cc" : $(t) ] ;
+ unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_system ;
+}
diff --git a/util/exception.cc b/util/exception.cc
index 557c39862..083bac20d 100644
--- a/util/exception.cc
+++ b/util/exception.cc
@@ -51,6 +51,11 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun
}
namespace {
+// At least one of these functions will not be called.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
// The XOPEN version.
const char *HandleStrerror(int ret, const char *buf) {
if (!ret) return buf;
@@ -61,6 +66,9 @@ const char *HandleStrerror(int ret, const char *buf) {
const char *HandleStrerror(const char *ret, const char * /*buf*/) {
return ret;
}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
} // namespace
ErrnoException::ErrnoException() throw() : errno_(errno) {
diff --git a/util/file.cc b/util/file.cc
index 0b333e003..51eaf972f 100644
--- a/util/file.cc
+++ b/util/file.cc
@@ -20,6 +20,7 @@
#if defined __MINGW32__
#include <windows.h>
#include <unistd.h>
+#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1. Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
#elif defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
@@ -81,6 +82,7 @@ int CreateOrThrow(const char *name) {
uint64_t SizeFile(int fd) {
#if defined __MINGW32__
struct stat sb;
+ // Does this handle 64-bit?
int ret = fstat(fd, &sb);
if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
return sb.st_size;
@@ -109,6 +111,7 @@ uint64_t SizeOrThrow(int fd) {
void ResizeOrThrow(int fd, uint64_t to) {
#if defined __MINGW32__
+ // Does this handle 64-bit?
int ret = ftruncate
#elif defined(_WIN32) || defined(_WIN64)
errno_t ret = _chsize_s
@@ -125,7 +128,7 @@ namespace {
std::size_t GuardLarge(std::size_t size) {
// The following operating systems have broken read/write/pread/pwrite that
// only supports up to 2^31.
-#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID)
+#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
return std::min(static_cast<std::size_t>(static_cast<unsigned>(-1)), size);
#else
return size;
@@ -172,7 +175,7 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
uint8_t *to = static_cast<uint8_t*>(to_void);
#if defined(_WIN32) || defined(_WIN64)
- //UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
+ UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
#endif
for (;size ;) {
@@ -262,6 +265,7 @@ typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
void InternalSeek(int fd, int64_t off, int whence) {
if (
#if defined __MINGW32__
+ // Does this handle 64-bit?
(off_t)-1 == lseek(fd, off, whence)
#elif defined(_WIN32) || defined(_WIN64)
(__int64)-1 == _lseeki64(fd, off, whence)
diff --git a/util/joint_sort.hh b/util/joint_sort.hh
index 1b43ddcf4..13a52b67b 100644
--- a/util/joint_sort.hh
+++ b/util/joint_sort.hh
@@ -9,7 +9,6 @@
#include <algorithm>
#include <functional>
-#include <iostream>
namespace util {
@@ -35,9 +34,10 @@ template <class KeyIter, class ValueIter> class JointIter {
return *this;
}
- void swap(const JointIter &other) {
- std::swap(key_, other.key_);
- std::swap(value_, other.value_);
+ friend void swap(JointIter &first, JointIter &second) {
+ using std::swap;
+ swap(first.key_, second.key_);
+ swap(first.value_, second.value_);
}
private:
@@ -83,9 +83,11 @@ template <class KeyIter, class ValueIter> class JointProxy {
return *(inner_.key_);
}
- void swap(JointProxy<KeyIter, ValueIter> &other) {
- std::swap(*inner_.key_, *other.inner_.key_);
- std::swap(*inner_.value_, *other.inner_.value_);
+ friend void swap(JointProxy<KeyIter, ValueIter> first, JointProxy<KeyIter, ValueIter> second) {
+ // Allow argument-dependent lookup.
+ using std::swap;
+ swap(*first.inner_.key_, *second.inner_.key_);
+ swap(*first.inner_.value_, *second.inner_.value_);
}
private:
@@ -138,14 +140,4 @@ template <class KeyIter, class ValueIter> void JointSort(const KeyIter &key_begi
} // namespace util
-namespace std {
-template <class KeyIter, class ValueIter> void swap(util::detail::JointIter<KeyIter, ValueIter> &left, util::detail::JointIter<KeyIter, ValueIter> &right) {
- left.swap(right);
-}
-
-template <class KeyIter, class ValueIter> void swap(util::detail::JointProxy<KeyIter, ValueIter> &left, util::detail::JointProxy<KeyIter, ValueIter> &right) {
- left.swap(right);
-}
-} // namespace std
-
#endif // UTIL_JOINT_SORT__
diff --git a/util/murmur_hash.cc b/util/murmur_hash.cc
index 4f519312d..189668c01 100644
--- a/util/murmur_hash.cc
+++ b/util/murmur_hash.cc
@@ -153,12 +153,19 @@ uint64_t MurmurHash64B ( const void * key, std::size_t len, uint64_t seed )
// Trick to test for 64-bit architecture at compile time.
namespace {
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
template <unsigned L> inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, uint64_t seed) {
return MurmurHash64A(key, len, seed);
}
template <> inline uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, uint64_t seed) {
return MurmurHash64B(key, len, seed);
}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
} // namespace
uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed) {
diff --git a/util/murmur_hash.hh b/util/murmur_hash.hh
index ae7e88dec..4891833e9 100644
--- a/util/murmur_hash.hh
+++ b/util/murmur_hash.hh
@@ -5,8 +5,12 @@
namespace util {
+// 64-bit machine version
uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0);
+// 32-bit machine version (not the same function as above)
uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0);
+// Use the version for this arch. Because the values differ across
+// architectures, really only use it for in-memory structures.
uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0);
} // namespace util
diff --git a/util/pcqueue.hh b/util/pcqueue.hh
index 3df8749b1..07e4146f5 100644
--- a/util/pcqueue.hh
+++ b/util/pcqueue.hh
@@ -1,6 +1,8 @@
#ifndef UTIL_PCQUEUE__
#define UTIL_PCQUEUE__
+#include "util/exception.hh"
+
#include <boost/interprocess/sync/interprocess_semaphore.hpp>
#include <boost/scoped_array.hpp>
#include <boost/thread/mutex.hpp>
@@ -8,20 +10,68 @@
#include <errno.h>
+#ifdef __APPLE__
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/mach_traps.h>
+#include <mach/mach.h>
+#endif // __APPLE__
+
namespace util {
-inline void WaitSemaphore (boost::interprocess::interprocess_semaphore &on) {
+/* OS X Maverick and Boost interprocess were doing "Function not implemented."
+ * So this is my own wrapper around the mach kernel APIs.
+ */
+#ifdef __APPLE__
+
+#define MACH_CALL(call) UTIL_THROW_IF(KERN_SUCCESS != (call), Exception, "Mach call failure")
+
+class Semaphore {
+ public:
+ explicit Semaphore(int value) : task_(mach_task_self()) {
+ MACH_CALL(semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value));
+ }
+
+ ~Semaphore() {
+ MACH_CALL(semaphore_destroy(task_, back_));
+ }
+
+ void wait() {
+ MACH_CALL(semaphore_wait(back_));
+ }
+
+ void post() {
+ MACH_CALL(semaphore_signal(back_));
+ }
+
+ private:
+ semaphore_t back_;
+ task_t task_;
+};
+
+inline void WaitSemaphore(Semaphore &semaphore) {
+ semaphore.wait();
+}
+
+#else
+typedef boost::interprocess::interprocess_semaphore Semaphore;
+
+inline void WaitSemaphore (Semaphore &on) {
while (1) {
try {
on.wait();
break;
}
catch (boost::interprocess::interprocess_exception &e) {
- if (e.get_native_error() != EINTR) throw;
+ if (e.get_native_error() != EINTR) {
+ throw;
+ }
}
}
}
+#endif // __APPLE__
+
/* Producer consumer queue safe for multiple producers and multiple consumers.
* T must be default constructable and have operator=.
* The value is copied twice for Consume(T &out) or three times for Consume(),
@@ -82,9 +132,9 @@ template <class T> class PCQueue : boost::noncopyable {
private:
// Number of empty spaces in storage_.
- boost::interprocess::interprocess_semaphore empty_;
+ Semaphore empty_;
// Number of occupied spaces in storage_.
- boost::interprocess::interprocess_semaphore used_;
+ Semaphore used_;
boost::scoped_array<T> storage_;
diff --git a/util/pcqueue_test.cc b/util/pcqueue_test.cc
new file mode 100644
index 000000000..22ed2c6f3
--- /dev/null
+++ b/util/pcqueue_test.cc
@@ -0,0 +1,20 @@
+#include "util/pcqueue.hh"
+
+#define BOOST_TEST_MODULE PCQueueTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace {
+
+BOOST_AUTO_TEST_CASE(SingleThread) {
+ PCQueue<int> queue(10);
+ for (int i = 0; i < 10; ++i) {
+ queue.Produce(i);
+ }
+ for (int i = 0; i < 10; ++i) {
+ BOOST_CHECK_EQUAL(i, queue.Consume());
+ }
+}
+
+}
+} // namespace util
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh
index 9566028f5..38524806c 100644
--- a/util/probing_hash_table.hh
+++ b/util/probing_hash_table.hh
@@ -70,6 +70,11 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
#endif
{}
+ void Relocate(void *new_base) {
+ begin_ = reinterpret_cast<MutableIterator>(new_base);
+ end_ = begin_ + buckets_;
+ }
+
template <class T> MutableIterator Insert(const T &t) {
#ifdef DEBUG
assert(initialized_);
@@ -98,8 +103,6 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
void FinishedInserting() {}
- void LoadedBinary() {}
-
// Don't change anything related to GetKey,
template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
#ifdef DEBUG
diff --git a/util/proxy_iterator.hh b/util/proxy_iterator.hh
index 0ee1716f4..a2810a472 100644
--- a/util/proxy_iterator.hh
+++ b/util/proxy_iterator.hh
@@ -38,8 +38,8 @@ template <class Proxy> class ProxyIterator {
typedef std::random_access_iterator_tag iterator_category;
typedef typename Proxy::value_type value_type;
typedef std::ptrdiff_t difference_type;
- typedef Proxy & reference;
- typedef Proxy * pointer;
+ typedef Proxy reference;
+ typedef ProxyIterator<Proxy> * pointer;
ProxyIterator() {}
@@ -47,10 +47,10 @@ template <class Proxy> class ProxyIterator {
template <class AlternateProxy> ProxyIterator(const ProxyIterator<AlternateProxy> &in) : p_(*in) {}
explicit ProxyIterator(const Proxy &p) : p_(p) {}
- // p_'s swap does value swapping, but here we want iterator swapping
+/* // p_'s swap does value swapping, but here we want iterator swapping
friend inline void swap(ProxyIterator<Proxy> &first, ProxyIterator<Proxy> &second) {
swap(first.I(), second.I());
- }
+ }*/
// p_'s operator= does value copying, but here we want iterator copying.
S &operator=(const S &other) {
@@ -77,8 +77,8 @@ template <class Proxy> class ProxyIterator {
std::ptrdiff_t operator-(const S &other) const { return I() - other.I(); }
- Proxy &operator*() { return p_; }
- const Proxy &operator*() const { return p_; }
+ Proxy operator*() { return p_; }
+ const Proxy operator*() const { return p_; }
Proxy *operator->() { return &p_; }
const Proxy *operator->() const { return &p_; }
Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amount); }
diff --git a/util/read_compressed_test.cc b/util/read_compressed_test.cc
index 71b97b0f6..50450a025 100644
--- a/util/read_compressed_test.cc
+++ b/util/read_compressed_test.cc
@@ -17,6 +17,7 @@
#include <fcntl.h>
#if !defined mkstemp
+// TODO insecure
int mkstemp(char * stemplate)
{
char *filename = mktemp(stemplate);
diff --git a/util/sized_iterator.hh b/util/sized_iterator.hh
index dce8f229a..a72657b50 100644
--- a/util/sized_iterator.hh
+++ b/util/sized_iterator.hh
@@ -36,7 +36,7 @@ class SizedInnerIterator {
void *Data() { return ptr_; }
std::size_t EntrySize() const { return size_; }
- friend inline void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
+ friend void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
std::swap(first.ptr_, second.ptr_);
std::swap(first.size_, second.size_);
}
@@ -69,17 +69,7 @@ class SizedProxy {
const void *Data() const { return inner_.Data(); }
void *Data() { return inner_.Data(); }
- /**
- // TODO: this (deep) swap was recently added. why? if any std heap sort etc
- // algs are using swap, that's going to be worse performance than using
- // =. i'm not sure why we *want* a deep swap. if C++11 compilers are
- // choosing between move constructor and swap, then we'd better implement a
- // (deep) move constructor. it may also be that this is moot since i made
- // ProxyIterator a reference and added a shallow ProxyIterator swap? (I
- // need Ken or someone competent to judge whether that's correct also. -
- // let me know at graehl@gmail.com
- */
- friend void swap(SizedProxy &first, SizedProxy &second) {
+ friend void swap(SizedProxy first, SizedProxy second) {
std::swap_ranges(
static_cast<char*>(first.inner_.Data()),
static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
diff --git a/util/sized_iterator_test.cc b/util/sized_iterator_test.cc
new file mode 100644
index 000000000..c36bcb2d2
--- /dev/null
+++ b/util/sized_iterator_test.cc
@@ -0,0 +1,16 @@
+#include "util/sized_iterator.hh"
+
+#define BOOST_TEST_MODULE SizedIteratorTest
+#include <boost/test/unit_test.hpp>
+
+namespace util { namespace {
+
+BOOST_AUTO_TEST_CASE(swap_works) {
+ char str[2] = { 0, 1 };
+ SizedProxy first(str, 1), second(str + 1, 1);
+ swap(first, second);
+ BOOST_CHECK_EQUAL(1, str[0]);
+ BOOST_CHECK_EQUAL(0, str[1]);
+}
+
+}} // namespace anonymous util
diff --git a/util/usage.cc b/util/usage.cc
index 2f870d854..e68d7c7c1 100644
--- a/util/usage.cc
+++ b/util/usage.cc
@@ -66,6 +66,11 @@ Wall GetWall() {
}
#endif
+// Some of these functions are only used on some platforms.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
// These all assume first > second
double Subtract(time_t first, time_t second) {
return difftime(first, second);
@@ -87,6 +92,9 @@ double DoubleSec(const struct timespec &tv) {
return static_cast<double>(tv.tv_sec) + (static_cast<double>(tv.tv_nsec) / 1000000000.0);
}
#endif
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
class RecordStart {
public: