Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeroen Vermeulen <jtv@precisiontranslationtools.com> 2015-04-22 16:43:29 +0300
committer: Jeroen Vermeulen <jtv@precisiontranslationtools.com> 2015-04-22 16:43:29 +0300
commit: 75bfb758822cc926a1852c6a86a7ce5d153a1320 (patch)
tree: 7a967ee360aeb4898450888b31fc2ad602d9c7e6
parent: 1083999d3e4447ac347b8bcbb04a0733ae8c3654 (diff)
Thread-safe, platform-agnostic randomizer.
Some places in mert use srandom()/random(), but these are POSIX-specific. The standard alternative, srand()/rand(), is not thread-safe. This module wraps srand()/rand() in a mutex (held only very briefly, so it should not cost much) so that it relies on just Boost and the C standard library, not on a Unix-like environment. This may reduce the width of the random numbers on some platforms: it goes from "long int" to just "int". If that is a problem, we may have to use Boost's randomizer utilities, or eventually, the C++ ones.
-rw-r--r-- mert/Data.cpp 3
-rw-r--r-- mert/Point.cpp 5
-rw-r--r-- mert/evaluator.cpp 7
-rw-r--r-- mert/mert.cpp 5
-rw-r--r-- util/Jamfile 2
-rw-r--r-- util/random.cc 39
-rw-r--r-- util/random.hh 32
-rw-r--r-- util/random_test.cc 39
8 files changed, 123 insertions, 9 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 49c1239e5..428886b99 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -17,6 +17,7 @@
#include "util/exception.hh"
#include "util/file_piece.hh"
+#include "util/random.hh"
#include "util/tokenize_piece.hh"
#include "util/string_piece.hh"
#include "FeatureDataIterator.h"
@@ -286,7 +287,7 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
} else {
//create shards by randomly sampling
for (size_t i = 0; i < floor(shard_size+0.5); ++i) {
- shard_contents.push_back(rand() % data_size);
+ shard_contents.push_back(util::rand_int() % data_size);
}
}
diff --git a/mert/Point.cpp b/mert/Point.cpp
index 55dc6a6b2..562249492 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -3,6 +3,7 @@
#include <cmath>
#include <cstdlib>
#include "util/exception.hh"
+#include "util/random.hh"
#include "FeatureStats.h"
#include "Optimizer.h"
@@ -58,8 +59,8 @@ void Point::Randomize()
UTIL_THROW_IF(m_max.size() != Point::m_dim, util::Exception, "Error");
for (unsigned int i = 0; i < size(); i++) {
- operator[](i) = m_min[i] +
- static_cast<float>(random()) / static_cast<float>(RAND_MAX) * (m_max[i] - m_min[i]);
+ const float scale = (m_max[i] - m_min[i]) / float(RAND_MAX);
+ operator[](i) = m_min[i] + util::rand_int() * scale;
}
}
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index 026abf397..61775a354 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -16,6 +16,7 @@
#include "Timer.h"
#include "Util.h"
#include "Data.h"
+#include "util/random.hh"
using namespace std;
using namespace MosesTuning;
@@ -94,7 +95,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
for (int i = 0; i < bootstrap; ++i) {
ScoreData scoredata(g_scorer);
for (int j = 0; j < n; ++j) {
- int randomIndex = random() % n;
+ int randomIndex = util::rand_int() % n;
scoredata.add(entries[randomIndex], j);
}
g_scorer->setScoreData(&scoredata);
@@ -284,10 +285,10 @@ void InitSeed(const ProgramOption *opt)
{
if (opt->has_seed) {
cerr << "Seeding random numbers with " << opt->seed << endl;
- srandom(opt->seed);
+ util::rand_int_init(opt->seed);
} else {
cerr << "Seeding random numbers with system clock " << endl;
- srandom(time(NULL));
+ util::rand_int_init();
}
}
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 275aa7b09..e79ba7c6b 100644
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -24,6 +24,7 @@
#include "Types.h"
#include "Timer.h"
#include "Util.h"
+#include "util/random.hh"
#include "moses/ThreadPool.h"
@@ -289,10 +290,10 @@ int main(int argc, char **argv)
if (option.has_seed) {
cerr << "Seeding random numbers with " << option.seed << endl;
- srandom(option.seed);
+ util::rand_int_init(option.seed);
} else {
cerr << "Seeding random numbers with system clock " << endl;
- srandom(time(NULL));
+ util::rand_int_init();
}
if (option.sparse_weights_file.size()) ++option.pdim;
diff --git a/util/Jamfile b/util/Jamfile
index a82a5e23d..2d3cede01 100644
--- a/util/Jamfile
+++ b/util/Jamfile
@@ -21,7 +21,7 @@ obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(c
fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ;
-fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
+fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc random.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
exe cat_compressed : cat_compressed_main.cc kenutil ;
diff --git a/util/random.cc b/util/random.cc
new file mode 100644
index 000000000..368718c9f
--- /dev/null
+++ b/util/random.cc
@@ -0,0 +1,39 @@
+#include "util/random.hh"
+
+#include <cstdlib>
+#include <ctime>
+
+#include <boost/thread/locks.hpp>
+#include <boost/thread/lock_guard.hpp>
+#include <boost/thread/mutex.hpp>
+
+namespace util
+{
+namespace
+{
+/** Lock to protect randomizer.
+ *
+ * This module is implemented in terms of rand()/srand() from <cstdlib>.
+ * These functions are standard C, but they're not thread-safe.  Scalability
+ * is not worth much complexity here, so just slap a mutex around it.
+ */
+boost::mutex rand_lock;
+} // namespace
+
+/** Seed the randomizer with a fixed value.
+ *
+ * Holds rand_lock for the duration of the srand() call, so seeding cannot
+ * interleave with a rand_int() call made through this module on another
+ * thread.
+ */
+void rand_int_init(unsigned int seed)
+{
+  boost::lock_guard<boost::mutex> lock(rand_lock);
+  std::srand(seed);
+}
+
+
+/** Seed the randomizer from the current time as reported by time(). */
+void rand_int_init()
+{
+  // time_t may be wider than unsigned int; make the narrowing explicit.
+  // The lock is taken inside the seeded overload, not here.
+  rand_int_init(static_cast<unsigned int>(std::time(NULL)));
+}
+
+/** Return the next value from rand(), fetched while holding rand_lock. */
+int rand_int()
+{
+  boost::lock_guard<boost::mutex> lock(rand_lock);
+  return std::rand();
+}
+} // namespace util
diff --git a/util/random.hh b/util/random.hh
new file mode 100644
index 000000000..c372de463
--- /dev/null
+++ b/util/random.hh
@@ -0,0 +1,32 @@
+#ifndef UTIL_RANDOM_H
+#define UTIL_RANDOM_H
+
+namespace util
+{
+
+/** Seed the randomizer with a caller-chosen value.
+ *
+ * Until the randomizer is seeded again, successive rand_int() calls yield a
+ * pseudo-random sequence that is fully determined by the seed.  Seeding
+ * with the same value again restarts that same sequence from its beginning.
+ */
+void rand_int_init(unsigned int seed);
+
+/** Seed the randomizer from the current time.
+ *
+ * Use this when the numbers should be hard to predict.  The result is not
+ * high-grade randomness, but it is enough to make the program behave
+ * differently from one run to the next.
+ */
+void rand_int_init();
+
+/** Return a pseudorandom number in the range 0 to RAND_MAX, inclusive.
+ *
+ * The randomizer should be initialized (seeded) before the first call.
+ */
+int rand_int();
+
+} // namespace util
+
+
+#endif
diff --git a/util/random_test.cc b/util/random_test.cc
new file mode 100644
index 000000000..d1c555a0c
--- /dev/null
+++ b/util/random_test.cc
@@ -0,0 +1,39 @@
+#include "util/random.hh"
+
+#define BOOST_TEST_MODULE RandomTest
+#include <boost/test/unit_test.hpp>
+
+namespace util
+{
+namespace
+{
+
+/** Three draws after seeding must not all be the same number. */
+BOOST_AUTO_TEST_CASE(returns_different_consecutive_numbers)
+{
+  rand_int_init(99);
+  const int first = rand_int(), second = rand_int(), third = rand_int();
+  // Sometimes you'll get the same number twice in a row, but generally the
+  // randomizer returns different numbers.
+  BOOST_CHECK(second != first || third != first);
+}
+
+/** Seeds 1 and 2 must not produce the same first two draws. */
+BOOST_AUTO_TEST_CASE(returns_different_numbers_for_different_seeds)
+{
+  rand_int_init(1);
+  const int one1 = rand_int(), one2 = rand_int();
+  rand_int_init(2);
+  const int two1 = rand_int(), two2 = rand_int();
+  // Compare position by position across the two seeds: the sequences count
+  // as different if they disagree at either the first or the second draw.
+  BOOST_CHECK(two1 != one1 || two2 != one2);
+}
+
+/** Re-seeding with the same value must replay the same first draw. */
+BOOST_AUTO_TEST_CASE(returns_same_sequence_for_same_seed)
+{
+  rand_int_init(1);
+  const int first = rand_int();
+  rand_int_init(1);
+  const int second = rand_int();
+  BOOST_CHECK_EQUAL(first, second);
+}
+
+} // namespace
+} // namespace util