diff options
author | Jeroen Vermeulen <jtv@precisiontranslationtools.com> | 2015-04-22 16:43:29 +0300 |
---|---|---|
committer | Jeroen Vermeulen <jtv@precisiontranslationtools.com> | 2015-04-22 16:43:29 +0300 |
commit | 75bfb758822cc926a1852c6a86a7ce5d153a1320 (patch) | |
tree | 7a967ee360aeb4898450888b31fc2ad602d9c7e6 | |
parent | 1083999d3e4447ac347b8bcbb04a0733ae8c3654 (diff) |
Thread-safe, platform-agnostic randomizer.
Some places in mert use srandom()/random(), but these are POSIX-specific.
The standard alternative, srand()/rand(), is not thread-safe. This module
wraps srand()/rand() in a single mutex (held only briefly on each call, so it
should not cost much), so that the code relies on just Boost and the C standard
library, not on a Unix-like environment.
This may reduce the width of the random numbers on some platforms: the return
type goes from "long int" to just "int". If that is a problem, we may have to
use Boost's randomizer utilities, or eventually, the C++11 <random> facilities.
-rw-r--r-- | mert/Data.cpp | 3 | ||||
-rw-r--r-- | mert/Point.cpp | 5 | ||||
-rw-r--r-- | mert/evaluator.cpp | 7 | ||||
-rw-r--r-- | mert/mert.cpp | 5 | ||||
-rw-r--r-- | util/Jamfile | 2 | ||||
-rw-r--r-- | util/random.cc | 39 | ||||
-rw-r--r-- | util/random.hh | 32 | ||||
-rw-r--r-- | util/random_test.cc | 39 |
8 files changed, 123 insertions, 9 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp index 49c1239e5..428886b99 100644 --- a/mert/Data.cpp +++ b/mert/Data.cpp @@ -17,6 +17,7 @@ #include "util/exception.hh" #include "util/file_piece.hh" +#include "util/random.hh" #include "util/tokenize_piece.hh" #include "util/string_piece.hh" #include "FeatureDataIterator.h" @@ -286,7 +287,7 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor } else { //create shards by randomly sampling for (size_t i = 0; i < floor(shard_size+0.5); ++i) { - shard_contents.push_back(rand() % data_size); + shard_contents.push_back(util::rand_int() % data_size); } } diff --git a/mert/Point.cpp b/mert/Point.cpp index 55dc6a6b2..562249492 100644 --- a/mert/Point.cpp +++ b/mert/Point.cpp @@ -3,6 +3,7 @@ #include <cmath> #include <cstdlib> #include "util/exception.hh" +#include "util/random.hh" #include "FeatureStats.h" #include "Optimizer.h" @@ -58,8 +59,8 @@ void Point::Randomize() UTIL_THROW_IF(m_max.size() != Point::m_dim, util::Exception, "Error"); for (unsigned int i = 0; i < size(); i++) { - operator[](i) = m_min[i] + - static_cast<float>(random()) / static_cast<float>(RAND_MAX) * (m_max[i] - m_min[i]); + const float scale = (m_max[i] - m_min[i]) / float(RAND_MAX); + operator[](i) = m_min[i] + util::rand_int() * scale; } } diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp index 026abf397..61775a354 100644 --- a/mert/evaluator.cpp +++ b/mert/evaluator.cpp @@ -16,6 +16,7 @@ #include "Timer.h" #include "Util.h" #include "Data.h" +#include "util/random.hh" using namespace std; using namespace MosesTuning; @@ -94,7 +95,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i for (int i = 0; i < bootstrap; ++i) { ScoreData scoredata(g_scorer); for (int j = 0; j < n; ++j) { - int randomIndex = random() % n; + int randomIndex = util::rand_int() % n; scoredata.add(entries[randomIndex], j); } g_scorer->setScoreData(&scoredata); @@ -284,10 +285,10 @@ void InitSeed(const ProgramOption 
*opt) { if (opt->has_seed) { cerr << "Seeding random numbers with " << opt->seed << endl; - srandom(opt->seed); + util::rand_int_init(opt->seed); } else { cerr << "Seeding random numbers with system clock " << endl; - srandom(time(NULL)); + util::rand_int_init(); } } diff --git a/mert/mert.cpp b/mert/mert.cpp index 275aa7b09..e79ba7c6b 100644 --- a/mert/mert.cpp +++ b/mert/mert.cpp @@ -24,6 +24,7 @@ #include "Types.h" #include "Timer.h" #include "Util.h" +#include "util/random.hh" #include "moses/ThreadPool.h" @@ -289,10 +290,10 @@ int main(int argc, char **argv) if (option.has_seed) { cerr << "Seeding random numbers with " << option.seed << endl; - srandom(option.seed); + util::rand_int_init(option.seed); } else { cerr << "Seeding random numbers with system clock " << endl; - srandom(time(NULL)); + util::rand_int_init(); } if (option.sparse_weights_file.size()) ++option.pdim; diff --git a/util/Jamfile b/util/Jamfile index a82a5e23d..2d3cede01 100644 --- a/util/Jamfile +++ b/util/Jamfile @@ -21,7 +21,7 @@ obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(c fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ; -fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ; +fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc random.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. 
; exe cat_compressed : cat_compressed_main.cc kenutil ; diff --git a/util/random.cc b/util/random.cc new file mode 100644 index 000000000..368718c9f --- /dev/null +++ b/util/random.cc @@ -0,0 +1,39 @@ +#include "util/random.hh" + +#include <cstdlib> + +#include <boost/thread/locks.hpp> +#include <boost/thread/lock_guard.hpp> +#include <boost/thread/mutex.hpp> + +namespace util +{ +namespace +{ +/** Lock to protect randomizer. + * + * This module is implemented in terms of rand()/srand() from <cstdlib>. + * These functions are standard C, but they're not thread-safe. Scalability + * is not worth much complexity here, so just slap a mutex around it. + */ +boost::mutex rand_lock; +} // namespace + +void rand_int_init(unsigned int seed) +{ + boost::lock_guard<boost::mutex> lock(rand_lock); + srand(seed); +} + + +void rand_int_init() +{ + rand_int_init(time(NULL)); +} + +int rand_int() +{ + boost::lock_guard<boost::mutex> lock(rand_lock); + return rand(); +} +} // namespace util diff --git a/util/random.hh b/util/random.hh new file mode 100644 index 000000000..c372de463 --- /dev/null +++ b/util/random.hh @@ -0,0 +1,32 @@ +#ifndef UTIL_RANDOM_H +#define UTIL_RANDOM_H + +namespace util +{ + +/** Initialize randomizer with a fixed seed. + * + * After this, unless the randomizer gets seeded again, consecutive calls to + * rand_int() will return a sequence of pseudo-random numbers determined by + * the seed. Every time the randomizer is seeded with this same seed, it will + * again start returning the same sequence of numbers. + */ +void rand_int_init(unsigned int); + +/** Initialize randomizer based on current time. + * + * Call this to make the randomizer return hard-to-predict numbers. It won't + * produce high-grade randomness, but enough to make the program act + * differently on different runs. + */ +void rand_int_init(); + +/** Return a pseudorandom number between 0 and RAND_MAX inclusive. + * + * Initialize (seed) the randomizer before starting to call this. 
+ */ +int rand_int(); + +} // namespace util + +#endif diff --git a/util/random_test.cc b/util/random_test.cc new file mode 100644 index 000000000..d1c555a0c --- /dev/null +++ b/util/random_test.cc @@ -0,0 +1,39 @@ +#include "util/random.hh" + +#define BOOST_TEST_MODULE RandomTest +#include <boost/test/unit_test.hpp> + +namespace util +{ +namespace +{ + +BOOST_AUTO_TEST_CASE(returns_different_consecutive_numbers) +{ + rand_int_init(99); + const int first = rand_int(), second = rand_int(), third = rand_int(); + // Sometimes you'll get the same number twice in a row, but generally the + // randomizer returns different numbers. + BOOST_CHECK(second != first || third != first); +} + +BOOST_AUTO_TEST_CASE(returns_different_numbers_for_different_seeds) +{ + rand_int_init(1); + const int one1 = rand_int(), one2 = rand_int(); + rand_int_init(2); + const int two1 = rand_int(), two2 = rand_int(); + BOOST_CHECK(two1 != one1 || two2 != two1); +} + +BOOST_AUTO_TEST_CASE(returns_same_sequence_for_same_seed) +{ + rand_int_init(1); + const int first = rand_int(); + rand_int_init(1); + const int second = rand_int(); + BOOST_CHECK_EQUAL(first, second); +} + +} // namespace +} // namespace util |