diff options
author | Kenneth Heafield <github@kheafield.com> | 2015-05-19 22:27:30 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2015-05-19 22:27:30 +0300 |
commit | a70d37e46fce323f7a9720e3a621f35d19e4ac9f (patch) | |
tree | d45651f2d44ba7c373f2a198611dca3b196e36d9 /util | |
parent | 90309aebfa0184ac611725520443b34c3331794b (diff) |
KenLM 7408730be415db9b650560a8b2bd3e4e3af49ec9.
unistd.hh is dead.
Diffstat (limited to 'util')
-rw-r--r-- | util/Jamfile | 7 | ||||
-rw-r--r-- | util/exception.hh | 6 | ||||
-rw-r--r-- | util/fake_ofstream.hh | 118 | ||||
-rw-r--r-- | util/file_piece.cc | 82 | ||||
-rw-r--r-- | util/file_piece.hh | 32 | ||||
-rw-r--r-- | util/file_piece_test.cc | 17 | ||||
-rw-r--r-- | util/float_to_string.cc | 23 | ||||
-rw-r--r-- | util/float_to_string.hh | 25 | ||||
-rw-r--r-- | util/integer_to_string.cc | 639 | ||||
-rw-r--r-- | util/integer_to_string.hh | 56 | ||||
-rw-r--r-- | util/integer_to_string_test.cc | 65 | ||||
-rw-r--r-- | util/probing_hash_table.hh | 24 | ||||
-rw-r--r-- | util/probing_hash_table_benchmark_main.cc | 49 | ||||
-rw-r--r-- | util/scoped.cc | 12 | ||||
-rw-r--r-- | util/scoped.hh | 2 | ||||
-rw-r--r-- | util/stream/Jamfile | 3 | ||||
-rw-r--r-- | util/stream/block.hh | 1 | ||||
-rw-r--r-- | util/stream/chain.hh | 2 | ||||
-rw-r--r-- | util/stream/io.hh | 4 | ||||
-rw-r--r-- | util/stream/rewindable_stream.cc | 117 | ||||
-rw-r--r-- | util/stream/rewindable_stream.hh | 108 | ||||
-rw-r--r-- | util/stream/rewindable_stream_test.cc | 41 | ||||
-rw-r--r-- | util/stream/sort.hh | 49 | ||||
-rw-r--r-- | util/tempfile.hh | 3 | ||||
-rw-r--r-- | util/unistd.hh | 22 | ||||
-rw-r--r-- | util/usage.cc | 10 | ||||
-rw-r--r-- | util/usage.hh | 3 |
27 files changed, 1393 insertions, 127 deletions
diff --git a/util/Jamfile b/util/Jamfile index 2d3cede01..7538f7d17 100644 --- a/util/Jamfile +++ b/util/Jamfile @@ -21,10 +21,13 @@ obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(c fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ; -fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc random.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ; +fakelib kenutil : [ glob *.cc : parallel_read.cc read_compressed.cc *_main.cc *_test.cc ] read_compressed parallel_read double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ; exe cat_compressed : cat_compressed_main.cc kenutil ; +#Does not install this +exe probing_hash_table_benchmark : probing_hash_table_benchmark_main.cc kenutil ; + alias programs : cat_compressed ; import testing ; @@ -34,3 +37,5 @@ for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] { local name = [ MATCH "(.*)\.cc" : $(t) ] ; unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_filesystem /top//boost_system ; } + +build-project stream ; diff --git a/util/exception.hh b/util/exception.hh index 7a0e7c44a..d67a6f9fb 100644 --- a/util/exception.hh +++ b/util/exception.hh @@ -91,6 +91,12 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep #define UTIL_UNLIKELY(x) (x) #endif +#if __GNUC__ >= 3 +#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1) +#else +#define UTIL_LIKELY(x) (x) +#endif + #define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \ if (UTIL_UNLIKELY(Condition)) { \ UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \ diff --git a/util/fake_ofstream.hh b/util/fake_ofstream.hh index 8299ba9ac..d35bf0d83 100644 --- a/util/fake_ofstream.hh +++ b/util/fake_ofstream.hh @@ -1,111 +1,135 @@ /* Like std::ofstream but without being incredibly slow. Backed by a raw fd. - * Does not support many data types. Currently, it's targeted at writing ARPA - * files quickly. + * Supports most of the built-in types except for void* and long double. */ #ifndef UTIL_FAKE_OFSTREAM_H #define UTIL_FAKE_OFSTREAM_H -#include "util/double-conversion/double-conversion.h" -#include "util/double-conversion/utils.h" #include "util/file.hh" +#include "util/float_to_string.hh" +#include "util/integer_to_string.hh" #include "util/scoped.hh" #include "util/string_piece.hh" -#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE -#include <boost/lexical_cast.hpp> +#include <cassert> +#include <cstring> + +#include <stdint.h> namespace util { class FakeOFStream { public: + // Maximum over all ToString operations. + // static const std::size_t kMinBuf = 20; + // This was causing compile failures in debug, so now 20 is written directly. + // // Does not take ownership of out. // Allows default constructor, but must call SetFD. explicit FakeOFStream(int out = -1, std::size_t buffer_size = 1048576) - : buf_(util::MallocOrThrow(buffer_size)), - builder_(static_cast<char*>(buf_.get()), buffer_size), - // Mostly the default but with inf instead. And no flags. - convert_(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0), - fd_(out), - buffer_size_(buffer_size) {} + : buf_(util::MallocOrThrow(std::max(buffer_size, (size_t)20))), + current_(static_cast<char*>(buf_.get())), + end_(current_ + std::max(buffer_size, (size_t)20)), + fd_(out) {} ~FakeOFStream() { - if (buf_.get()) Flush(); + // Could have called Finish already + flush(); } void SetFD(int to) { - if (builder_.position()) Flush(); + flush(); fd_ = to; } - FakeOFStream &Write(const void *data, std::size_t length) { - // Dominant case - if (static_cast<std::size_t>(builder_.size() - builder_.position()) > length) { - builder_.AddSubstring((const char*)data, length); + FakeOFStream &write(const void *data, std::size_t length) { + if (UTIL_LIKELY(current_ + length <= end_)) { + std::memcpy(current_, data, length); + current_ += length; return *this; } - Flush(); - if (length > buffer_size_) { - util::WriteOrThrow(fd_, data, length); + flush(); + if (current_ + length <= end_) { + std::memcpy(current_, data, length); + current_ += length; } else { - builder_.AddSubstring((const char*)data, length); + util::WriteOrThrow(fd_, data, length); } return *this; } + // This also covers std::string and char* FakeOFStream &operator<<(StringPiece str) { - return Write(str.data(), str.size()); + return write(str.data(), str.size()); } - FakeOFStream &operator<<(float value) { - // Odd, but this is the largest number found in the comments. - EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8); - convert_.ToShortestSingle(value, &builder_); + // For anything with ToStringBuf<T>::kBytes, define operator<< using ToString. + // This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t, + // float, double + private: + template <int Arg> struct EnableIfKludge { + typedef FakeOFStream type; + }; + public: + template <class T> typename EnableIfKludge<ToStringBuf<T>::kBytes>::type &operator<<(const T value) { + EnsureRemaining(ToStringBuf<T>::kBytes); + current_ = ToString(value, current_); + assert(current_ <= end_); return *this; } - FakeOFStream &operator<<(double value) { - EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8); - convert_.ToShortest(value, &builder_); + FakeOFStream &operator<<(char c) { + EnsureRemaining(1); + *current_++ = c; return *this; } - // Inefficient! TODO: more efficient implementation - FakeOFStream &operator<<(unsigned value) { - return *this << boost::lexical_cast<std::string>(value); + FakeOFStream &operator<<(unsigned char c) { + EnsureRemaining(1); + *current_++ = static_cast<char>(c); + return *this; } - FakeOFStream &operator<<(char c) { - EnsureRemaining(1); - builder_.AddCharacter(c); + /* clang on OS X appears to consider std::size_t aka unsigned long distinct + * from uint64_t. So this function makes clang work. gcc considers + * uint64_t and std::size_t the same (on 64-bit) so this isn't necessary. + * But it does no harm since gcc sees it as a specialization of the + * EnableIfKludge template. + * Also, delegating to *this << static_cast<uint64_t>(value) would loop + * indefinitely on gcc. + */ + FakeOFStream &operator<<(std::size_t value) { + EnsureRemaining(ToStringBuf<uint64_t>::kBytes); + current_ = ToString(static_cast<uint64_t>(value), current_); return *this; } // Note this does not sync. - void Flush() { - util::WriteOrThrow(fd_, buf_.get(), builder_.position()); - builder_.Reset(); + void flush() { + if (current_ != buf_.get()) { + util::WriteOrThrow(fd_, buf_.get(), current_ - (char*)buf_.get()); + current_ = static_cast<char*>(buf_.get()); + } } // Not necessary, but does assure the data is cleared. void Finish() { - Flush(); - // It will segfault trying to null terminate otherwise. - builder_.Finalize(); + flush(); buf_.reset(); + current_ = NULL; util::FSyncOrThrow(fd_); } private: void EnsureRemaining(std::size_t amount) { - if (static_cast<std::size_t>(builder_.size() - builder_.position()) <= amount) { - Flush(); + if (UTIL_UNLIKELY(current_ + amount > end_)) { + flush(); + assert(current_ + amount <= end_); } } util::scoped_malloc buf_; - double_conversion::StringBuilder builder_; - double_conversion::DoubleToStringConverter convert_; + char *current_, *end_; + int fd_; - const std::size_t buffer_size_; }; } // namespace diff --git a/util/file_piece.cc b/util/file_piece.cc index c808e7d90..92edef27d 100644 --- a/util/file_piece.cc +++ b/util/file_piece.cc @@ -11,19 +11,22 @@ #include <unistd.h> #endif +#include <cassert> +#include <cerrno> +#include <cmath> +#include <cstdlib> #include <iostream> -#include <string> #include <limits> -#include <cassert> +#include <string> + #include <fcntl.h> -#include <cstdlib> #include <sys/types.h> #include <sys/stat.h> namespace util { ParseNumberException::ParseNumberException(StringPiece value) throw() { - *this << "Could not parse \"" << value << "\" into a number"; + *this << "Could not parse \"" << value << "\" into a "; } // Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale). @@ -62,12 +65,17 @@ FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buf FilePiece::~FilePiece() {} -StringPiece FilePiece::ReadLine(char delim) { +StringPiece FilePiece::ReadLine(char delim, bool strip_cr) { std::size_t skip = 0; while (true) { for (const char *i = position_ + skip; i < position_end_; ++i) { if (*i == delim) { - StringPiece ret(position_, i - position_); + // End of line. + // Take 1 byte off the end if it's an unwanted carriage return. + const std::size_t subtract_cr = ( + (strip_cr && i > position_ && *(i - 1) == '\r') ? + 1 : 0); + StringPiece ret(position_, i - position_ - subtract_cr); position_ = i + 1; return ret; } @@ -83,9 +91,9 @@ StringPiece FilePiece::ReadLine(char delim) { } } -bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim) { +bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim, bool strip_cr) { try { - to = ReadLine(delim); + to = ReadLine(delim, strip_cr); } catch (const util::EndOfFileException &e) { return false; } return true; } @@ -145,49 +153,59 @@ static const double_conversion::StringToDoubleConverter kConverter( "inf", "NaN"); -void ParseNumber(const char *begin, const char *&end, float &out) { +StringPiece FirstToken(StringPiece str) { + const char *i; + for (i = str.data(); i != str.data() + str.size(); ++i) { + if (kSpaces[(unsigned char)*i]) break; + } + return StringPiece(str.data(), i - str.data()); +} + +const char *ParseNumber(StringPiece str, float &out) { int count; - out = kConverter.StringToFloat(begin, end - begin, &count); - end = begin + count; + out = kConverter.StringToFloat(str.data(), str.size(), &count); + UTIL_THROW_IF_ARG(isnan(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "float"); + return str.data() + count; } -void ParseNumber(const char *begin, const char *&end, double &out) { +const char *ParseNumber(StringPiece str, double &out) { int count; - out = kConverter.StringToDouble(begin, end - begin, &count); - end = begin + count; + out = kConverter.StringToDouble(str.data(), str.size(), &count); + UTIL_THROW_IF_ARG(isnan(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "double"); + return str.data() + count; } -void ParseNumber(const char *begin, const char *&end, long int &out) { - char *silly_end; - out = strtol(begin, &silly_end, 10); - end = silly_end; +const char *ParseNumber(StringPiece str, long int &out) { + char *end; + errno = 0; + out = strtol(str.data(), &end, 10); + UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "long int"); + return end; } -void ParseNumber(const char *begin, const char *&end, unsigned long int &out) { - char *silly_end; - out = strtoul(begin, &silly_end, 10); - end = silly_end; +const char *ParseNumber(StringPiece str, unsigned long int &out) { + char *end; + errno = 0; + out = strtoul(str.data(), &end, 10); + UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "unsigned long int"); + return end; } } // namespace template <class T> T FilePiece::ReadNumber() { SkipSpaces(); while (last_space_ < position_) { - if (at_end_) { + if (UTIL_UNLIKELY(at_end_)) { // Hallucinate a null off the end of the file. std::string buffer(position_, position_end_); - const char *buf = buffer.c_str(); - const char *end = buf + buffer.size(); T ret; - ParseNumber(buf, end, ret); - if (buf == end) throw ParseNumberException(buffer); - position_ += end - buf; + // Has to be null-terminated. + const char *begin = buffer.c_str(); + const char *end = ParseNumber(StringPiece(begin, buffer.size()), ret); + position_ += end - begin; return ret; } Shift(); } - const char *end = last_space_; T ret; - ParseNumber(position_, end, ret); - if (end == position_) throw ParseNumberException(ReadDelimited()); - position_ = end; + position_ = ParseNumber(StringPiece(position_, last_space_ - position_), ret); return ret; } diff --git a/util/file_piece.hh b/util/file_piece.hh index 35f5eb648..d3d83054d 100644 --- a/util/file_piece.hh +++ b/util/file_piece.hh @@ -55,7 +55,7 @@ class FilePiece { return Consume(FindDelimiterOrEOF(delim)); } - // Read word until the line or file ends. + /// Read word until the line or file ends. bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) { assert(delim[static_cast<unsigned char>('\n')]); // Skip non-enter spaces. @@ -75,12 +75,30 @@ class FilePiece { return true; } - // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter. - // It is similar to getline in that way. - StringPiece ReadLine(char delim = '\n'); - - // Doesn't throw EndOfFileException, just returns false. - bool ReadLineOrEOF(StringPiece &to, char delim = '\n'); + /** Read a line of text from the file. + * + * Unlike ReadDelimited, this includes leading spaces and consumes the + * delimiter. It is similar to getline in that way. + * + * If strip_cr is true, any trailing carriate return (as would be found on + * a file written on Windows) will be left out of the returned line. + * + * Throws EndOfFileException if the end of the file is encountered. If the + * file does not end in a newline, this could mean that the last line is + * never read. + */ + StringPiece ReadLine(char delim = '\n', bool strip_cr = true); + + /** Read a line of text from the file, or return false on EOF. + * + * This is like ReadLine, except it returns false where ReadLine throws + * EndOfFileException. Like ReadLine it may not read the last line in the + * file if the file does not end in a newline. + * + * If strip_cr is true, any trailing carriate return (as would be found on + * a file written on Windows) will be left out of the returned line. + */ + bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true); float ReadFloat(); double ReadDouble(); diff --git a/util/file_piece_test.cc b/util/file_piece_test.cc index 4a2a521ba..11e2ab3aa 100644 --- a/util/file_piece_test.cc +++ b/util/file_piece_test.cc @@ -1,6 +1,7 @@ // Tests might fail if you have creative characters in your path. Sue me. #include "util/file_piece.hh" +#include "util/fake_ofstream.hh" #include "util/file.hh" #include "util/scoped.hh" @@ -133,5 +134,21 @@ BOOST_AUTO_TEST_CASE(StreamZipReadLine) { #endif // HAVE_ZLIB +BOOST_AUTO_TEST_CASE(Numbers) { + scoped_fd file(MakeTemp(FileLocation())); + const float floating = 3.2; + { + util::FakeOFStream writing(file.get()); + writing << "94389483984398493890287 " << floating << " 5"; + } + SeekOrThrow(file.get(), 0); + util::FilePiece f(file.release()); + BOOST_CHECK_THROW(f.ReadULong(), ParseNumberException); + BOOST_CHECK_EQUAL("94389483984398493890287", f.ReadDelimited()); + // Yes, exactly equal. Isn't double-conversion wonderful? + BOOST_CHECK_EQUAL(floating, f.ReadFloat()); + BOOST_CHECK_EQUAL(5, f.ReadULong()); +} + } // namespace } // namespace util diff --git a/util/float_to_string.cc b/util/float_to_string.cc new file mode 100644 index 000000000..1e16d6f99 --- /dev/null +++ b/util/float_to_string.cc @@ -0,0 +1,23 @@ +#include "util/float_to_string.hh" + +#include "util/double-conversion/double-conversion.h" +#include "util/double-conversion/utils.h" + +namespace util { +namespace { +const double_conversion::DoubleToStringConverter kConverter(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0); +} // namespace + +char *ToString(double value, char *to) { + double_conversion::StringBuilder builder(to, ToStringBuf<double>::kBytes); + kConverter.ToShortest(value, &builder); + return &to[builder.position()]; +} + +char *ToString(float value, char *to) { + double_conversion::StringBuilder builder(to, ToStringBuf<float>::kBytes); + kConverter.ToShortestSingle(value, &builder); + return &to[builder.position()]; +} + +} // namespace util diff --git a/util/float_to_string.hh b/util/float_to_string.hh new file mode 100644 index 000000000..d1104e790 --- /dev/null +++ b/util/float_to_string.hh @@ -0,0 +1,25 @@ +#ifndef UTIL_FLOAT_TO_STRING_H +#define UTIL_FLOAT_TO_STRING_H + +// Just for ToStringBuf +#include "util/integer_to_string.hh" + +namespace util { + +template <> struct ToStringBuf<double> { + // DoubleToStringConverter::kBase10MaximalLength + 1 for null paranoia. + static const unsigned kBytes = 18; +}; + +// Single wasn't documented in double conversion, so be conservative and +// say the same as double. +template <> struct ToStringBuf<float> { + static const unsigned kBytes = 18; +}; + +char *ToString(double value, char *to); +char *ToString(float value, char *to); + +} // namespace util + +#endif // UTIL_FLOAT_TO_STRING_H diff --git a/util/integer_to_string.cc b/util/integer_to_string.cc new file mode 100644 index 000000000..32047291d --- /dev/null +++ b/util/integer_to_string.cc @@ -0,0 +1,639 @@ +/* Fast integer to string conversion. +Source: https://github.com/miloyip/itoa-benchmark +Local modifications: +1. Return end of buffer instead of null terminating +2. Collapse to single file +3. Namespace +4. Remove test hook +5. Non-x86 support from the branch_lut code +6. Rename functions + +Copyright (C) 2014 Milo Yip + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Which is based on: http://0x80.pl/snippets/asm/sse-utoa.c + + SSE: conversion integers to decimal representation + + Author: Wojciech Muła + e-mail: wojciech_mula@poczta.onet.pl + www: http://0x80.pl/ + + License: BSD + + initial release 2011-10-21 + $Id$ +*/ + +#include "util/integer_to_string.hh" +#include <cassert> +#include <stdint.h> + +namespace util { + +namespace { +const char gDigitsLut[200] = { + '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', + '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', + '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', + '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', + '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', + '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', + '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', + '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', + '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', + '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' +}; +} // namespace + +// SSE2 implementation according to http://0x80.pl/articles/sse-itoa.html +// Modifications: (1) fix incorrect digits (2) accept all ranges (3) write to user provided buffer. + +#if defined(i386) || defined(__amd64) || defined(_M_IX86) || defined(_M_X64) + +#include <emmintrin.h> + +#ifdef _MSC_VER +#include "intrin.h" +#endif + +#ifdef _MSC_VER +#define ALIGN_PRE __declspec(align(16)) +#define ALIGN_SUF +#else +#define ALIGN_PRE +#define ALIGN_SUF __attribute__ ((aligned(16))) +#endif + +namespace { + +static const uint32_t kDiv10000 = 0xd1b71759; +ALIGN_PRE static const uint32_t kDiv10000Vector[4] ALIGN_SUF = { kDiv10000, kDiv10000, kDiv10000, kDiv10000 }; +ALIGN_PRE static const uint32_t k10000Vector[4] ALIGN_SUF = { 10000, 10000, 10000, 10000 }; +ALIGN_PRE static const uint16_t kDivPowersVector[8] ALIGN_SUF = { 8389, 5243, 13108, 32768, 8389, 5243, 13108, 32768 }; // 10^3, 10^2, 10^1, 10^0 +ALIGN_PRE static const uint16_t kShiftPowersVector[8] ALIGN_SUF = { + 1 << (16 - (23 + 2 - 16)), + 1 << (16 - (19 + 2 - 16)), + 1 << (16 - 1 - 2), + 1 << (15), + 1 << (16 - (23 + 2 - 16)), + 1 << (16 - (19 + 2 - 16)), + 1 << (16 - 1 - 2), + 1 << (15) +}; +ALIGN_PRE static const uint16_t k10Vector[8] ALIGN_SUF = { 10, 10, 10, 10, 10, 10, 10, 10 }; +ALIGN_PRE static const char kAsciiZero[16] ALIGN_SUF = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0' }; + +inline __m128i Convert8DigitsSSE2(uint32_t value) { + assert(value <= 99999999); + + // abcd, efgh = abcdefgh divmod 10000 + const __m128i abcdefgh = _mm_cvtsi32_si128(value); + const __m128i abcd = _mm_srli_epi64(_mm_mul_epu32(abcdefgh, reinterpret_cast<const __m128i*>(kDiv10000Vector)[0]), 45); + const __m128i efgh = _mm_sub_epi32(abcdefgh, _mm_mul_epu32(abcd, reinterpret_cast<const __m128i*>(k10000Vector)[0])); + + // v1 = [ abcd, efgh, 0, 0, 0, 0, 0, 0 ] + const __m128i v1 = _mm_unpacklo_epi16(abcd, efgh); + + // v1a = v1 * 4 = [ abcd * 4, efgh * 4, 0, 0, 0, 0, 0, 0 ] + const __m128i v1a = _mm_slli_epi64(v1, 2); + + // v2 = [ abcd * 4, abcd * 4, abcd * 4, abcd * 4, efgh * 4, efgh * 4, efgh * 4, efgh * 4 ] + const __m128i v2a = _mm_unpacklo_epi16(v1a, v1a); + const __m128i v2 = _mm_unpacklo_epi32(v2a, v2a); + + // v4 = v2 div 10^3, 10^2, 10^1, 10^0 = [ a, ab, abc, abcd, e, ef, efg, efgh ] + const __m128i v3 = _mm_mulhi_epu16(v2, reinterpret_cast<const __m128i*>(kDivPowersVector)[0]); + const __m128i v4 = _mm_mulhi_epu16(v3, reinterpret_cast<const __m128i*>(kShiftPowersVector)[0]); + + // v5 = v4 * 10 = [ a0, ab0, abc0, abcd0, e0, ef0, efg0, efgh0 ] + const __m128i v5 = _mm_mullo_epi16(v4, reinterpret_cast<const __m128i*>(k10Vector)[0]); + + // v6 = v5 << 16 = [ 0, a0, ab0, abc0, 0, e0, ef0, efg0 ] + const __m128i v6 = _mm_slli_epi64(v5, 16); + + // v7 = v4 - v6 = { a, b, c, d, e, f, g, h } + const __m128i v7 = _mm_sub_epi16(v4, v6); + + return v7; +} + +inline __m128i ShiftDigits_SSE2(__m128i a, unsigned digit) { + assert(digit <= 8); + switch (digit) { + case 0: return a; + case 1: return _mm_srli_si128(a, 1); + case 2: return _mm_srli_si128(a, 2); + case 3: return _mm_srli_si128(a, 3); + case 4: return _mm_srli_si128(a, 4); + case 5: return _mm_srli_si128(a, 5); + case 6: return _mm_srli_si128(a, 6); + case 7: return _mm_srli_si128(a, 7); + case 8: return _mm_srli_si128(a, 8); + } + return a; // should not execute here. +} + +} // namespace + +// Original name: u32toa_sse2 +char *ToString(uint32_t value, char* buffer) { + if (value < 10000) { + const uint32_t d1 = (value / 100) << 1; + const uint32_t d2 = (value % 100) << 1; + + if (value >= 1000) + *buffer++ = gDigitsLut[d1]; + if (value >= 100) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 10) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + //*buffer++ = '\0'; + return buffer; + } + else if (value < 100000000) { + // Experiment shows that this case SSE2 is slower +#if 0 + const __m128i a = Convert8DigitsSSE2(value); + + // Convert to bytes, add '0' + const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]); + + // Count number of digit + const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0])); + unsigned long digit; +#ifdef _MSC_VER + _BitScanForward(&digit, ~mask | 0x8000); +#else + digit = __builtin_ctz(~mask | 0x8000); +#endif + + // Shift digits to the beginning + __m128i result = ShiftDigits_SSE2(va, digit); + //__m128i result = _mm_srl_epi64(va, _mm_cvtsi32_si128(digit * 8)); + _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result); + buffer[8 - digit] = '\0'; +#else + // value = bbbbcccc + const uint32_t b = value / 10000; + const uint32_t c = value % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = gDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + + *buffer++ = gDigitsLut[d3]; + *buffer++ = gDigitsLut[d3 + 1]; + *buffer++ = gDigitsLut[d4]; + *buffer++ = gDigitsLut[d4 + 1]; +// *buffer++ = '\0'; + return buffer; +#endif + } + else { + // value = aabbbbbbbb in decimal + + const uint32_t a = value / 100000000; // 1 to 42 + value %= 100000000; + + if (a >= 10) { + const unsigned i = a << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + } + else + *buffer++ = '0' + static_cast<char>(a); + + const __m128i b = Convert8DigitsSSE2(value); + const __m128i ba = _mm_add_epi8(_mm_packus_epi16(_mm_setzero_si128(), b), reinterpret_cast<const __m128i*>(kAsciiZero)[0]); + const __m128i result = _mm_srli_si128(ba, 8); + _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result); +// buffer[8] = '\0'; + return buffer + 8; + } +} + +// Original name: u64toa_sse2 +char *ToString(uint64_t value, char* buffer) { + if (value < 100000000) { + uint32_t v = static_cast<uint32_t>(value); + if (v < 10000) { + const uint32_t d1 = (v / 100) << 1; + const uint32_t d2 = (v % 100) << 1; + + if (v >= 1000) + *buffer++ = gDigitsLut[d1]; + if (v >= 100) + *buffer++ = gDigitsLut[d1 + 1]; + if (v >= 10) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + //*buffer++ = '\0'; + return buffer; + } + else { + // Experiment shows that this case SSE2 is slower +#if 0 + const __m128i a = Convert8DigitsSSE2(v); + + // Convert to bytes, add '0' + const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]); + + // Count number of digit + const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0])); + unsigned long digit; +#ifdef _MSC_VER + _BitScanForward(&digit, ~mask | 0x8000); +#else + digit = __builtin_ctz(~mask | 0x8000); +#endif + + // Shift digits to the beginning + __m128i result = ShiftDigits_SSE2(va, digit); + _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result); + buffer[8 - digit] = '\0'; +#else + // value = bbbbcccc + const uint32_t b = v / 10000; + const uint32_t c = v % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = gDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + + *buffer++ = gDigitsLut[d3]; + *buffer++ = gDigitsLut[d3 + 1]; + *buffer++ = gDigitsLut[d4]; + *buffer++ = gDigitsLut[d4 + 1]; + //*buffer++ = '\0'; + return buffer; +#endif + } + } + else if (value < 10000000000000000) { + const uint32_t v0 = static_cast<uint32_t>(value / 100000000); + const uint32_t v1 = static_cast<uint32_t>(value % 100000000); + + const __m128i a0 = Convert8DigitsSSE2(v0); + const __m128i a1 = Convert8DigitsSSE2(v1); + + // Convert to bytes, add '0' + const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]); + + // Count number of digit + const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0])); +#ifdef _MSC_VER + unsigned long digit; + _BitScanForward(&digit, ~mask | 0x8000); +#else + unsigned digit = __builtin_ctz(~mask | 0x8000); +#endif + + // Shift digits to the beginning + __m128i result = ShiftDigits_SSE2(va, digit); + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); +// buffer[16 - digit] = '\0'; + return &buffer[16 - digit]; + } + else { + const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844 + value %= 10000000000000000; + + if (a < 10) + *buffer++ = '0' + static_cast<char>(a); + else if (a < 100) { + const uint32_t i = a << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + } + else if (a < 1000) { + *buffer++ = '0' + static_cast<char>(a / 100); + + const uint32_t i = (a % 100) << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + } + else { + const uint32_t i = (a / 100) << 1; + const uint32_t j = (a % 100) << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + *buffer++ = gDigitsLut[j]; + *buffer++ = gDigitsLut[j + 1]; + } + + const uint32_t v0 = static_cast<uint32_t>(value / 100000000); + const uint32_t v1 = static_cast<uint32_t>(value % 100000000); + + const __m128i a0 = Convert8DigitsSSE2(v0); + const __m128i a1 = Convert8DigitsSSE2(v1); + + // Convert to bytes, add '0' + const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]); + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), va); +// buffer[16] = '\0'; + return &buffer[16]; + } +} + +#else // Generic Non-x86 case + +// Orignal name: u32toa_branchlut +char *ToString(uint32_t value, char* buffer) { + if (value < 10000) { + const uint32_t d1 = (value / 100) << 1; + const uint32_t d2 = (value % 100) << 1; + + if (value >= 1000) + *buffer++ = gDigitsLut[d1]; + if (value >= 100) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 10) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + } + else if (value < 100000000) { + // value = bbbbcccc + const uint32_t b = value / 10000; + const uint32_t c = value % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = gDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + + *buffer++ = gDigitsLut[d3]; + *buffer++ = gDigitsLut[d3 + 1]; + *buffer++ = gDigitsLut[d4]; + *buffer++ = gDigitsLut[d4 + 1]; + } + else { + // value = aabbbbcccc in decimal + + const uint32_t a = value / 100000000; // 1 to 42 + value %= 100000000; + + if (a >= 10) { + const unsigned i = a << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + } + else + *buffer++ = '0' + static_cast<char>(a); + + const uint32_t b = value / 10000; // 0 to 9999 + const uint32_t c = value % 10000; // 0 to 9999 + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + *buffer++ = gDigitsLut[d1]; + *buffer++ = gDigitsLut[d1 + 1]; + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + *buffer++ = gDigitsLut[d3]; + *buffer++ = gDigitsLut[d3 + 1]; + *buffer++ = gDigitsLut[d4]; + *buffer++ = gDigitsLut[d4 + 1]; + } + return buffer; //*buffer++ = '\0'; +} + +// Original name: u64toa_branchlut +char *ToString(uint64_t value, char* buffer) { + if (value < 100000000) { + uint32_t v = static_cast<uint32_t>(value); + if (v < 10000) { + const uint32_t d1 = (v / 100) << 1; + const uint32_t d2 = (v % 100) << 1; + + if (v >= 1000) + *buffer++ = gDigitsLut[d1]; + if (v >= 100) + *buffer++ = gDigitsLut[d1 + 1]; + if (v >= 10) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + } + else { + // value = bbbbcccc + const uint32_t b = v / 10000; + const uint32_t c = v % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = gDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + + *buffer++ = gDigitsLut[d3]; + *buffer++ = gDigitsLut[d3 + 1]; + *buffer++ = gDigitsLut[d4]; + *buffer++ = gDigitsLut[d4 + 1]; + } + } + else if (value < 10000000000000000) { + const uint32_t v0 = static_cast<uint32_t>(value / 100000000); + const uint32_t v1 = static_cast<uint32_t>(value % 100000000); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + if (value >= 1000000000000000) + *buffer++ = gDigitsLut[d1]; + if (value >= 100000000000000) + *buffer++ = gDigitsLut[d1 + 1]; + if (value >= 10000000000000) + *buffer++ = gDigitsLut[d2]; + if (value >= 1000000000000) + *buffer++ = gDigitsLut[d2 + 1]; + if (value >= 100000000000) + *buffer++ = gDigitsLut[d3]; + if (value >= 10000000000) + *buffer++ = gDigitsLut[d3 + 1]; + if (value >= 1000000000) + *buffer++ = gDigitsLut[d4]; + if (value >= 100000000) + *buffer++ = gDigitsLut[d4 + 1]; + + *buffer++ = gDigitsLut[d5]; + *buffer++ = gDigitsLut[d5 + 1]; + *buffer++ = gDigitsLut[d6]; + *buffer++ = gDigitsLut[d6 + 1]; + *buffer++ = gDigitsLut[d7]; + *buffer++ = gDigitsLut[d7 + 1]; + *buffer++ = gDigitsLut[d8]; + *buffer++ = gDigitsLut[d8 + 1]; + } + else { + const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844 + value %= 10000000000000000; + + if (a < 10) + *buffer++ = '0' + static_cast<char>(a); + else if (a < 100) { + const uint32_t i = a << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + } + else if (a < 1000) { + *buffer++ = '0' + static_cast<char>(a / 100); + + const uint32_t i = (a % 100) << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + } + else { + const uint32_t i = (a / 100) << 1; + const uint32_t j = (a % 100) << 1; + *buffer++ = gDigitsLut[i]; + *buffer++ = gDigitsLut[i + 1]; + *buffer++ = gDigitsLut[j]; + *buffer++ = gDigitsLut[j + 1]; + } + + const uint32_t v0 = static_cast<uint32_t>(value / 100000000); + const uint32_t v1 = static_cast<uint32_t>(value % 100000000); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + *buffer++ = gDigitsLut[d1]; + *buffer++ = gDigitsLut[d1 + 1]; + *buffer++ = gDigitsLut[d2]; + *buffer++ = gDigitsLut[d2 + 1]; + *buffer++ = gDigitsLut[d3]; + *buffer++ = gDigitsLut[d3 + 1]; + *buffer++ = gDigitsLut[d4]; + *buffer++ = gDigitsLut[d4 + 1]; + *buffer++ = gDigitsLut[d5]; + *buffer++ = gDigitsLut[d5 + 1]; + *buffer++ = gDigitsLut[d6]; + *buffer++ = gDigitsLut[d6 + 1]; + *buffer++ = gDigitsLut[d7]; + *buffer++ = gDigitsLut[d7 + 1]; + *buffer++ = gDigitsLut[d8]; + *buffer++ = gDigitsLut[d8 + 1]; + } + return buffer; +} + +#endif // End of architecture if statement. + +// Signed wrappers. The negation is done on the unsigned version because +// doing so has defined behavior for INT_MIN. +char *ToString(int32_t value, char *to) { + uint32_t un = static_cast<uint32_t>(value); + if (value < 0) { + *to++ = '-'; + un = -un; + } + return ToString(un, to); +} + +char *ToString(int64_t value, char *to) { + uint64_t un = static_cast<uint64_t>(value); + if (value < 0) { + *to++ = '-'; + un = -un; + } + return ToString(un, to); +} + +// No optimization for this case yet. +char *ToString(int16_t value, char *to) { + return ToString((int32_t)value, to); +} +char *ToString(uint16_t value, char *to) { + return ToString((uint32_t)value, to); +} + +} // namespace util diff --git a/util/integer_to_string.hh b/util/integer_to_string.hh new file mode 100644 index 000000000..0d975b14e --- /dev/null +++ b/util/integer_to_string.hh @@ -0,0 +1,56 @@ +#ifndef UTIL_INTEGER_TO_STRING_H +#define UTIL_INTEGER_TO_STRING_H +#include <cstddef> +#include <stdint.h> + +namespace util { + +/* These functions convert integers to strings and return the end pointer. + */ +char *ToString(uint32_t value, char *to); +char *ToString(uint64_t value, char *to); + +// Implemented as wrappers to above +char *ToString(int32_t value, char *to); +char *ToString(int64_t value, char *to); + +// Calls the 32-bit versions for now. +char *ToString(uint16_t value, char *to); +char *ToString(int16_t value, char *to); + +inline char *ToString(bool value, char *to) { + *to++ = '0' + value; + return to; +} + +// How many bytes to reserve in the buffer for these strings: +// g++ 4.9.1 doesn't work with this: +// static const std::size_t kBytes = 5; +// So use enum. +template <class T> struct ToStringBuf; +template <> struct ToStringBuf<bool> { + enum { kBytes = 1 }; +}; +template <> struct ToStringBuf<uint16_t> { + enum { kBytes = 5 }; +}; +template <> struct ToStringBuf<int16_t> { + enum { kBytes = 6 }; +}; +template <> struct ToStringBuf<uint32_t> { + enum { kBytes = 10 }; +}; +template <> struct ToStringBuf<int32_t> { + enum { kBytes = 11 }; +}; +template <> struct ToStringBuf<uint64_t> { + enum { kBytes = 20 }; +}; +template <> struct ToStringBuf<int64_t> { + // Not a typo. 2^63 has 19 digits. + enum { kBytes = 20 }; +}; + +} // namespace util + +#endif // UTIL_INTEGER_TO_STRING_H diff --git a/util/integer_to_string_test.cc b/util/integer_to_string_test.cc new file mode 100644 index 000000000..ded1ecec7 --- /dev/null +++ b/util/integer_to_string_test.cc @@ -0,0 +1,65 @@ +#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE +#include "util/integer_to_string.hh" +#include "util/string_piece.hh" + +#define BOOST_TEST_MODULE IntegerToStringTest +#include <boost/test/unit_test.hpp> +#include <boost/lexical_cast.hpp> + +#include <limits> + +namespace util { +namespace { + +template <class T> void TestValue(const T value) { + char buf[ToStringBuf<T>::kBytes]; + StringPiece result(buf, ToString(value, buf) - buf); + BOOST_REQUIRE_GE(static_cast<std::size_t>(ToStringBuf<T>::kBytes), result.size()); + BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result); +} + +template <class T> void TestCorners() { + TestValue(std::numeric_limits<T>::min()); + TestValue(std::numeric_limits<T>::max()); + TestValue(static_cast<T>(0)); + TestValue(static_cast<T>(-1)); + TestValue(static_cast<T>(1)); +} + +BOOST_AUTO_TEST_CASE(Corners) { + TestCorners<uint16_t>(); + TestCorners<uint32_t>(); + TestCorners<uint64_t>(); + TestCorners<int16_t>(); + TestCorners<int32_t>(); + TestCorners<int64_t>(); +} + +template <class T> void TestAll() { + for (T i = std::numeric_limits<T>::min(); i < std::numeric_limits<T>::max(); ++i) { + TestValue(i); + } + TestValue(std::numeric_limits<T>::max()); +} + +BOOST_AUTO_TEST_CASE(Short) { + TestAll<uint16_t>(); + TestAll<int16_t>(); +} + +template <class T> void Test10s() { + for (T i = 1; i < std::numeric_limits<T>::max() / 10; i *= 10) { + TestValue(i); + TestValue(i - 1); + TestValue(i + 1); + } +} + +BOOST_AUTO_TEST_CASE(Tens) { + Test10s<uint64_t>(); + Test10s<int64_t>(); + Test10s<uint32_t>(); + Test10s<int32_t>(); +} + +}} // namespaces diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh index 245340ddb..f32b64ea3 100644 --- a/util/probing_hash_table.hh +++ b/util/probing_hash_table.hh @@ -88,7 +88,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry #ifdef DEBUG assert(initialized_); #endif - for (MutableIterator i = Ideal(t);;) { + for (MutableIterator i = Ideal(t.GetKey());;) { Key got(i->GetKey()); if (equal_(got, t.GetKey())) { out = i; return true; } if (equal_(got, invalid_)) { @@ -108,7 +108,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry #ifdef DEBUG assert(initialized_); #endif - for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) { + for (MutableIterator i(Ideal(key));;) { Key got(i->GetKey()); if (equal_(got, key)) { out = i; return true; } if (equal_(got, invalid_)) return false; @@ -118,7 +118,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry // Like UnsafeMutableFind, but the key must be there. template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) { - for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) { + for (MutableIterator i(Ideal(key));;) { Key got(i->GetKey()); if (equal_(got, key)) { return i; } assert(!equal_(got, invalid_)); @@ -131,7 +131,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry #ifdef DEBUG assert(initialized_); #endif - for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) { + for (ConstIterator i(Ideal(key));;) { Key got(i->GetKey()); if (equal_(got, key)) { out = i; return true; } if (equal_(got, invalid_)) return false; @@ -141,7 +141,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry // Like Find but we're sure it must be there. template <class Key> ConstIterator MustFind(const Key key) const { - for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) { + for (ConstIterator i(Ideal(key));;) { Key got(i->GetKey()); if (equal_(got, key)) { return i; } assert(!equal_(got, invalid_)); @@ -213,7 +213,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry MutableIterator i; // Beginning can be wrap-arounds. for (i = begin_; !equal_(i->GetKey(), invalid_); ++i) { - MutableIterator ideal = Ideal(*i); + MutableIterator ideal = Ideal(i->GetKey()); UTIL_THROW_IF(ideal > i && ideal <= last, Exception, "Inconsistency at position " << (i - begin_) << " should be at " << (ideal - begin_)); } MutableIterator pre_gap = i; @@ -222,7 +222,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry pre_gap = i; continue; } - MutableIterator ideal = Ideal(*i); + MutableIterator ideal = Ideal(i->GetKey()); UTIL_THROW_IF(ideal > i || ideal <= pre_gap, Exception, "Inconsistency at position " << (i - begin_) << " with ideal " << (ideal - begin_)); } } @@ -230,12 +230,15 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry private: friend class AutoProbing<Entry, Hash, Equal>; - template <class T> MutableIterator Ideal(const T &t) { - return begin_ + (hash_(t.GetKey()) % buckets_); + MutableIterator Ideal(const Key key) { + return begin_ + (hash_(key) % buckets_); + } + ConstIterator Ideal(const Key key) const { + return begin_ + (hash_(key) % buckets_); } template <class T> MutableIterator UncheckedInsert(const T &t) { - for (MutableIterator i(Ideal(t));;) { + for (MutableIterator i(Ideal(t.GetKey()));;) { if (equal_(i->GetKey(), invalid_)) { *i = t; return i; } if (++i == end_) { i = begin_; } } @@ -277,6 +280,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry // Assumes that the key is unique. Multiple insertions won't cause a failure, just inconsistent lookup. template <class T> MutableIterator Insert(const T &t) { + ++backend_.entries_; DoubleIfNeeded(); return backend_.UncheckedInsert(t); } diff --git a/util/probing_hash_table_benchmark_main.cc b/util/probing_hash_table_benchmark_main.cc new file mode 100644 index 000000000..3e12290cf --- /dev/null +++ b/util/probing_hash_table_benchmark_main.cc @@ -0,0 +1,49 @@ +#include "util/probing_hash_table.hh" +#include "util/scoped.hh" +#include "util/usage.hh" + +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_int_distribution.hpp> + +#include <iostream> + +namespace util { +namespace { + +struct Entry { + typedef uint64_t Key; + Key key; + Key GetKey() const { return key; } +}; + +typedef util::ProbingHashTable<Entry, util::IdentityHash> Table; + +void Test(uint64_t entries, uint64_t lookups, float multiplier = 1.5) { + std::size_t size = Table::Size(entries, multiplier); + scoped_malloc backing(util::CallocOrThrow(size)); + Table table(backing.get(), size); + boost::random::mt19937 gen; + boost::random::uniform_int_distribution<> dist(std::numeric_limits<uint64_t>::min(), std::numeric_limits<uint64_t>::max()); + double start = UserTime(); + for (uint64_t i = 0; i < entries; ++i) { + Entry entry; + entry.key = dist(gen); + table.Insert(entry); + } + double inserted = UserTime(); + bool meaningless = true; + for (uint64_t i = 0; i < lookups; ++i) { + Table::ConstIterator it; + meaningless ^= table.Find(dist(gen), it); + } + std::cout << meaningless << ' ' << entries << ' ' << multiplier << ' ' << (inserted - start) << ' ' << (UserTime() - inserted) / static_cast<double>(lookups) << std::endl; +} + +} // namespace +} // namespace util + +int main() { + for (uint64_t i = 1; i <= 10000000ULL; i *= 10) { + util::Test(i, 10000000); + } +} diff --git a/util/scoped.cc b/util/scoped.cc index de1d9e940..84f4344b7 100644 --- a/util/scoped.cc +++ b/util/scoped.cc @@ -7,6 +7,8 @@ namespace util { +// TODO: if we're really under memory pressure, don't allocate memory to +// display the error. MallocException::MallocException(std::size_t requested) throw() { *this << "for " << requested << " bytes "; } @@ -16,10 +18,6 @@ MallocException::~MallocException() throw() {} namespace { void *InspectAddr(void *addr, std::size_t requested, const char *func_name) { UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name); - // These routines are often used for large chunks of memory where huge pages help. -#if MADV_HUGEPAGE - madvise(addr, requested, MADV_HUGEPAGE); -#endif return addr; } } // namespace @@ -36,4 +34,10 @@ void scoped_malloc::call_realloc(std::size_t requested) { p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc"); } +void AdviseHugePages(const void *addr, std::size_t size) { +#if MADV_HUGEPAGE + madvise((void*)addr, size, MADV_HUGEPAGE); +#endif +} + } // namespace util diff --git a/util/scoped.hh b/util/scoped.hh index c347a43cc..21e9a7566 100644 --- a/util/scoped.hh +++ b/util/scoped.hh @@ -104,6 +104,8 @@ template <class T> class scoped_ptr : public scoped<T, scoped_delete_forward> { explicit scoped_ptr(T *p = NULL) : scoped<T, scoped_delete_forward>(p) {} }; +void AdviseHugePages(const void *addr, std::size_t size); + } // namespace util #endif // UTIL_SCOPED_H diff --git a/util/stream/Jamfile b/util/stream/Jamfile index 2e99979f5..cde0247e7 100644 --- a/util/stream/Jamfile +++ b/util/stream/Jamfile @@ -4,9 +4,10 @@ # timer-link = ; #} -fakelib stream : chain.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ; +fakelib stream : chain.cc rewindable_stream.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ; import testing ; unit-test io_test : io_test.cc stream /top//boost_unit_test_framework ; unit-test stream_test : stream_test.cc stream /top//boost_unit_test_framework ; +unit-test rewindable_stream_test : rewindable_stream_test.cc stream /top//boost_unit_test_framework ; unit-test sort_test : sort_test.cc stream /top//boost_unit_test_framework ; diff --git a/util/stream/block.hh b/util/stream/block.hh index 6a70dba3e..42df13f32 100644 --- a/util/stream/block.hh +++ b/util/stream/block.hh @@ -72,6 +72,7 @@ class Block { private: friend class Link; + friend class RewindableStream; /** * Points this block's memory at NULL. diff --git a/util/stream/chain.hh b/util/stream/chain.hh index 0cd8c2aae..8caa1afcb 100644 --- a/util/stream/chain.hh +++ b/util/stream/chain.hh @@ -23,6 +23,7 @@ class ChainConfigException : public Exception { }; class Chain; +class RewindableStream; /** * Encapsulates a @ref PCQueue "producer queue" and a @ref PCQueue "consumer queue" within a @ref Chain "chain". @@ -35,6 +36,7 @@ class ChainPosition { private: friend class Chain; friend class Link; + friend class RewindableStream; ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress) : in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {} diff --git a/util/stream/io.hh b/util/stream/io.hh index c3b53bbfe..4605a8a79 100644 --- a/util/stream/io.hh +++ b/util/stream/io.hh @@ -70,8 +70,8 @@ class FileBuffer { return PWriteAndRecycle(file_.get()); } - PRead Source() const { - return PRead(file_.get()); + PRead Source(bool discard = false) { + return PRead(discard ? file_.release() : file_.get(), discard); } uint64_t Size() const { diff --git a/util/stream/rewindable_stream.cc b/util/stream/rewindable_stream.cc new file mode 100644 index 000000000..c7e39231b --- /dev/null +++ b/util/stream/rewindable_stream.cc @@ -0,0 +1,117 @@ +#include "util/stream/rewindable_stream.hh" +#include "util/pcqueue.hh" + +namespace util { +namespace stream { + +RewindableStream::RewindableStream() + : current_(NULL), in_(NULL), out_(NULL), poisoned_(true) { + // nothing +} + +void RewindableStream::Init(const ChainPosition &position) { + UTIL_THROW_IF2(in_, "RewindableStream::Init twice"); + in_ = position.in_; + out_ = position.out_; + poisoned_ = false; + progress_ = position.progress_; + entry_size_ = position.GetChain().EntrySize(); + block_size_ = position.GetChain().BlockSize(); + FetchBlock(); + current_bl_ = &second_bl_; + current_ = static_cast<uint8_t*>(current_bl_->Get()); + end_ = current_ + current_bl_->ValidSize(); +} + +const void *RewindableStream::Get() const { + return current_; +} + +void *RewindableStream::Get() { + return current_; +} + +RewindableStream &RewindableStream::operator++() { + assert(*this); + assert(current_ < end_); + current_ += entry_size_; + if (current_ == end_) { + // two cases: either we need to fetch the next block, or we've already + // fetched it before. We can check this by looking at the current_bl_ + // pointer: if it's at the second_bl_, we need to flush and fetch a new + // block. Otherwise, we can just move over to the second block. + if (current_bl_ == &second_bl_) { + if (first_bl_) { + out_->Produce(first_bl_); + progress_ += first_bl_.ValidSize(); + } + first_bl_ = second_bl_; + FetchBlock(); + } + current_bl_ = &second_bl_; + current_ = static_cast<uint8_t *>(second_bl_.Get()); + end_ = current_ + second_bl_.ValidSize(); + } + + if (!*current_bl_) + { + if (current_bl_ == &second_bl_ && first_bl_) + { + out_->Produce(first_bl_); + progress_ += first_bl_.ValidSize(); + } + out_->Produce(*current_bl_); + poisoned_ = true; + } + + return *this; +} + +void RewindableStream::FetchBlock() { + // The loop is needed since it is *feasible* that we're given 0 sized but + // valid blocks + do { + in_->Consume(second_bl_); + } while (second_bl_ && second_bl_.ValidSize() == 0); +} + +void RewindableStream::Mark() { + marked_ = current_; +} + +void RewindableStream::Rewind() { + if (marked_ >= first_bl_.Get() && marked_ < first_bl_.ValidEnd()) { + current_bl_ = &first_bl_; + current_ = marked_; + } else if (marked_ >= second_bl_.Get() && marked_ < second_bl_.ValidEnd()) { + current_bl_ = &second_bl_; + current_ = marked_; + } else { UTIL_THROW2("RewindableStream rewound too far"); } +} + +void RewindableStream::Poison() { + assert(!poisoned_); + + // Three things: if we have a buffered first block, we need to produce it + // first. Then, produce the partial "current" block, and then send the + // poison down the chain + + // if we still have a buffered first block, produce it first + if (current_bl_ == &second_bl_ && first_bl_) { + out_->Produce(first_bl_); + progress_ += first_bl_.ValidSize(); + } + + // send our partial block + current_bl_->SetValidSize(current_ + - static_cast<uint8_t *>(current_bl_->Get())); + out_->Produce(*current_bl_); + progress_ += current_bl_->ValidSize(); + + // send down the poison + current_bl_->SetToPoison(); + out_->Produce(*current_bl_); + poisoned_ = true; +} +} +} diff --git a/util/stream/rewindable_stream.hh b/util/stream/rewindable_stream.hh new file mode 100644 index 000000000..9ee637c99 --- /dev/null +++ b/util/stream/rewindable_stream.hh @@ -0,0 +1,108 @@ +#ifndef UTIL_STREAM_REWINDABLE_STREAM_H +#define UTIL_STREAM_REWINDABLE_STREAM_H + +#include "util/stream/chain.hh" + +#include <boost/noncopyable.hpp> + +namespace util { +namespace stream { + +/** + * A RewindableStream is like a Stream (but one that is only used for + * creating input at the start of a chain) except that it can be rewound to + * be able to re-write a part of the stream before it is sent. Rewinding + * has a limit of 2 * block_size_ - 1 in distance (it does *not* buffer an + * entire stream into memory, only a maximum of 2 * block_size_). + */ +class RewindableStream : boost::noncopyable { + public: + /** + * Creates an uninitialized RewindableStream. You **must** call Init() + * on it later! + */ + RewindableStream(); + + /** + * Initializes an existing RewindableStream at a specific position in + * a Chain. + * + * @param position The position in the chain to get input from and + * produce output on + */ + void Init(const ChainPosition &position); + + /** + * Constructs a RewindableStream at a specific position in a Chain all + * in one step. + * + * Equivalent to RewindableStream a(); a.Init(....); + */ + explicit RewindableStream(const ChainPosition &position); + + /** + * Gets the record at the current stream position. Const version. + */ + const void *Get() const; + + /** + * Gets the record at the current stream position. + */ + void *Get(); + + operator bool() const { return current_; } + + bool operator!() const { return !(*this); } + + /** + * Marks the current position in the stream to be rewound to later. + * Note that you can only rewind back as far as 2 * block_size_ - 1! + */ + void Mark(); + + /** + * Rewinds the stream back to the marked position. This will throw an + * exception if the marked position is too far away. + */ + void Rewind(); + + /** + * Moves the stream forward to the next record. This internally may + * buffer a block for the purposes of rewinding. + */ + RewindableStream& operator++(); + + /** + * Poisons the stream. This sends any buffered blocks down the chain + * and sends a poison block as well (sending at most 2 non-poison and 1 + * poison block). + */ + void Poison(); + + private: + void FetchBlock(); + + std::size_t entry_size_; + std::size_t block_size_; + + uint8_t *marked_, *current_, *end_; + + Block first_bl_; + Block second_bl_; + Block* current_bl_; + + PCQueue<Block> *in_, *out_; + + bool poisoned_; + + WorkerProgress progress_; +}; + +inline Chain &operator>>(Chain &chain, RewindableStream &stream) { + stream.Init(chain.Add()); + return chain; +} + +} +} +#endif diff --git a/util/stream/rewindable_stream_test.cc b/util/stream/rewindable_stream_test.cc new file mode 100644 index 000000000..3ed87f372 --- /dev/null +++ b/util/stream/rewindable_stream_test.cc @@ -0,0 +1,41 @@ +#include "util/stream/io.hh" + +#include "util/stream/rewindable_stream.hh" +#include "util/file.hh" + +#define BOOST_TEST_MODULE RewindableStreamTest +#include <boost/test/unit_test.hpp> + +namespace util { +namespace stream { +namespace { + +BOOST_AUTO_TEST_CASE(RewindableStreamTest) { + scoped_fd in(MakeTemp("io_test_temp")); + for (uint64_t i = 0; i < 100000; ++i) { + WriteOrThrow(in.get(), &i, sizeof(uint64_t)); + } + SeekOrThrow(in.get(), 0); + + ChainConfig config; + config.entry_size = 8; + config.total_memory = 100; + config.block_count = 6; + + RewindableStream s; + Chain chain(config); + chain >> Read(in.get()) >> s >> kRecycle; + uint64_t i = 0; + for (; s; ++s, ++i) { + BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get())); + if (100000UL - i == 2) + s.Mark(); + } + BOOST_CHECK_EQUAL(100000ULL, i); + s.Rewind(); + BOOST_CHECK_EQUAL(100000ULL - 2, *static_cast<const uint64_t*>(s.Get())); +} + +} +} +} diff --git a/util/stream/sort.hh b/util/stream/sort.hh index a1e0a8539..1b4801ad6 100644 --- a/util/stream/sort.hh +++ b/util/stream/sort.hh @@ -25,6 +25,7 @@ #include "util/stream/timer.hh" #include "util/file.hh" +#include "util/fixed_array.hh" #include "util/scoped.hh" #include "util/sized_iterator.hh" @@ -544,6 +545,54 @@ template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, cons return size; } +/** + * Represents an @ref util::FixedArray "array" capable of storing @ref util::stream::Sort "Sort" objects. + * + * In the anticipated use case, an instance of this class will maintain one @ref util::stream::Sort "Sort" object + * for each n-gram order (ranging from 1 up to the maximum n-gram order being processed). + * Use in this manner would enable the n-grams each n-gram order to be sorted, in parallel. + * + * @tparam Compare An @ref Comparator "ngram comparator" to use during sorting. + */ +template <class Compare, class Combine = NeverCombine> class Sorts : public FixedArray<Sort<Compare, Combine> > { + private: + typedef Sort<Compare, Combine> S; + typedef FixedArray<S> P; + + public: + /** + * Constructs, but does not initialize. + * + * @ref util::FixedArray::Init() "Init" must be called before use. + * + * @see util::FixedArray::Init() + */ + Sorts() {} + + /** + * Constructs an @ref util::FixedArray "array" capable of storing a fixed number of @ref util::stream::Sort "Sort" objects. + * + * @param number The maximum number of @ref util::stream::Sort "sorters" that can be held by this @ref util::FixedArray "array" + * @see util::FixedArray::FixedArray() + */ + explicit Sorts(std::size_t number) : FixedArray<Sort<Compare, Combine> >(number) {} + + /** + * Constructs a new @ref util::stream::Sort "Sort" object which is stored in this @ref util::FixedArray "array". + * + * The new @ref util::stream::Sort "Sort" object is constructed using the provided @ref util::stream::SortConfig "SortConfig" and @ref Comparator "ngram comparator"; + * once constructed, a new worker @ref util::stream::Thread "thread" (owned by the @ref util::stream::Chain "chain") will sort the n-gram data stored + * in the @ref util::stream::Block "blocks" of the provided @ref util::stream::Chain "chain". + * + * @see util::stream::Sort::Sort() + * @see util::stream::Chain::operator>>() + */ + void push_back(util::stream::Chain &chain, const util::stream::SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine()) { + new (P::end()) S(chain, config, compare, combine); // use "placement new" syntax to initalize S in an already-allocated memory location + P::Constructed(); + } +}; + } // namespace stream } // namespace util diff --git a/util/tempfile.hh b/util/tempfile.hh index 9b872a27e..9c28346fc 100644 --- a/util/tempfile.hh +++ b/util/tempfile.hh @@ -10,13 +10,14 @@ #if defined(_WIN32) || defined(_WIN64) #include <windows.h> +#else +#include <unistd.h> #endif #include <boost/filesystem.hpp> #include <boost/noncopyable.hpp> #include "util/exception.hh" -#include "util/unistd.hh" namespace util { diff --git a/util/unistd.hh b/util/unistd.hh deleted file mode 100644 index f99be592a..000000000 --- a/util/unistd.hh +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef UTIL_UNISTD_H -#define UTIL_UNISTD_H - -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) - -// Windows doesn't define <unistd.h> -// -// So we define what we need here instead: -// -#define STDIN_FILENO=0 -#define STDOUT_FILENO=1 - - -#else // Huzzah for POSIX! - -#include <unistd.h> - -#endif - - - -#endif // UTIL_UNISTD_H diff --git a/util/usage.cc b/util/usage.cc index f2b661014..5f66b17d2 100644 --- a/util/usage.cc +++ b/util/usage.cc @@ -135,6 +135,16 @@ double WallTime() { return Subtract(GetWall(), kRecordStart.Started()); } +double UserTime() { +#if !defined(_WIN32) && !defined(_WIN64) + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage)) + return 0.0; + return DoubleSec(usage.ru_utime); +#endif + return 0.0; +} + void PrintUsage(std::ostream &out) { #if !defined(_WIN32) && !defined(_WIN64) // Linux doesn't set memory usage in getrusage :-( diff --git a/util/usage.hh b/util/usage.hh index 85bd2119a..dff81b59d 100644 --- a/util/usage.hh +++ b/util/usage.hh @@ -9,6 +9,8 @@ namespace util { // Time in seconds since process started. Zero on unsupported platforms. double WallTime(); +double UserTime(); + void PrintUsage(std::ostream &to); // Determine how much physical memory there is. Return 0 on failure. @@ -16,5 +18,6 @@ uint64_t GuessPhysicalMemory(); // Parse a size like unix sort. Sadly, this means the default multiplier is K. uint64_t ParseSize(const std::string &arg); + } // namespace util #endif // UTIL_USAGE_H |