KenLM 7408730be415db9b650560a8b2bd3e4e3af49ec9.

unistd.hh is dead.
author: Kenneth Heafield <github@kheafield.com> 2015-05-19 22:27:30 +0300
committer: Kenneth Heafield <github@kheafield.com> 2015-05-19 22:27:30 +0300
commit: a70d37e46fce323f7a9720e3a621f35d19e4ac9f (patch)
tree: d45651f2d44ba7c373f2a198611dca3b196e36d9 /util
parent: 90309aebfa0184ac611725520443b34c3331794b (diff)
27 files changed, 1393 insertions, 127 deletions
diff --git a/util/Jamfile b/util/Jamfile
index 2d3cede01..7538f7d17 100644
--- a/util/Jamfile
+++ b/util/Jamfile
@@ -21,10 +21,13 @@ obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(c
 
 fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ;
 
-fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc random.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
+fakelib kenutil : [ glob *.cc : parallel_read.cc read_compressed.cc *_main.cc *_test.cc ] read_compressed parallel_read double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
 
 exe cat_compressed : cat_compressed_main.cc kenutil ;
 
+#Does not install this
+exe probing_hash_table_benchmark : probing_hash_table_benchmark_main.cc kenutil ;
+
 alias programs : cat_compressed ;
 
 import testing ;
@@ -34,3 +37,5 @@ for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
     local name = [ MATCH "(.*)\.cc" : $(t) ] ;
     unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_filesystem /top//boost_system ;
 }
+
+build-project stream ;
diff --git a/util/exception.hh b/util/exception.hh
index 7a0e7c44a..d67a6f9fb 100644
--- a/util/exception.hh
+++ b/util/exception.hh
@@ -91,6 +91,12 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
 #define UTIL_UNLIKELY(x) (x)
 #endif
 
+#if __GNUC__ >= 3
+#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1)
+#else
+#define UTIL_LIKELY(x) (x)
+#endif
+
 #define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
   if (UTIL_UNLIKELY(Condition)) { \
     UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
diff --git a/util/fake_ofstream.hh b/util/fake_ofstream.hh
index 8299ba9ac..d35bf0d83 100644
--- a/util/fake_ofstream.hh
+++ b/util/fake_ofstream.hh
@@ -1,111 +1,135 @@
 /* Like std::ofstream but without being incredibly slow.  Backed by a raw fd.
- * Does not support many data types.  Currently, it's targeted at writing ARPA
- * files quickly.
+ * Supports most of the built-in types except for void* and long double.
  */
 #ifndef UTIL_FAKE_OFSTREAM_H
 #define UTIL_FAKE_OFSTREAM_H
 
-#include "util/double-conversion/double-conversion.h"
-#include "util/double-conversion/utils.h"
 #include "util/file.hh"
+#include "util/float_to_string.hh"
+#include "util/integer_to_string.hh"
 #include "util/scoped.hh"
 #include "util/string_piece.hh"
 
-#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
-#include <boost/lexical_cast.hpp>
+#include <cassert>
+#include <cstring>
+
+#include <stdint.h>
 
 namespace util {
 class FakeOFStream {
   public:
+    // Maximum over all ToString operations.
+    // static const std::size_t kMinBuf = 20;
+    // This was causing compile failures in debug, so now 20 is written directly.
+    //
     // Does not take ownership of out.
     // Allows default constructor, but must call SetFD.
     explicit FakeOFStream(int out = -1, std::size_t buffer_size = 1048576)
-      : buf_(util::MallocOrThrow(buffer_size)),
-        builder_(static_cast<char*>(buf_.get()), buffer_size),
-        // Mostly the default but with inf instead.  And no flags.
-        convert_(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0),
-        fd_(out),
-        buffer_size_(buffer_size) {}
+      : buf_(util::MallocOrThrow(std::max(buffer_size, (size_t)20))),
+        current_(static_cast<char*>(buf_.get())),
+        end_(current_ + std::max(buffer_size, (size_t)20)),
+        fd_(out) {}
 
     ~FakeOFStream() {
-      if (buf_.get()) Flush();
+      // Could have called Finish already
+      flush();
     }
 
     void SetFD(int to) {
-      if (builder_.position()) Flush();
+      flush();
       fd_ = to;
     }
 
-    FakeOFStream &Write(const void *data, std::size_t length) {
-      // Dominant case
-      if (static_cast<std::size_t>(builder_.size() - builder_.position()) > length) {
-        builder_.AddSubstring((const char*)data, length);
+    FakeOFStream &write(const void *data, std::size_t length) {
+      if (UTIL_LIKELY(current_ + length <= end_)) {
+        std::memcpy(current_, data, length);
+        current_ += length;
         return *this;
       }
-      Flush();
-      if (length > buffer_size_) {
-        util::WriteOrThrow(fd_, data, length);
+      flush();
+      if (current_ + length <= end_) {
+        std::memcpy(current_, data, length);
+        current_ += length;
       } else {
-        builder_.AddSubstring((const char*)data, length);
+        util::WriteOrThrow(fd_, data, length);
       }
       return *this;
     }
 
+    // This also covers std::string and char*
     FakeOFStream &operator<<(StringPiece str) {
-      return Write(str.data(), str.size());
+      return write(str.data(), str.size());
     }
 
-    FakeOFStream &operator<<(float value) {
-      // Odd, but this is the largest number found in the comments.
-      EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
-      convert_.ToShortestSingle(value, &builder_);
+    // For anything with ToStringBuf<T>::kBytes, define operator<< using ToString.
+    // This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t,
+    // float, double
+  private:
+    template <int Arg> struct EnableIfKludge {
+      typedef FakeOFStream type;
+    };
+  public:
+    template <class T> typename EnableIfKludge<ToStringBuf<T>::kBytes>::type &operator<<(const T value) {
+      EnsureRemaining(ToStringBuf<T>::kBytes);
+      current_ = ToString(value, current_);
+      assert(current_ <= end_);
       return *this;
     }
 
-    FakeOFStream &operator<<(double value) {
-      EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
-      convert_.ToShortest(value, &builder_);
+    FakeOFStream &operator<<(char c) {
+      EnsureRemaining(1);
+      *current_++ = c;
       return *this;
     }
 
-    // Inefficient!  TODO: more efficient implementation
-    FakeOFStream &operator<<(unsigned value) {
-      return *this << boost::lexical_cast<std::string>(value);
+    FakeOFStream &operator<<(unsigned char c) {
+      EnsureRemaining(1);
+      *current_++ = static_cast<char>(c);
+      return *this;
     }
 
-    FakeOFStream &operator<<(char c) {
-      EnsureRemaining(1);
-      builder_.AddCharacter(c);
+    /* clang on OS X appears to consider std::size_t aka unsigned long distinct
+     * from uint64_t.  So this function makes clang work.  gcc considers
+     * uint64_t and std::size_t the same (on 64-bit) so this isn't necessary.
+     * But it does no harm since gcc sees it as a specialization of the
+     * EnableIfKludge template.
+     * Also, delegating to *this << static_cast<uint64_t>(value) would loop
+     * indefinitely on gcc.
+     */
+    FakeOFStream &operator<<(std::size_t value) {
+      EnsureRemaining(ToStringBuf<uint64_t>::kBytes);
+      current_ = ToString(static_cast<uint64_t>(value), current_);
       return *this;
     }
 
     // Note this does not sync.
-    void Flush() {
-      util::WriteOrThrow(fd_, buf_.get(), builder_.position());
-      builder_.Reset();
+    void flush() {
+      if (current_ != buf_.get()) {
+        util::WriteOrThrow(fd_, buf_.get(), current_ - (char*)buf_.get());
+        current_ = static_cast<char*>(buf_.get());
+      }
     }
 
     // Not necessary, but does assure the data is cleared.
     void Finish() {
-      Flush();
-      // It will segfault trying to null terminate otherwise.
-      builder_.Finalize();
+      flush();
       buf_.reset();
+      current_ = NULL;
       util::FSyncOrThrow(fd_);
     }
 
   private:
     void EnsureRemaining(std::size_t amount) {
-      if (static_cast<std::size_t>(builder_.size() - builder_.position()) <= amount) {
-        Flush();
+      if (UTIL_UNLIKELY(current_ + amount > end_)) {
+        flush();
+        assert(current_ + amount <= end_);
       }
     }
 
     util::scoped_malloc buf_;
-    double_conversion::StringBuilder builder_;
-    double_conversion::DoubleToStringConverter convert_;
+    char *current_, *end_;
+
     int fd_;
-    const std::size_t buffer_size_;
 };
 
 } // namespace
diff --git a/util/file_piece.cc b/util/file_piece.cc
index c808e7d90..92edef27d 100644
--- a/util/file_piece.cc
+++ b/util/file_piece.cc
@@ -11,19 +11,22 @@
 #include <unistd.h>
 #endif
 
+#include <cassert>
+#include <cerrno>
+#include <cmath>
+#include <cstdlib>
 #include <iostream>
-#include <string>
 #include <limits>
-#include <cassert>
+#include <string>
+
 #include <fcntl.h>
-#include <cstdlib>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 namespace util {
 
 ParseNumberException::ParseNumberException(StringPiece value) throw() {
-  *this << "Could not parse \"" << value << "\" into a number";
+  *this << "Could not parse \"" << value << "\" into a ";
 }
 
 // Sigh this is the only way I could come up with to do a _const_ bool.  It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
@@ -62,12 +65,17 @@ FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buf
 
 FilePiece::~FilePiece() {}
 
-StringPiece FilePiece::ReadLine(char delim) {
+StringPiece FilePiece::ReadLine(char delim, bool strip_cr) {
   std::size_t skip = 0;
   while (true) {
     for (const char *i = position_ + skip; i < position_end_; ++i) {
       if (*i == delim) {
-        StringPiece ret(position_, i - position_);
+        // End of line.
+        // Take 1 byte off the end if it's an unwanted carriage return.
+        const std::size_t subtract_cr = (
+            (strip_cr && i > position_ && *(i - 1) == '\r') ?
+            1 : 0);
+        StringPiece ret(position_, i - position_ - subtract_cr);
         position_ = i + 1;
         return ret;
       }
@@ -83,9 +91,9 @@ StringPiece FilePiece::ReadLine(char delim) {
   }
 }
 
-bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim) {
+bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim, bool strip_cr) {
   try {
-    to = ReadLine(delim);
+    to = ReadLine(delim, strip_cr);
   } catch (const util::EndOfFileException &e) { return false; }
   return true;
 }
@@ -145,49 +153,59 @@ static const double_conversion::StringToDoubleConverter kConverter(
     "inf",
     "NaN");
 
-void ParseNumber(const char *begin, const char *&end, float &out) {
+StringPiece FirstToken(StringPiece str) {
+  const char *i;
+  for (i = str.data(); i != str.data() + str.size(); ++i) {
+    if (kSpaces[(unsigned char)*i]) break;
+  }
+  return StringPiece(str.data(), i - str.data());
+}
+
+const char *ParseNumber(StringPiece str, float &out) {
   int count;
-  out = kConverter.StringToFloat(begin, end - begin, &count);
-  end = begin + count;
+  out = kConverter.StringToFloat(str.data(), str.size(), &count);
+  UTIL_THROW_IF_ARG(isnan(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "float");
+  return str.data() + count;
 }
-void ParseNumber(const char *begin, const char *&end, double &out) {
+const char *ParseNumber(StringPiece str, double &out) {
   int count;
-  out = kConverter.StringToDouble(begin, end - begin, &count);
-  end = begin + count;
+  out = kConverter.StringToDouble(str.data(), str.size(), &count);
+  UTIL_THROW_IF_ARG(isnan(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "double");
+  return str.data() + count;
 }
-void ParseNumber(const char *begin, const char *&end, long int &out) {
-  char *silly_end;
-  out = strtol(begin, &silly_end, 10);
-  end = silly_end;
+const char *ParseNumber(StringPiece str, long int &out) {
+  char *end;
+  errno = 0;
+  out = strtol(str.data(), &end, 10);
+  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "long int");
+  return end;
 }
-void ParseNumber(const char *begin, const char *&end, unsigned long int &out) {
-  char *silly_end;
-  out = strtoul(begin, &silly_end, 10);
-  end = silly_end;
+const char *ParseNumber(StringPiece str, unsigned long int &out) {
+  char *end;
+  errno = 0;
+  out = strtoul(str.data(), &end, 10);
+  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "unsigned long int");
+  return end;
 }
 } // namespace
 
 template <class T> T FilePiece::ReadNumber() {
   SkipSpaces();
   while (last_space_ < position_) {
-    if (at_end_) {
+    if (UTIL_UNLIKELY(at_end_)) {
       // Hallucinate a null off the end of the file.
       std::string buffer(position_, position_end_);
-      const char *buf = buffer.c_str();
-      const char *end = buf + buffer.size();
       T ret;
-      ParseNumber(buf, end, ret);
-      if (buf == end) throw ParseNumberException(buffer);
-      position_ += end - buf;
+      // Has to be null-terminated.
+      const char *begin = buffer.c_str();
+      const char *end = ParseNumber(StringPiece(begin, buffer.size()), ret);
+      position_ += end - begin;
       return ret;
     }
     Shift();
   }
-  const char *end = last_space_;
   T ret;
-  ParseNumber(position_, end, ret);
-  if (end == position_) throw ParseNumberException(ReadDelimited());
-  position_ = end;
+  position_ = ParseNumber(StringPiece(position_, last_space_ - position_), ret);
   return ret;
 }
 
diff --git a/util/file_piece.hh b/util/file_piece.hh
index 35f5eb648..d3d83054d 100644
--- a/util/file_piece.hh
+++ b/util/file_piece.hh
@@ -55,7 +55,7 @@ class FilePiece {
       return Consume(FindDelimiterOrEOF(delim));
     }
 
-    // Read word until the line or file ends.
+    /// Read word until the line or file ends.
     bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
       assert(delim[static_cast<unsigned char>('\n')]);
       // Skip non-enter spaces.
@@ -75,12 +75,30 @@ class FilePiece {
       return true;
     }
 
-    // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
-    // It is similar to getline in that way.
-    StringPiece ReadLine(char delim = '\n');
-
-    // Doesn't throw EndOfFileException, just returns false.
-    bool ReadLineOrEOF(StringPiece &to, char delim = '\n');
+    /** Read a line of text from the file.
+     *
+     * Unlike ReadDelimited, this includes leading spaces and consumes the
+     * delimiter.   It is similar to getline in that way.
+     *
+     * If strip_cr is true, any trailing carriate return (as would be found on
+     * a file written on Windows) will be left out of the returned line.
+     *
+     * Throws EndOfFileException if the end of the file is encountered.  If the
+     * file does not end in a newline, this could mean that the last line is
+     * never read.
+     */
+    StringPiece ReadLine(char delim = '\n', bool strip_cr = true);
+
+    /** Read a line of text from the file, or return false on EOF.
+     *
+     * This is like ReadLine, except it returns false where ReadLine throws
+     * EndOfFileException.  Like ReadLine it may not read the last line in the
+     * file if the file does not end in a newline.
+     *
+     * If strip_cr is true, any trailing carriate return (as would be found on
+     * a file written on Windows) will be left out of the returned line.
+     */
+    bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);
 
     float ReadFloat();
     double ReadDouble();
diff --git a/util/file_piece_test.cc b/util/file_piece_test.cc
index 4a2a521ba..11e2ab3aa 100644
--- a/util/file_piece_test.cc
+++ b/util/file_piece_test.cc
@@ -1,6 +1,7 @@
 // Tests might fail if you have creative characters in your path.  Sue me.
 #include "util/file_piece.hh"
 
+#include "util/fake_ofstream.hh"
 #include "util/file.hh"
 #include "util/scoped.hh"
 
@@ -133,5 +134,21 @@ BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
 
 #endif // HAVE_ZLIB
 
+BOOST_AUTO_TEST_CASE(Numbers) {
+  scoped_fd file(MakeTemp(FileLocation()));
+  const float floating = 3.2;
+  {
+    util::FakeOFStream writing(file.get());
+    writing << "94389483984398493890287 " << floating << " 5";
+  }
+  SeekOrThrow(file.get(), 0);
+  util::FilePiece f(file.release());
+  BOOST_CHECK_THROW(f.ReadULong(), ParseNumberException);
+  BOOST_CHECK_EQUAL("94389483984398493890287", f.ReadDelimited());
+  // Yes, exactly equal.  Isn't double-conversion wonderful?
+  BOOST_CHECK_EQUAL(floating, f.ReadFloat());
+  BOOST_CHECK_EQUAL(5, f.ReadULong());
+}
+
 } // namespace
 } // namespace util
diff --git a/util/float_to_string.cc b/util/float_to_string.cc
new file mode 100644
index 000000000..1e16d6f99
--- /dev/null
+++ b/util/float_to_string.cc
@@ -0,0 +1,23 @@
+#include "util/float_to_string.hh"
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/double-conversion/utils.h"
+
+namespace util {
+namespace {
+const double_conversion::DoubleToStringConverter kConverter(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0);
+} // namespace
+
+char *ToString(double value, char *to) {
+  double_conversion::StringBuilder builder(to, ToStringBuf<double>::kBytes);
+  kConverter.ToShortest(value, &builder);
+  return &to[builder.position()];
+}
+
+char *ToString(float value, char *to) {
+  double_conversion::StringBuilder builder(to, ToStringBuf<float>::kBytes);
+  kConverter.ToShortestSingle(value, &builder);
+  return &to[builder.position()];
+}
+
+} // namespace util
diff --git a/util/float_to_string.hh b/util/float_to_string.hh
new file mode 100644
index 000000000..d1104e790
--- /dev/null
+++ b/util/float_to_string.hh
@@ -0,0 +1,25 @@
+#ifndef UTIL_FLOAT_TO_STRING_H
+#define UTIL_FLOAT_TO_STRING_H
+
+// Just for ToStringBuf
+#include "util/integer_to_string.hh"
+
+namespace util {
+
+template <> struct ToStringBuf<double> {
+  // DoubleToStringConverter::kBase10MaximalLength + 1 for null paranoia.
+  static const unsigned kBytes = 18;
+};
+
+// Single wasn't documented in double conversion, so be conservative and
+// say the same as double.
+template <> struct ToStringBuf<float> {
+  static const unsigned kBytes = 18;
+};
+
+char *ToString(double value, char *to);
+char *ToString(float value, char *to);
+
+} // namespace util
+
+#endif // UTIL_FLOAT_TO_STRING_H
diff --git a/util/integer_to_string.cc b/util/integer_to_string.cc
new file mode 100644
index 000000000..32047291d
--- /dev/null
+++ b/util/integer_to_string.cc
@@ -0,0 +1,639 @@
+/* Fast integer to string conversion.
+Source: https://github.com/miloyip/itoa-benchmark
+Local modifications:
+1. Return end of buffer instead of null terminating
+2. Collapse to single file
+3. Namespace
+4. Remove test hook
+5. Non-x86 support from the branch_lut code
+6. Rename functions
+
+Copyright (C) 2014 Milo Yip
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Which is based on: http://0x80.pl/snippets/asm/sse-utoa.c
+
+  SSE: conversion integers to decimal representation
+
+  Author: Wojciech MuÅ‚a
+  e-mail: wojciech_mula@poczta.onet.pl
+  www:    http://0x80.pl/
+
+  License: BSD
+
+  initial release 2011-10-21
+  $Id$
+*/
+
+#include "util/integer_to_string.hh"
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+
+namespace {
+const char gDigitsLut[200] = {
+  '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
+  '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
+  '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
+  '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
+  '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
+  '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
+  '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
+  '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
+  '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
+  '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
+};
+} // namespace
+
+// SSE2 implementation according to http://0x80.pl/articles/sse-itoa.html
+// Modifications: (1) fix incorrect digits (2) accept all ranges (3) write to user provided buffer.
+
+#if defined(i386) || defined(__amd64) || defined(_M_IX86) || defined(_M_X64)
+
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+#include "intrin.h"
+#endif
+
+#ifdef _MSC_VER
+#define ALIGN_PRE __declspec(align(16))
+#define ALIGN_SUF
+#else
+#define ALIGN_PRE
+#define ALIGN_SUF  __attribute__ ((aligned(16)))
+#endif
+
+namespace {
+
+static const uint32_t kDiv10000 = 0xd1b71759;
+ALIGN_PRE static const uint32_t kDiv10000Vector[4] ALIGN_SUF = { kDiv10000, kDiv10000, kDiv10000, kDiv10000 };
+ALIGN_PRE static const uint32_t k10000Vector[4] ALIGN_SUF = { 10000, 10000, 10000, 10000 };
+ALIGN_PRE static const uint16_t kDivPowersVector[8] ALIGN_SUF = { 8389, 5243, 13108, 32768, 8389, 5243, 13108, 32768 }; // 10^3, 10^2, 10^1, 10^0
+ALIGN_PRE static const uint16_t kShiftPowersVector[8] ALIGN_SUF = {
+    1 << (16 - (23 + 2 - 16)),
+    1 << (16 - (19 + 2 - 16)),
+    1 << (16 - 1 - 2),
+    1 << (15),
+    1 << (16 - (23 + 2 - 16)),
+    1 << (16 - (19 + 2 - 16)),
+    1 << (16 - 1 - 2),
+    1 << (15)
+};
+ALIGN_PRE static const uint16_t k10Vector[8] ALIGN_SUF = { 10, 10, 10, 10, 10, 10, 10, 10 };
+ALIGN_PRE static const char kAsciiZero[16] ALIGN_SUF = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0' };
+
+inline __m128i Convert8DigitsSSE2(uint32_t value) {
+    assert(value <= 99999999);
+
+    // abcd, efgh = abcdefgh divmod 10000
+    const __m128i abcdefgh = _mm_cvtsi32_si128(value);
+    const __m128i abcd = _mm_srli_epi64(_mm_mul_epu32(abcdefgh, reinterpret_cast<const __m128i*>(kDiv10000Vector)[0]), 45);
+    const __m128i efgh = _mm_sub_epi32(abcdefgh, _mm_mul_epu32(abcd, reinterpret_cast<const __m128i*>(k10000Vector)[0]));
+
+    // v1 = [ abcd, efgh, 0, 0, 0, 0, 0, 0 ]
+    const __m128i v1 = _mm_unpacklo_epi16(abcd, efgh);
+
+    // v1a = v1 * 4 = [ abcd * 4, efgh * 4, 0, 0, 0, 0, 0, 0 ]
+    const __m128i v1a = _mm_slli_epi64(v1, 2);
+
+    // v2 = [ abcd * 4, abcd * 4, abcd * 4, abcd * 4, efgh * 4, efgh * 4, efgh * 4, efgh * 4 ]
+    const __m128i v2a = _mm_unpacklo_epi16(v1a, v1a);
+    const __m128i v2 = _mm_unpacklo_epi32(v2a, v2a);
+
+    // v4 = v2 div 10^3, 10^2, 10^1, 10^0 = [ a, ab, abc, abcd, e, ef, efg, efgh ]
+    const __m128i v3 = _mm_mulhi_epu16(v2, reinterpret_cast<const __m128i*>(kDivPowersVector)[0]);
+    const __m128i v4 = _mm_mulhi_epu16(v3, reinterpret_cast<const __m128i*>(kShiftPowersVector)[0]);
+
+    // v5 = v4 * 10 = [ a0, ab0, abc0, abcd0, e0, ef0, efg0, efgh0 ]
+    const __m128i v5 = _mm_mullo_epi16(v4, reinterpret_cast<const __m128i*>(k10Vector)[0]);
+
+    // v6 = v5 << 16 = [ 0, a0, ab0, abc0, 0, e0, ef0, efg0 ]
+    const __m128i v6 = _mm_slli_epi64(v5, 16);
+
+    // v7 = v4 - v6 = { a, b, c, d, e, f, g, h }
+    const __m128i v7 = _mm_sub_epi16(v4, v6);
+
+    return v7;
+}
+
+inline __m128i ShiftDigits_SSE2(__m128i a, unsigned digit) {
+    assert(digit <= 8);
+    switch (digit) {
+        case 0: return a;
+        case 1: return _mm_srli_si128(a, 1);
+        case 2: return _mm_srli_si128(a, 2);
+        case 3: return _mm_srli_si128(a, 3);
+        case 4: return _mm_srli_si128(a, 4);
+        case 5: return _mm_srli_si128(a, 5);
+        case 6: return _mm_srli_si128(a, 6);
+        case 7: return _mm_srli_si128(a, 7);
+        case 8: return _mm_srli_si128(a, 8);
+    }
+    return a; // should not execute here.
+}
+
+} // namespace
+
+// Original name: u32toa_sse2
+char *ToString(uint32_t value, char* buffer) {
+    if (value < 10000) {
+        const uint32_t d1 = (value / 100) << 1;
+        const uint32_t d2 = (value % 100) << 1;
+
+        if (value >= 1000)
+            *buffer++ = gDigitsLut[d1];
+        if (value >= 100)
+            *buffer++ = gDigitsLut[d1 + 1];
+        if (value >= 10)
+            *buffer++ = gDigitsLut[d2];
+        *buffer++ = gDigitsLut[d2 + 1];
+        //*buffer++ = '\0';
+        return buffer;
+    }
+    else if (value < 100000000) {
+        // Experiment shows that this case SSE2 is slower
+#if 0
+        const __m128i a = Convert8DigitsSSE2(value);
+
+        // Convert to bytes, add '0'
+        const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+
+        // Count number of digit
+        const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
+        unsigned long digit;
+#ifdef _MSC_VER
+        _BitScanForward(&digit, ~mask | 0x8000);
+#else
+        digit = __builtin_ctz(~mask | 0x8000);
+#endif
+
+        // Shift digits to the beginning
+        __m128i result = ShiftDigits_SSE2(va, digit);
+        //__m128i result = _mm_srl_epi64(va, _mm_cvtsi32_si128(digit * 8));
+        _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
+        buffer[8 - digit] = '\0';
+#else
+        // value = bbbbcccc
+        const uint32_t b = value / 10000;
+        const uint32_t c = value % 10000;
+
+        const uint32_t d1 = (b / 100) << 1;
+        const uint32_t d2 = (b % 100) << 1;
+
+        const uint32_t d3 = (c / 100) << 1;
+        const uint32_t d4 = (c % 100) << 1;
+
+        if (value >= 10000000)
+            *buffer++ = gDigitsLut[d1];
+        if (value >= 1000000)
+            *buffer++ = gDigitsLut[d1 + 1];
+        if (value >= 100000)
+            *buffer++ = gDigitsLut[d2];
+        *buffer++ = gDigitsLut[d2 + 1];
+
+        *buffer++ = gDigitsLut[d3];
+        *buffer++ = gDigitsLut[d3 + 1];
+        *buffer++ = gDigitsLut[d4];
+        *buffer++ = gDigitsLut[d4 + 1];
+//        *buffer++ = '\0';
+        return buffer;
+#endif
+    }
+    else {
+        // value = aabbbbbbbb in decimal
+
+        const uint32_t a = value / 100000000; // 1 to 42
+        value %= 100000000;
+
+        if (a >= 10) {
+            const unsigned i = a << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+        }
+        else
+            *buffer++ = '0' + static_cast<char>(a);
+
+        const __m128i b = Convert8DigitsSSE2(value);
+        const __m128i ba = _mm_add_epi8(_mm_packus_epi16(_mm_setzero_si128(), b), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+        const __m128i result = _mm_srli_si128(ba, 8);
+        _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
+//        buffer[8] = '\0';
+        return buffer + 8;
+    }
+}
+
+// Original name: u64toa_sse2
+char *ToString(uint64_t value, char* buffer) {
+    if (value < 100000000) {
+        uint32_t v = static_cast<uint32_t>(value);
+        if (v < 10000) {
+            const uint32_t d1 = (v / 100) << 1;
+            const uint32_t d2 = (v % 100) << 1;
+
+            if (v >= 1000)
+                *buffer++ = gDigitsLut[d1];
+            if (v >= 100)
+                *buffer++ = gDigitsLut[d1 + 1];
+            if (v >= 10)
+                *buffer++ = gDigitsLut[d2];
+            *buffer++ = gDigitsLut[d2 + 1];
+            //*buffer++ = '\0';
+            return buffer;
+        }
+        else {
+            // Experiment shows that this case SSE2 is slower
+#if 0
+            const __m128i a = Convert8DigitsSSE2(v);
+
+            // Convert to bytes, add '0'
+            const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+
+            // Count number of digit
+            const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
+            unsigned long digit;
+#ifdef _MSC_VER
+            _BitScanForward(&digit, ~mask | 0x8000);
+#else
+            digit = __builtin_ctz(~mask | 0x8000);
+#endif
+
+            // Shift digits to the beginning
+            __m128i result = ShiftDigits_SSE2(va, digit);
+            _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
+            buffer[8 - digit] = '\0';
+#else
+            // value = bbbbcccc
+            const uint32_t b = v / 10000;
+            const uint32_t c = v % 10000;
+
+            const uint32_t d1 = (b / 100) << 1;
+            const uint32_t d2 = (b % 100) << 1;
+
+            const uint32_t d3 = (c / 100) << 1;
+            const uint32_t d4 = (c % 100) << 1;
+
+            if (value >= 10000000)
+                *buffer++ = gDigitsLut[d1];
+            if (value >= 1000000)
+                *buffer++ = gDigitsLut[d1 + 1];
+            if (value >= 100000)
+                *buffer++ = gDigitsLut[d2];
+            *buffer++ = gDigitsLut[d2 + 1];
+
+            *buffer++ = gDigitsLut[d3];
+            *buffer++ = gDigitsLut[d3 + 1];
+            *buffer++ = gDigitsLut[d4];
+            *buffer++ = gDigitsLut[d4 + 1];
+            //*buffer++ = '\0';
+            return buffer;
+#endif
+        }
+    }
+    else if (value < 10000000000000000) {
+        const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+        const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+        const __m128i a0 = Convert8DigitsSSE2(v0);
+        const __m128i a1 = Convert8DigitsSSE2(v1);
+
+        // Convert to bytes, add '0'
+        const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+
+        // Count number of digit
+        const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
+#ifdef _MSC_VER
+        unsigned long digit;
+        _BitScanForward(&digit, ~mask | 0x8000);
+#else
+        unsigned digit = __builtin_ctz(~mask | 0x8000);
+#endif
+
+        // Shift digits to the beginning
+        __m128i result = ShiftDigits_SSE2(va, digit);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+//        buffer[16 - digit] = '\0';
+        return &buffer[16 - digit];
+    }
+    else {
+        const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844
+        value %= 10000000000000000;
+
+        if (a < 10)
+            *buffer++ = '0' + static_cast<char>(a);
+        else if (a < 100) {
+            const uint32_t i = a << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+        }
+        else if (a < 1000) {
+            *buffer++ = '0' + static_cast<char>(a / 100);
+
+            const uint32_t i = (a % 100) << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+        }
+        else {
+            const uint32_t i = (a / 100) << 1;
+            const uint32_t j = (a % 100) << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+            *buffer++ = gDigitsLut[j];
+            *buffer++ = gDigitsLut[j + 1];
+        }
+
+        const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+        const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+        const __m128i a0 = Convert8DigitsSSE2(v0);
+        const __m128i a1 = Convert8DigitsSSE2(v1);
+
+        // Convert to bytes, add '0'
+        const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), va);
+//        buffer[16] = '\0';
+        return &buffer[16];
+    }
+}
+
+#else // Generic Non-x86 case
+
+// Orignal name: u32toa_branchlut
+char *ToString(uint32_t value, char* buffer) {
+    if (value < 10000) {
+        const uint32_t d1 = (value / 100) << 1;
+        const uint32_t d2 = (value % 100) << 1;
+
+        if (value >= 1000)
+            *buffer++ = gDigitsLut[d1];
+        if (value >= 100)
+            *buffer++ = gDigitsLut[d1 + 1];
+        if (value >= 10)
+            *buffer++ = gDigitsLut[d2];
+        *buffer++ = gDigitsLut[d2 + 1];
+    }
+    else if (value < 100000000) {
+        // value = bbbbcccc
+        const uint32_t b = value / 10000;
+        const uint32_t c = value % 10000;
+
+        const uint32_t d1 = (b / 100) << 1;
+        const uint32_t d2 = (b % 100) << 1;
+
+        const uint32_t d3 = (c / 100) << 1;
+        const uint32_t d4 = (c % 100) << 1;
+
+        if (value >= 10000000)
+            *buffer++ = gDigitsLut[d1];
+        if (value >= 1000000)
+            *buffer++ = gDigitsLut[d1 + 1];
+        if (value >= 100000)
+            *buffer++ = gDigitsLut[d2];
+        *buffer++ = gDigitsLut[d2 + 1];
+
+        *buffer++ = gDigitsLut[d3];
+        *buffer++ = gDigitsLut[d3 + 1];
+        *buffer++ = gDigitsLut[d4];
+        *buffer++ = gDigitsLut[d4 + 1];
+    }
+    else {
+        // value = aabbbbcccc in decimal
+
+        const uint32_t a = value / 100000000; // 1 to 42
+        value %= 100000000;
+
+        if (a >= 10) {
+            const unsigned i = a << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+        }
+        else
+            *buffer++ = '0' + static_cast<char>(a);
+
+        const uint32_t b = value / 10000; // 0 to 9999
+        const uint32_t c = value % 10000; // 0 to 9999
+
+        const uint32_t d1 = (b / 100) << 1;
+        const uint32_t d2 = (b % 100) << 1;
+
+        const uint32_t d3 = (c / 100) << 1;
+        const uint32_t d4 = (c % 100) << 1;
+
+        *buffer++ = gDigitsLut[d1];
+        *buffer++ = gDigitsLut[d1 + 1];
+        *buffer++ = gDigitsLut[d2];
+        *buffer++ = gDigitsLut[d2 + 1];
+        *buffer++ = gDigitsLut[d3];
+        *buffer++ = gDigitsLut[d3 + 1];
+        *buffer++ = gDigitsLut[d4];
+        *buffer++ = gDigitsLut[d4 + 1];
+    }
+    return buffer; //*buffer++ = '\0';
+}
+
+// Original name: u64toa_branchlut
+char *ToString(uint64_t value, char* buffer) {
+    if (value < 100000000) {
+        uint32_t v = static_cast<uint32_t>(value);
+        if (v < 10000) {
+            const uint32_t d1 = (v / 100) << 1;
+            const uint32_t d2 = (v % 100) << 1;
+
+            if (v >= 1000)
+                *buffer++ = gDigitsLut[d1];
+            if (v >= 100)
+                *buffer++ = gDigitsLut[d1 + 1];
+            if (v >= 10)
+                *buffer++ = gDigitsLut[d2];
+            *buffer++ = gDigitsLut[d2 + 1];
+        }
+        else {
+            // value = bbbbcccc
+            const uint32_t b = v / 10000;
+            const uint32_t c = v % 10000;
+
+            const uint32_t d1 = (b / 100) << 1;
+            const uint32_t d2 = (b % 100) << 1;
+
+            const uint32_t d3 = (c / 100) << 1;
+            const uint32_t d4 = (c % 100) << 1;
+
+            if (value >= 10000000)
+                *buffer++ = gDigitsLut[d1];
+            if (value >= 1000000)
+                *buffer++ = gDigitsLut[d1 + 1];
+            if (value >= 100000)
+                *buffer++ = gDigitsLut[d2];
+            *buffer++ = gDigitsLut[d2 + 1];
+
+            *buffer++ = gDigitsLut[d3];
+            *buffer++ = gDigitsLut[d3 + 1];
+            *buffer++ = gDigitsLut[d4];
+            *buffer++ = gDigitsLut[d4 + 1];
+        }
+    }
+    else if (value < 10000000000000000) {
+        const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+        const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+        const uint32_t b0 = v0 / 10000;
+        const uint32_t c0 = v0 % 10000;
+
+        const uint32_t d1 = (b0 / 100) << 1;
+        const uint32_t d2 = (b0 % 100) << 1;
+
+        const uint32_t d3 = (c0 / 100) << 1;
+        const uint32_t d4 = (c0 % 100) << 1;
+
+        const uint32_t b1 = v1 / 10000;
+        const uint32_t c1 = v1 % 10000;
+
+        const uint32_t d5 = (b1 / 100) << 1;
+        const uint32_t d6 = (b1 % 100) << 1;
+
+        const uint32_t d7 = (c1 / 100) << 1;
+        const uint32_t d8 = (c1 % 100) << 1;
+
+        if (value >= 1000000000000000)
+            *buffer++ = gDigitsLut[d1];
+        if (value >= 100000000000000)
+            *buffer++ = gDigitsLut[d1 + 1];
+        if (value >= 10000000000000)
+            *buffer++ = gDigitsLut[d2];
+        if (value >= 1000000000000)
+            *buffer++ = gDigitsLut[d2 + 1];
+        if (value >= 100000000000)
+            *buffer++ = gDigitsLut[d3];
+        if (value >= 10000000000)
+            *buffer++ = gDigitsLut[d3 + 1];
+        if (value >= 1000000000)
+            *buffer++ = gDigitsLut[d4];
+        if (value >= 100000000)
+            *buffer++ = gDigitsLut[d4 + 1];
+
+        *buffer++ = gDigitsLut[d5];
+        *buffer++ = gDigitsLut[d5 + 1];
+        *buffer++ = gDigitsLut[d6];
+        *buffer++ = gDigitsLut[d6 + 1];
+        *buffer++ = gDigitsLut[d7];
+        *buffer++ = gDigitsLut[d7 + 1];
+        *buffer++ = gDigitsLut[d8];
+        *buffer++ = gDigitsLut[d8 + 1];
+    }
+    else {
+        const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844
+        value %= 10000000000000000;
+
+        if (a < 10)
+            *buffer++ = '0' + static_cast<char>(a);
+        else if (a < 100) {
+            const uint32_t i = a << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+        }
+        else if (a < 1000) {
+            *buffer++ = '0' + static_cast<char>(a / 100);
+
+            const uint32_t i = (a % 100) << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+        }
+        else {
+            const uint32_t i = (a / 100) << 1;
+            const uint32_t j = (a % 100) << 1;
+            *buffer++ = gDigitsLut[i];
+            *buffer++ = gDigitsLut[i + 1];
+            *buffer++ = gDigitsLut[j];
+            *buffer++ = gDigitsLut[j + 1];
+        }
+
+        const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+        const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+        const uint32_t b0 = v0 / 10000;
+        const uint32_t c0 = v0 % 10000;
+
+        const uint32_t d1 = (b0 / 100) << 1;
+        const uint32_t d2 = (b0 % 100) << 1;
+
+        const uint32_t d3 = (c0 / 100) << 1;
+        const uint32_t d4 = (c0 % 100) << 1;
+
+        const uint32_t b1 = v1 / 10000;
+        const uint32_t c1 = v1 % 10000;
+
+        const uint32_t d5 = (b1 / 100) << 1;
+        const uint32_t d6 = (b1 % 100) << 1;
+
+        const uint32_t d7 = (c1 / 100) << 1;
+        const uint32_t d8 = (c1 % 100) << 1;
+
+        *buffer++ = gDigitsLut[d1];
+        *buffer++ = gDigitsLut[d1 + 1];
+        *buffer++ = gDigitsLut[d2];
+        *buffer++ = gDigitsLut[d2 + 1];
+        *buffer++ = gDigitsLut[d3];
+        *buffer++ = gDigitsLut[d3 + 1];
+        *buffer++ = gDigitsLut[d4];
+        *buffer++ = gDigitsLut[d4 + 1];
+        *buffer++ = gDigitsLut[d5];
+        *buffer++ = gDigitsLut[d5 + 1];
+        *buffer++ = gDigitsLut[d6];
+        *buffer++ = gDigitsLut[d6 + 1];
+        *buffer++ = gDigitsLut[d7];
+        *buffer++ = gDigitsLut[d7 + 1];
+        *buffer++ = gDigitsLut[d8];
+        *buffer++ = gDigitsLut[d8 + 1];
+    }
+    return buffer;
+}
+
+#endif // End of architecture if statement.
+
+// Signed wrappers.  The negation is done on the unsigned version because
+// doing so has defined behavior for INT_MIN.
+char *ToString(int32_t value, char *to) {
+  uint32_t un = static_cast<uint32_t>(value);
+  if (value < 0) {
+    *to++ = '-';
+    un = -un;
+  }
+  return ToString(un, to);
+}
+
+char *ToString(int64_t value, char *to) {
+  uint64_t un = static_cast<uint64_t>(value);
+  if (value < 0) {
+    *to++ = '-';
+    un = -un;
+  }
+  return ToString(un, to);
+}
+
+// No optimization for this case yet.
+char *ToString(int16_t value, char *to) {
+  return ToString((int32_t)value, to);
+}
+char *ToString(uint16_t value, char *to) {
+  return ToString((uint32_t)value, to);
+}
+
+} // namespace util
diff --git a/util/integer_to_string.hh b/util/integer_to_string.hh
new file mode 100644
index 000000000..0d975b14e
--- /dev/null
+++ b/util/integer_to_string.hh
@@ -0,0 +1,56 @@
+#ifndef UTIL_INTEGER_TO_STRING_H
+#define UTIL_INTEGER_TO_STRING_H
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+
+/* These functions convert integers to strings and return the end pointer.
+ */
+char *ToString(uint32_t value, char *to);
+char *ToString(uint64_t value, char *to);
+
+// Implemented as wrappers to above
+char *ToString(int32_t value, char *to);
+char *ToString(int64_t value, char *to);
+
+// Calls the 32-bit versions for now.
+char *ToString(uint16_t value, char *to);
+char *ToString(int16_t value, char *to);
+
+inline char *ToString(bool value, char *to) {
+  *to++ = '0' + value;
+  return to;
+}
+
+// How many bytes to reserve in the buffer for these strings:
+// g++ 4.9.1 doesn't work with this:
+// static const std::size_t kBytes = 5;
+// So use enum.
+template <class T> struct ToStringBuf;
+template <> struct ToStringBuf<bool> {
+  enum { kBytes = 1 };
+};
+template <> struct ToStringBuf<uint16_t> {
+  enum { kBytes = 5 };
+};
+template <> struct ToStringBuf<int16_t> {
+  enum { kBytes = 6 };
+};
+template <> struct ToStringBuf<uint32_t> {
+  enum { kBytes = 10 };
+};
+template <> struct ToStringBuf<int32_t> {
+  enum { kBytes = 11 };
+};
+template <> struct ToStringBuf<uint64_t> {
+  enum { kBytes = 20 };
+};
+template <> struct ToStringBuf<int64_t> {
+  // Not a typo.  2^63 has 19 digits.
+  enum { kBytes = 20 };
+};
+
+} // namespace util
+
+#endif // UTIL_INTEGER_TO_STRING_H
diff --git a/util/integer_to_string_test.cc b/util/integer_to_string_test.cc
new file mode 100644
index 000000000..ded1ecec7
--- /dev/null
+++ b/util/integer_to_string_test.cc
@@ -0,0 +1,65 @@
+#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
+#include "util/integer_to_string.hh"
+#include "util/string_piece.hh"
+
+#define BOOST_TEST_MODULE IntegerToStringTest
+#include <boost/test/unit_test.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <limits>
+
+namespace util {
+namespace {
+
+template <class T> void TestValue(const T value) {
+  char buf[ToStringBuf<T>::kBytes];
+  StringPiece result(buf, ToString(value, buf) - buf);
+  BOOST_REQUIRE_GE(static_cast<std::size_t>(ToStringBuf<T>::kBytes), result.size());
+  BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result);
+}
+
+template <class T> void TestCorners() {
+  TestValue(std::numeric_limits<T>::min());
+  TestValue(std::numeric_limits<T>::max());
+  TestValue(static_cast<T>(0));
+  TestValue(static_cast<T>(-1));
+  TestValue(static_cast<T>(1));
+}
+
+BOOST_AUTO_TEST_CASE(Corners) {
+  TestCorners<uint16_t>();
+  TestCorners<uint32_t>();
+  TestCorners<uint64_t>();
+  TestCorners<int16_t>();
+  TestCorners<int32_t>();
+  TestCorners<int64_t>();
+}
+
+template <class T> void TestAll() {
+  for (T i = std::numeric_limits<T>::min(); i < std::numeric_limits<T>::max(); ++i) {
+    TestValue(i);
+  }
+  TestValue(std::numeric_limits<T>::max());
+}
+
+BOOST_AUTO_TEST_CASE(Short) {
+  TestAll<uint16_t>();
+  TestAll<int16_t>();
+}
+
+template <class T> void Test10s() {
+  for (T i = 1; i < std::numeric_limits<T>::max() / 10; i *= 10) {
+    TestValue(i);
+    TestValue(i - 1);
+    TestValue(i + 1);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(Tens) {
+  Test10s<uint64_t>();
+  Test10s<int64_t>();
+  Test10s<uint32_t>();
+  Test10s<int32_t>();
+}
+
+}} // namespaces
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh
index 245340ddb..f32b64ea3 100644
--- a/util/probing_hash_table.hh
+++ b/util/probing_hash_table.hh
@@ -88,7 +88,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
 #ifdef DEBUG
       assert(initialized_);
 #endif
-      for (MutableIterator i = Ideal(t);;) {
+      for (MutableIterator i = Ideal(t.GetKey());;) {
         Key got(i->GetKey());
         if (equal_(got, t.GetKey())) { out = i; return true; }
         if (equal_(got, invalid_)) {
@@ -108,7 +108,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
 #ifdef DEBUG
       assert(initialized_);
 #endif
-      for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
+      for (MutableIterator i(Ideal(key));;) {
         Key got(i->GetKey());
         if (equal_(got, key)) { out = i; return true; }
         if (equal_(got, invalid_)) return false;
@@ -118,7 +118,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
 
     // Like UnsafeMutableFind, but the key must be there.
     template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
-       for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
+       for (MutableIterator i(Ideal(key));;) {
         Key got(i->GetKey());
         if (equal_(got, key)) { return i; }
         assert(!equal_(got, invalid_));
@@ -131,7 +131,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
 #ifdef DEBUG
       assert(initialized_);
 #endif
-      for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
+      for (ConstIterator i(Ideal(key));;) {
         Key got(i->GetKey());
         if (equal_(got, key)) { out = i; return true; }
         if (equal_(got, invalid_)) return false;
@@ -141,7 +141,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
 
     // Like Find but we're sure it must be there.
     template <class Key> ConstIterator MustFind(const Key key) const {
-      for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
+      for (ConstIterator i(Ideal(key));;) {
         Key got(i->GetKey());
         if (equal_(got, key)) { return i; }
         assert(!equal_(got, invalid_));
@@ -213,7 +213,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
       MutableIterator i;
       // Beginning can be wrap-arounds.
       for (i = begin_; !equal_(i->GetKey(), invalid_); ++i) {
-        MutableIterator ideal = Ideal(*i);
+        MutableIterator ideal = Ideal(i->GetKey());
         UTIL_THROW_IF(ideal > i && ideal <= last, Exception, "Inconsistency at position " << (i - begin_) << " should be at " << (ideal - begin_));
       }
       MutableIterator pre_gap = i;
@@ -222,7 +222,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
           pre_gap = i;
           continue;
         }
-        MutableIterator ideal = Ideal(*i);
+        MutableIterator ideal = Ideal(i->GetKey());
         UTIL_THROW_IF(ideal > i || ideal <= pre_gap, Exception, "Inconsistency at position " << (i - begin_) << " with ideal " << (ideal - begin_));
       }
     }
@@ -230,12 +230,15 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
   private:
     friend class AutoProbing<Entry, Hash, Equal>;
 
-    template <class T> MutableIterator Ideal(const T &t) {
-      return begin_ + (hash_(t.GetKey()) % buckets_);
+    MutableIterator Ideal(const Key key) {
+      return begin_ + (hash_(key) % buckets_);
+    }
+    ConstIterator Ideal(const Key key) const {
+      return begin_ + (hash_(key) % buckets_);
     }
 
     template <class T> MutableIterator UncheckedInsert(const T &t) {
-      for (MutableIterator i(Ideal(t));;) {
+      for (MutableIterator i(Ideal(t.GetKey()));;) {
         if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
         if (++i == end_) { i = begin_; }
       }
@@ -277,6 +280,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
 
     // Assumes that the key is unique.  Multiple insertions won't cause a failure, just inconsistent lookup.
     template <class T> MutableIterator Insert(const T &t) {
+      ++backend_.entries_;
       DoubleIfNeeded();
       return backend_.UncheckedInsert(t);
     }
diff --git a/util/probing_hash_table_benchmark_main.cc b/util/probing_hash_table_benchmark_main.cc
new file mode 100644
index 000000000..3e12290cf
--- /dev/null
+++ b/util/probing_hash_table_benchmark_main.cc
@@ -0,0 +1,49 @@
+#include "util/probing_hash_table.hh"
+#include "util/scoped.hh"
+#include "util/usage.hh"
+
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int_distribution.hpp>
+
+#include <iostream>
+
+namespace util {
+namespace {
+
+struct Entry {
+  typedef uint64_t Key;
+  Key key;
+  Key GetKey() const { return key; }
+};
+
+typedef util::ProbingHashTable<Entry, util::IdentityHash> Table;
+
+void Test(uint64_t entries, uint64_t lookups, float multiplier = 1.5) {
+  std::size_t size = Table::Size(entries, multiplier);
+  scoped_malloc backing(util::CallocOrThrow(size));
+  Table table(backing.get(), size);
+  boost::random::mt19937 gen;
+  boost::random::uniform_int_distribution<> dist(std::numeric_limits<uint64_t>::min(), std::numeric_limits<uint64_t>::max());
+  double start = UserTime();
+  for (uint64_t i = 0; i < entries; ++i) {
+    Entry entry;
+    entry.key = dist(gen);
+    table.Insert(entry);
+  }
+  double inserted = UserTime();
+  bool meaningless = true;
+  for (uint64_t i = 0; i < lookups; ++i) {
+    Table::ConstIterator it;
+    meaningless ^= table.Find(dist(gen), it);
+  }
+  std::cout << meaningless << ' ' << entries << ' ' << multiplier << ' ' << (inserted - start) << ' ' << (UserTime() - inserted) / static_cast<double>(lookups) << std::endl;
+}
+
+} // namespace
+} // namespace util
+
+int main() {
+  for (uint64_t i = 1; i <= 10000000ULL; i *= 10) {
+    util::Test(i, 10000000);
+  }
+}
diff --git a/util/scoped.cc b/util/scoped.cc
index de1d9e940..84f4344b7 100644
--- a/util/scoped.cc
+++ b/util/scoped.cc
@@ -7,6 +7,8 @@
 
 namespace util {
 
+// TODO: if we're really under memory pressure, don't allocate memory to
+// display the error.
 MallocException::MallocException(std::size_t requested) throw() {
   *this << "for " << requested << " bytes ";
 }
@@ -16,10 +18,6 @@ MallocException::~MallocException() throw() {}
 namespace {
 void *InspectAddr(void *addr, std::size_t requested, const char *func_name) {
   UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name);
-  // These routines are often used for large chunks of memory where huge pages help.
-#if MADV_HUGEPAGE
-  madvise(addr, requested, MADV_HUGEPAGE);
-#endif
   return addr;
 }
 } // namespace
@@ -36,4 +34,10 @@ void scoped_malloc::call_realloc(std::size_t requested) {
   p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc");
 }
 
+void AdviseHugePages(const void *addr, std::size_t size) {
+#if MADV_HUGEPAGE
+  madvise((void*)addr, size, MADV_HUGEPAGE);
+#endif
+}
+
 } // namespace util
diff --git a/util/scoped.hh b/util/scoped.hh
index c347a43cc..21e9a7566 100644
--- a/util/scoped.hh
+++ b/util/scoped.hh
@@ -104,6 +104,8 @@ template <class T> class scoped_ptr : public scoped<T, scoped_delete_forward> {
     explicit scoped_ptr(T *p = NULL) : scoped<T, scoped_delete_forward>(p) {}
 };
 
+void AdviseHugePages(const void *addr, std::size_t size);
+
 } // namespace util
 
 #endif // UTIL_SCOPED_H
diff --git a/util/stream/Jamfile b/util/stream/Jamfile
index 2e99979f5..cde0247e7 100644
--- a/util/stream/Jamfile
+++ b/util/stream/Jamfile
@@ -4,9 +4,10 @@
 #   timer-link = ;
 #}
 
-fakelib stream : chain.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ;
+fakelib stream : chain.cc rewindable_stream.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ;
 
 import testing ;
 unit-test io_test : io_test.cc stream /top//boost_unit_test_framework ;
 unit-test stream_test : stream_test.cc stream /top//boost_unit_test_framework ;
+unit-test rewindable_stream_test : rewindable_stream_test.cc stream /top//boost_unit_test_framework ;
 unit-test sort_test : sort_test.cc stream /top//boost_unit_test_framework ;
diff --git a/util/stream/block.hh b/util/stream/block.hh
index 6a70dba3e..42df13f32 100644
--- a/util/stream/block.hh
+++ b/util/stream/block.hh
@@ -72,6 +72,7 @@ class Block {
 
   private:
     friend class Link;
+    friend class RewindableStream;
 
     /**
      * Points this block's memory at NULL.
diff --git a/util/stream/chain.hh b/util/stream/chain.hh
index 0cd8c2aae..8caa1afcb 100644
--- a/util/stream/chain.hh
+++ b/util/stream/chain.hh
@@ -23,6 +23,7 @@ class ChainConfigException : public Exception {
 };
 
 class Chain;
+class RewindableStream;
 
 /**
  * Encapsulates a @ref PCQueue "producer queue" and a @ref PCQueue "consumer queue" within a @ref Chain "chain".
@@ -35,6 +36,7 @@ class ChainPosition {
   private:
     friend class Chain;
     friend class Link;
+    friend class RewindableStream;
     ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress)
       : in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {}
 
diff --git a/util/stream/io.hh b/util/stream/io.hh
index c3b53bbfe..4605a8a79 100644
--- a/util/stream/io.hh
+++ b/util/stream/io.hh
@@ -70,8 +70,8 @@ class FileBuffer {
       return PWriteAndRecycle(file_.get());
     }
 
-    PRead Source() const {
-      return PRead(file_.get());
+    PRead Source(bool discard = false) {
+      return PRead(discard ? file_.release() : file_.get(), discard);
     }
 
     uint64_t Size() const {
diff --git a/util/stream/rewindable_stream.cc b/util/stream/rewindable_stream.cc
new file mode 100644
index 000000000..c7e39231b
--- /dev/null
+++ b/util/stream/rewindable_stream.cc
@@ -0,0 +1,117 @@
+#include "util/stream/rewindable_stream.hh"
+#include "util/pcqueue.hh"
+
+namespace util {
+namespace stream {
+
+RewindableStream::RewindableStream()
+    : current_(NULL), in_(NULL), out_(NULL), poisoned_(true) {
+  // nothing
+}
+
+void RewindableStream::Init(const ChainPosition &position) {
+  UTIL_THROW_IF2(in_, "RewindableStream::Init twice");
+  in_ = position.in_;
+  out_ = position.out_;
+  poisoned_ = false;
+  progress_ = position.progress_;
+  entry_size_ = position.GetChain().EntrySize();
+  block_size_ = position.GetChain().BlockSize();
+  FetchBlock();
+  current_bl_ = &second_bl_;
+  current_ = static_cast<uint8_t*>(current_bl_->Get());
+  end_ = current_ + current_bl_->ValidSize();
+}
+
+const void *RewindableStream::Get() const {
+  return current_;
+}
+
+void *RewindableStream::Get() {
+  return current_;
+}
+
+RewindableStream &RewindableStream::operator++() {
+  assert(*this);
+  assert(current_ < end_);
+  current_ += entry_size_;
+  if (current_ == end_) {
+    // two cases: either we need to fetch the next block, or we've already
+    // fetched it before. We can check this by looking at the current_bl_
+    // pointer: if it's at the second_bl_, we need to flush and fetch a new
+    // block. Otherwise, we can just move over to the second block.
+    if (current_bl_ == &second_bl_) {
+      if (first_bl_) {
+        out_->Produce(first_bl_);
+        progress_ += first_bl_.ValidSize();
+      }
+      first_bl_ = second_bl_;
+      FetchBlock();
+    }
+    current_bl_ = &second_bl_;
+    current_ = static_cast<uint8_t *>(second_bl_.Get());
+    end_ = current_ + second_bl_.ValidSize();
+  }
+
+  if (!*current_bl_)
+  {
+    if (current_bl_ == &second_bl_ && first_bl_)
+    {
+      out_->Produce(first_bl_);
+      progress_ += first_bl_.ValidSize();
+    }
+    out_->Produce(*current_bl_);
+    poisoned_ = true;
+  }
+
+  return *this;
+}
+
+void RewindableStream::FetchBlock() {
+  // The loop is needed since it is *feasible* that we're given 0 sized but
+  // valid blocks
+  do {
+    in_->Consume(second_bl_);
+  } while (second_bl_ && second_bl_.ValidSize() == 0);
+}
+
+void RewindableStream::Mark() {
+  marked_ = current_;
+}
+
+void RewindableStream::Rewind() {
+  if (marked_ >= first_bl_.Get() && marked_ < first_bl_.ValidEnd()) {
+    current_bl_ = &first_bl_;
+    current_ = marked_;
+  } else if (marked_ >= second_bl_.Get() && marked_ < second_bl_.ValidEnd()) {
+    current_bl_ = &second_bl_;
+    current_ = marked_;
+  } else { UTIL_THROW2("RewindableStream rewound too far"); }
+}
+
+void RewindableStream::Poison() {
+  assert(!poisoned_);
+
+  // Three things: if we have a buffered first block, we need to produce it
+  // first. Then, produce the partial "current" block, and then send the
+  // poison down the chain
+
+  // if we still have a buffered first block, produce it first
+  if (current_bl_ == &second_bl_ && first_bl_) {
+    out_->Produce(first_bl_);
+    progress_ += first_bl_.ValidSize();
+  }
+
+  // send our partial block
+  current_bl_->SetValidSize(current_
+                            - static_cast<uint8_t *>(current_bl_->Get()));
+  out_->Produce(*current_bl_);
+  progress_ += current_bl_->ValidSize();
+
+  // send down the poison
+  current_bl_->SetToPoison();
+  out_->Produce(*current_bl_);
+  poisoned_ = true;
+}
+}
+}
diff --git a/util/stream/rewindable_stream.hh b/util/stream/rewindable_stream.hh
new file mode 100644
index 000000000..9ee637c99
--- /dev/null
+++ b/util/stream/rewindable_stream.hh
@@ -0,0 +1,108 @@
+#ifndef UTIL_STREAM_REWINDABLE_STREAM_H
+#define UTIL_STREAM_REWINDABLE_STREAM_H
+
+#include "util/stream/chain.hh"
+
+#include <boost/noncopyable.hpp>
+
+namespace util {
+namespace stream {
+
+/**
+ * A RewindableStream is like a Stream (but one that is only used for
+ * creating input at the start of a chain) except that it can be rewound to
+ * be able to re-write a part of the stream before it is sent. Rewinding
+ * has a limit of 2 * block_size_ - 1 in distance (it does *not* buffer an
+ * entire stream into memory, only a maximum of 2 * block_size_).
+ */
+class RewindableStream : boost::noncopyable {
+  public:
+    /**
+     * Creates an uninitialized RewindableStream. You **must** call Init()
+     * on it later!
+     */
+    RewindableStream();
+
+    /**
+     * Initializes an existing RewindableStream at a specific position in
+     * a Chain.
+     *
+     * @param position The position in the chain to get input from and
+     *  produce output on
+     */
+    void Init(const ChainPosition &position);
+
+    /**
+     * Constructs a RewindableStream at a specific position in a Chain all
+     * in one step.
+     *
+     * Equivalent to RewindableStream a(); a.Init(....);
+     */
+    explicit RewindableStream(const ChainPosition &position);
+
+    /**
+     * Gets the record at the current stream position. Const version.
+     */
+    const void *Get() const;
+
+    /**
+     * Gets the record at the current stream position.
+     */
+    void *Get();
+
+    operator bool() const { return current_; }
+
+    bool operator!() const { return !(*this); }
+
+    /**
+     * Marks the current position in the stream to be rewound to later.
+     * Note that you can only rewind back as far as 2 * block_size_ - 1!
+     */
+    void Mark();
+
+    /**
+     * Rewinds the stream back to the marked position. This will throw an
+     * exception if the marked position is too far away.
+     */
+    void Rewind();
+
+    /**
+     * Moves the stream forward to the next record. This internally may
+     * buffer a block for the purposes of rewinding.
+     */
+    RewindableStream& operator++();
+
+    /**
+     * Poisons the stream. This sends any buffered blocks down the chain
+     * and sends a poison block as well (sending at most 2 non-poison and 1
+     * poison block).
+     */
+    void Poison();
+
+  private:
+    void FetchBlock();
+
+    std::size_t entry_size_;
+    std::size_t block_size_;
+
+    uint8_t *marked_, *current_, *end_;
+
+    Block first_bl_;
+    Block second_bl_;
+    Block* current_bl_;
+
+    PCQueue<Block> *in_, *out_;
+
+    bool poisoned_;
+
+    WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, RewindableStream &stream) {
+  stream.Init(chain.Add());
+  return chain;
+}
+
+}
+}
+#endif
diff --git a/util/stream/rewindable_stream_test.cc b/util/stream/rewindable_stream_test.cc
new file mode 100644
index 000000000..3ed87f372
--- /dev/null
+++ b/util/stream/rewindable_stream_test.cc
@@ -0,0 +1,41 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/rewindable_stream.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE RewindableStreamTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace stream {
+namespace {
+
+BOOST_AUTO_TEST_CASE(RewindableStreamTest) {
+  scoped_fd in(MakeTemp("io_test_temp"));
+  for (uint64_t i = 0; i < 100000; ++i) {
+    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+  }
+  SeekOrThrow(in.get(), 0);
+
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 100;
+  config.block_count = 6;
+
+  RewindableStream s;
+  Chain chain(config);
+  chain >> Read(in.get()) >> s >> kRecycle;
+  uint64_t i = 0;
+  for (; s; ++s, ++i) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get()));
+    if (100000UL - i == 2)
+      s.Mark();
+  }
+  BOOST_CHECK_EQUAL(100000ULL, i);
+  s.Rewind();
+  BOOST_CHECK_EQUAL(100000ULL - 2, *static_cast<const uint64_t*>(s.Get()));
+}
+
+}
+}
+}
diff --git a/util/stream/sort.hh b/util/stream/sort.hh
index a1e0a8539..1b4801ad6 100644
--- a/util/stream/sort.hh
+++ b/util/stream/sort.hh
@@ -25,6 +25,7 @@
 #include "util/stream/timer.hh"
 
 #include "util/file.hh"
+#include "util/fixed_array.hh"
 #include "util/scoped.hh"
 #include "util/sized_iterator.hh"
 
@@ -544,6 +545,54 @@ template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, cons
   return size;
 }
 
+/**
+ * Represents an @ref util::FixedArray "array" capable of storing @ref util::stream::Sort "Sort" objects.
+ *
+ * In the anticipated use case, an instance of this class will maintain one @ref util::stream::Sort "Sort" object
+ * for each n-gram order (ranging from 1 up to the maximum n-gram order being processed).
+ * Use in this manner would enable the n-grams each n-gram order to be sorted, in parallel.
+ *
+ * @tparam Compare An @ref Comparator "ngram comparator" to use during sorting.
+ */
+template <class Compare, class Combine = NeverCombine> class Sorts : public FixedArray<Sort<Compare, Combine> > {
+  private:
+    typedef Sort<Compare, Combine> S;
+    typedef FixedArray<S> P;
+
+  public:
+    /**
+     * Constructs, but does not initialize.
+     *
+     * @ref util::FixedArray::Init() "Init" must be called before use.
+     *
+     * @see util::FixedArray::Init()
+     */
+    Sorts() {}
+
+    /**
+     * Constructs an @ref util::FixedArray "array" capable of storing a fixed number of @ref util::stream::Sort "Sort" objects.
+     *
+     * @param number The maximum number of @ref util::stream::Sort "sorters" that can be held by this @ref util::FixedArray "array"
+     * @see util::FixedArray::FixedArray()
+     */
+    explicit Sorts(std::size_t number) : FixedArray<Sort<Compare, Combine> >(number) {}
+
+    /**
+     * Constructs a new @ref util::stream::Sort "Sort" object which is stored in this @ref util::FixedArray "array".
+     *
+     * The new @ref util::stream::Sort "Sort" object is constructed using the provided @ref util::stream::SortConfig "SortConfig" and @ref Comparator "ngram   comparator";
+     * once constructed, a new worker @ref util::stream::Thread "thread" (owned by the @ref util::stream::Chain "chain") will sort the n-gram data stored
+     * in the @ref util::stream::Block "blocks" of the provided @ref util::stream::Chain "chain".
+     *
+     * @see util::stream::Sort::Sort()
+     * @see util::stream::Chain::operator>>()
+     */
+    void push_back(util::stream::Chain &chain, const util::stream::SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine()) {
+      new (P::end()) S(chain, config, compare, combine); // use "placement new" syntax to initalize S in an already-allocated memory location
+      P::Constructed();
+    }
+};
+
 } // namespace stream
 } // namespace util
 
diff --git a/util/tempfile.hh b/util/tempfile.hh
index 9b872a27e..9c28346fc 100644
--- a/util/tempfile.hh
+++ b/util/tempfile.hh
@@ -10,13 +10,14 @@
 
 #if defined(_WIN32) || defined(_WIN64)
 #include <windows.h>
+#else
+#include <unistd.h>
 #endif
 
 #include <boost/filesystem.hpp>
 #include <boost/noncopyable.hpp>
 
 #include "util/exception.hh"
-#include "util/unistd.hh"
 
 namespace util
 {
diff --git a/util/unistd.hh b/util/unistd.hh
deleted file mode 100644
index f99be592a..000000000
--- a/util/unistd.hh
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef UTIL_UNISTD_H
-#define UTIL_UNISTD_H
-
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
-
-// Windows doesn't define <unistd.h>
-//
-// So we define what we need here instead:
-//
-#define STDIN_FILENO=0
-#define STDOUT_FILENO=1
-
-
-#else // Huzzah for POSIX!
-
-#include <unistd.h>
-
-#endif
-
-
-
-#endif // UTIL_UNISTD_H
diff --git a/util/usage.cc b/util/usage.cc
index f2b661014..5f66b17d2 100644
--- a/util/usage.cc
+++ b/util/usage.cc
@@ -135,6 +135,16 @@ double WallTime() {
   return Subtract(GetWall(), kRecordStart.Started());
 }
 
+double UserTime() {
+#if !defined(_WIN32) && !defined(_WIN64)
+  struct rusage usage;
+  if (getrusage(RUSAGE_SELF, &usage))
+    return 0.0;
+  return DoubleSec(usage.ru_utime);
+#endif
+  return 0.0;
+}
+
 void PrintUsage(std::ostream &out) {
 #if !defined(_WIN32) && !defined(_WIN64)
   // Linux doesn't set memory usage in getrusage :-(
diff --git a/util/usage.hh b/util/usage.hh
index 85bd2119a..dff81b59d 100644
--- a/util/usage.hh
+++ b/util/usage.hh
@@ -9,6 +9,8 @@ namespace util {
 // Time in seconds since process started.  Zero on unsupported platforms.
 double WallTime();
 
+double UserTime();
+
 void PrintUsage(std::ostream &to);
 
 // Determine how much physical memory there is.  Return 0 on failure.
@@ -16,5 +18,6 @@ uint64_t GuessPhysicalMemory();
 
 // Parse a size like unix sort.  Sadly, this means the default multiplier is K.
 uint64_t ParseSize(const std::string &arg);
+
 } // namespace util
 #endif // UTIL_USAGE_H
author	Kenneth Heafield <github@kheafield.com>	2015-05-19 22:27:30 +0300
committer	Kenneth Heafield <github@kheafield.com>	2015-05-19 22:27:30 +0300
commit	a70d37e46fce323f7a9720e3a621f35d19e4ac9f (patch)
tree	d45651f2d44ba7c373f2a198611dca3b196e36d9 /util
parent	90309aebfa0184ac611725520443b34c3331794b (diff)