Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2011-11-17 23:12:19 +0400
committerKenneth Heafield <github@kheafield.com>2011-11-17 23:12:19 +0400
commit974a708dddab2b4c6836a176d95f8455d0ed5f51 (patch)
tree0916139ae18b032250dc84a6703dabbedc7dc8ac /util
parent17cec851dfcf5712c66e6304308273b62dc532e8 (diff)
Updated kenlm 96ef3f2c11.
Invalidates old gcc and 32-bit formats, replacing these with one consistent format: 64-bit new gcc. Backwards compatible with these files.
Diffstat (limited to 'util')
-rw-r--r--util/file.cc19
-rw-r--r--util/file.hh2
-rw-r--r--util/file_piece.cc10
-rw-r--r--util/file_piece_test.cc41
-rw-r--r--util/getopt.c77
-rw-r--r--[-rwxr-xr-x]util/getopt.hh191
-rw-r--r--util/have.hh18
-rw-r--r--util/mmap.cc11
-rw-r--r--util/portability.cc74
-rw-r--r--util/portability.hh115
-rw-r--r--util/probing_hash_table.hh33
-rw-r--r--util/probing_hash_table_test.cc27
-rw-r--r--util/sorted_uniform.hh93
-rw-r--r--util/sorted_uniform_test.cc84
-rw-r--r--util/tokenize_piece.hh64
-rw-r--r--util/tokenize_piece_test.cc50
16 files changed, 278 insertions, 631 deletions
diff --git a/util/file.cc b/util/file.cc
index 81d1d490c..77922cfad 100644
--- a/util/file.cc
+++ b/util/file.cc
@@ -1,7 +1,6 @@
#include "util/file.hh"
#include "util/exception.hh"
-#include "util/portability.hh"
#include <cstdlib>
#include <cstdio>
@@ -14,6 +13,7 @@
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
+#include <io.h>
#endif
namespace util {
@@ -43,11 +43,28 @@ int OpenReadOrThrow(const char *name) {
}
uint64_t SizeFile(int fd) {
+#if defined(_WIN32) || defined(_WIN64)
+ __int64 ret = _filelengthi64(fd);
+ return (ret == -1) ? kBadSize : ret;
+#else
struct stat sb;
if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
return sb.st_size;
+#endif
+}
+
+void ResizeOrThrow(int fd, uint64_t to) {
+#if defined(_WIN32) || defined(_WIN64)
+ UTIL_THROW_IF(_chsize_s(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
+#else
+ UTIL_THROW_IF(ftruncate(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
+#endif
}
+#ifdef WIN32
+typedef int ssize_t;
+#endif
+
void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
uint8_t *to = static_cast<uint8_t*>(to_void);
while (amount) {
diff --git a/util/file.hh b/util/file.hh
index b820ba351..04023dec0 100644
--- a/util/file.hh
+++ b/util/file.hh
@@ -71,6 +71,8 @@ int OpenReadOrThrow(const char *name);
const uint64_t kBadSize = (uint64_t)-1;
uint64_t SizeFile(int fd);
+void ResizeOrThrow(int fd, uint64_t to);
+
void ReadOrThrow(int fd, void *to, std::size_t size);
std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount);
diff --git a/util/file_piece.cc b/util/file_piece.cc
index 43b578b65..f0d49d555 100644
--- a/util/file_piece.cc
+++ b/util/file_piece.cc
@@ -3,7 +3,9 @@
#include "util/exception.hh"
#include "util/file.hh"
#include "util/mmap.hh"
-#include "util/portability.hh"
+#ifdef WIN32
+#include <io.h>
+#endif // WIN32
#include <iostream>
#include <string>
@@ -130,7 +132,7 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
namespace {
void ParseNumber(const char *begin, char *&end, float &out) {
-#ifdef sun
+#if defined(sun) || defined(WIN32)
out = static_cast<float>(strtod(begin, &end));
#else
out = strtof(begin, &end);
@@ -255,6 +257,10 @@ void FilePiece::TransitionToRead() {
#endif
}
+#ifdef WIN32
+typedef int ssize_t;
+#endif
+
void FilePiece::ReadShift() {
assert(fallback_to_read_);
// Bytes [data_.begin(), position_) have been consumed.
diff --git a/util/file_piece_test.cc b/util/file_piece_test.cc
index dc9ec7e7c..f912e18af 100644
--- a/util/file_piece_test.cc
+++ b/util/file_piece_test.cc
@@ -1,3 +1,4 @@
+// Tests might fail if you have creative characters in your path. Sue me.
#include "util/file_piece.hh"
#include "util/scoped.hh"
@@ -14,10 +15,18 @@
namespace util {
namespace {
+std::string FileLocation() {
+ if (boost::unit_test::framework::master_test_suite().argc < 2) {
+ return "file_piece.cc";
+ }
+ std::string ret(boost::unit_test::framework::master_test_suite().argv[1]);
+ return ret;
+}
+
/* mmap implementation */
BOOST_AUTO_TEST_CASE(MMapReadLine) {
- std::fstream ref("file_piece.cc", std::ios::in);
- FilePiece test("file_piece.cc", NULL, 1);
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+ FilePiece test(FileLocation().c_str(), NULL, 1);
std::string ref_line;
while (getline(ref, ref_line)) {
StringPiece test_line(test.ReadLine());
@@ -35,9 +44,13 @@ BOOST_AUTO_TEST_CASE(MMapReadLine) {
*/
/* read() implementation */
BOOST_AUTO_TEST_CASE(StreamReadLine) {
- std::fstream ref("file_piece.cc", std::ios::in);
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+
+ std::string popen_args = "cat \"";
+ popen_args += FileLocation();
+ popen_args += '"';
- FILE *catter = popen("cat file_piece.cc", "r");
+ FILE *catter = popen(popen_args.c_str(), "r");
BOOST_REQUIRE(catter);
FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
@@ -58,10 +71,15 @@ BOOST_AUTO_TEST_CASE(StreamReadLine) {
// gzip file
BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
- std::fstream ref("file_piece.cc", std::ios::in);
+ std::string location(FileLocation());
+ std::fstream ref(location.c_str(), std::ios::in);
- BOOST_REQUIRE_EQUAL(0, system("gzip <file_piece.cc >file_piece.cc.gz"));
- FilePiece test("file_piece.cc.gz", NULL, 1);
+ std::string command("gzip <\"");
+ command += location + "\" >\"" + location + "\".gz";
+
+ BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
+ FilePiece test((location + ".gz").c_str(), NULL, 1);
+ unlink((location + ".gz").c_str());
std::string ref_line;
while (getline(ref, ref_line)) {
StringPiece test_line(test.ReadLine());
@@ -77,12 +95,15 @@ BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
// the test.
#ifndef __APPLE__
BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
- std::fstream ref("file_piece.cc", std::ios::in);
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+
+ std::string command("gzip <\"");
+ command += FileLocation() + "\"";
- FILE * catter = popen("gzip <file_piece.cc", "r");
+ FILE * catter = popen(command.c_str(), "r");
BOOST_REQUIRE(catter);
- FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
+ FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
std::string ref_line;
while (getline(ref, ref_line)) {
StringPiece test_line(test.ReadLine());
diff --git a/util/getopt.c b/util/getopt.c
new file mode 100644
index 000000000..5dfe545de
--- /dev/null
+++ b/util/getopt.c
@@ -0,0 +1,77 @@
+/*
+POSIX getopt for Windows
+
+AT&T Public License
+
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
+
+#ifndef __GNUC__
+
+#include "getopt.hh"
+#include <stdio.h>
+
+#define NULL 0
+#define EOF (-1)
+#define ERR(s, c) if(opterr){\
+ char errbuf[2];\
+ errbuf[0] = c; errbuf[1] = '\n';\
+ fputs(argv[0], stderr);\
+ fputs(s, stderr);\
+ fputc(c, stderr);}
+ //(void) write(2, argv[0], (unsigned)strlen(argv[0]));\
+ //(void) write(2, s, (unsigned)strlen(s));\
+ //(void) write(2, errbuf, 2);}
+
+int opterr = 1;
+int optind = 1;
+int optopt;
+char *optarg;
+
+int
+getopt(argc, argv, opts)
+int argc;
+char **argv, *opts;
+{
+ static int sp = 1;
+ register int c;
+ register char *cp;
+
+ if(sp == 1)
+ if(optind >= argc ||
+ argv[optind][0] != '-' || argv[optind][1] == '\0')
+ return(EOF);
+ else if(strcmp(argv[optind], "--") == NULL) {
+ optind++;
+ return(EOF);
+ }
+ optopt = c = argv[optind][sp];
+ if(c == ':' || (cp=strchr(opts, c)) == NULL) {
+ ERR(": illegal option -- ", c);
+ if(argv[optind][++sp] == '\0') {
+ optind++;
+ sp = 1;
+ }
+ return('?');
+ }
+ if(*++cp == ':') {
+ if(argv[optind][sp+1] != '\0')
+ optarg = &argv[optind++][sp+1];
+ else if(++optind >= argc) {
+ ERR(": option requires an argument -- ", c);
+ sp = 1;
+ return('?');
+ } else
+ optarg = argv[optind++];
+ sp = 1;
+ } else {
+ if(argv[optind][++sp] == '\0') {
+ sp = 1;
+ optind++;
+ }
+ optarg = NULL;
+ }
+ return(c);
+}
+
+#endif /* __GNUC__ */ \ No newline at end of file
diff --git a/util/getopt.hh b/util/getopt.hh
index 33a706833..6ad977324 100755..100644
--- a/util/getopt.hh
+++ b/util/getopt.hh
@@ -1,190 +1,33 @@
+/*
+POSIX getopt for Windows
-/* getopt.h */
-/* Declarations for getopt.
- Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
- Foundation, Inc. This file is part of the GNU C Library.
+AT&T Public License
- The GNU C Library is free software; you can redistribute
- it and/or modify it under the terms of the GNU Lesser
- General Public License as published by the Free Software
- Foundation; either version 2.1 of the License, or
- (at your option) any later version.
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
- The GNU C Library is distributed in the hope that it will
- be useful, but WITHOUT ANY WARRANTY; without even the
- implied warranty of MERCHANTABILITY or FITNESS FOR A
- PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
-
- You should have received a copy of the GNU Lesser General
- Public License along with the GNU C Library; if not, write
- to the Free Software Foundation, Inc., 59 Temple Place,
- Suite 330, Boston, MA 02111-1307 USA. */
-
-
-
-
-#ifndef _GETOPT_H
-
-#ifndef __need_getopt
-# define _GETOPT_H 1
+#ifdef __GNUC__
+#include <getopt.h>
#endif
+#ifndef __GNUC__
-/* If __GNU_LIBRARY__ is not already defined, either we are being used
- standalone, or this is the first header included in the source file.
- If we are being used with glibc, we need to include <features.h>, but
- that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
- not defined, include <ctype.h>, which will pull in <features.h> for us
- if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
- doesn't flood the namespace with stuff the way some other headers do.) */
-#if !defined __GNU_LIBRARY__
-# include <ctype.h>
-#endif
+#ifndef _WINGETOPT_H_
+#define _WINGETOPT_H_
-#ifdef __cplusplus
+#ifdef __cplusplus
extern "C" {
#endif
-int getopt (int argc, char *const *argv, const char *optstring);
-
-/* For communication from `getopt' to the caller.
- When `getopt' finds an option that takes an argument,
- the argument value is returned here.
- Also, when `ordering' is RETURN_IN_ORDER,
- each non-option ARGV-element is returned here. */
-
-extern char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
- This is used for communication to and from the caller
- and for communication between successive calls to `getopt'.
-
- On entry to `getopt', zero means this is the first call; initialize.
-
- When `getopt' returns -1, this is the index of the first of the
- non-option elements that the caller should itself scan.
-
- Otherwise, `optind' communicates from one call to the next
- how much of ARGV has been scanned so far. */
-
-extern int optind;
-
-/* Callers store zero here to inhibit the error message `getopt' prints
- for unrecognized options. */
-
extern int opterr;
-
-/* Set to an option character which was unrecognized. */
-
+extern int optind;
extern int optopt;
+extern char *optarg;
+extern int getopt(int argc, char **argv, char *opts);
-#ifndef __need_getopt
-/* Describe the long-named options requested by the application.
- The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
- of `struct option' terminated by an element containing a name which is
- zero.
-
- The field `has_arg' is:
- no_argument (or 0) if the option does not take an argument,
- required_argument (or 1) if the option requires an argument,
- optional_argument (or 2) if the option takes an optional argument.
-
- If the field `flag' is not NULL, it points to a variable that is set
- to the value given in the field `val' when the option is found, but
- left unchanged if the option is not found.
-
- To have a long-named option do something other than set an `int' to
- a compiled-in constant, such as set a value from `optarg', set the
- option's `flag' field to zero and its `val' field to a nonzero
- value (the equivalent single-letter option character, if there is
- one). For long options that have a zero `flag' field, `getopt'
- returns the contents of the `val' field. */
-
-struct option
-{
-# if (defined __STDC__ && __STDC__) || defined __cplusplus
- const char *name;
-# else
- char *name;
-# endif
- /* has_arg can't be an enum because some compilers complain about
- type mismatches in all the code that assumes it is an int. */
- int has_arg;
- int *flag;
- int val;
-};
-
-/* Names for the values of the `has_arg' field of `struct option'. */
-
-# define no_argument 0
-# define required_argument 1
-# define optional_argument 2
-#endif /* need getopt */
-
-
-/* Get definitions and prototypes for functions to process the
- arguments in ARGV (ARGC of them, minus the program name) for
- options given in OPTS.
-
- Return the option character from OPTS just read. Return -1 when
- there are no more options. For unrecognized options, or options
- missing arguments, `optopt' is set to the option letter, and '?' is
- returned.
-
- The OPTS string is a list of characters which are recognized option
- letters, optionally followed by colons, specifying that that letter
- takes an argument, to be placed in `optarg'.
-
- If a letter in OPTS is followed by two colons, its argument is
- optional. This behavior is specific to the GNU `getopt'.
-
- The argument `--' causes premature termination of argument
- scanning, explicitly telling `getopt' that there are no more
- options.
-
- If OPTS begins with `--', then non-option arguments are treated as
- arguments to the option '\0'. This behavior is specific to the GNU
- `getopt'. */
-
-#if (defined __STDC__ && __STDC__) || defined __cplusplus
-# ifdef __GNU_LIBRARY__
-/* Many other libraries have conflicting prototypes for getopt, with
- differences in the consts, in stdlib.h. To avoid compilation
- errors, only prototype getopt for the GNU C library. */
-extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
-# else /* not __GNU_LIBRARY__ */
-//extern int getopt ();
-# endif /* __GNU_LIBRARY__ */
-
-# ifndef __need_getopt
-extern int getopt_long (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind);
-extern int getopt_long_only (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind);
-
-/* Internal only. Users should not call this directly. */
-extern int _getopt_internal (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind,
- int __long_only);
-# endif
-#else /* not __STDC__ */
-extern int getopt ();
-# ifndef __need_getopt
-extern int getopt_long ();
-extern int getopt_long_only ();
-
-extern int _getopt_internal ();
-# endif
-#endif /* __STDC__ */
-
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
-/* Make sure we later can get all the definitions and declarations. */
-#undef __need_getopt
+#endif /* _GETOPT_H_ */
+#endif /* __GNUC__ */
-#endif /* getopt.h */
diff --git a/util/have.hh b/util/have.hh
index 9e1fc20cd..aca8c6264 100644
--- a/util/have.hh
+++ b/util/have.hh
@@ -1,9 +1,21 @@
-/* This ties kenlm's config into Moses's build system. If you are using kenlm
- * outside Moses, see http://kheafield.com/code/kenlm/developers/ .
- */
+/* Optional packages. You might want to integrate this with your build system e.g. config.h from ./configure. */
#ifndef UTIL_HAVE__
#define UTIL_HAVE__
+#ifndef HAVE_ZLIB
#define HAVE_ZLIB
+#endif
+
+#ifndef HAVE_ICU
+//#define HAVE_ICU
+#endif
+
+#ifndef HAVE_BOOST
+//#define HAVE_BOOST
+#endif
+
+#ifndef HAVE_THREADS
+//#define HAVE_THREADS
+#endif
#endif // UTIL_HAVE__
diff --git a/util/mmap.cc b/util/mmap.cc
index 3dfe0ab2c..d3a2526fa 100644
--- a/util/mmap.cc
+++ b/util/mmap.cc
@@ -13,13 +13,14 @@
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <stdlib.h>
+
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
+#include <io.h>
#else
#include <sys/mman.h>
#endif
-#include <stdlib.h>
-#include <unistd.h>
namespace util {
@@ -102,7 +103,7 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int
int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ;
HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, 0, size + offset, NULL);
UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed");
- ret = MapViewOfFile(hMapping, protectM, 0, offset, size);
+ LPVOID ret = MapViewOfFile(hMapping, protectM, 0, offset, size);
CloseHandle(hMapping);
UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
#else
@@ -159,8 +160,8 @@ void *MapAnonymous(std::size_t size) {
}
void *MapZeroedWrite(int fd, std::size_t size) {
- UTIL_THROW_IF(-1 == ftruncate(fd, 0), ErrnoException, "ftruncate on fd " << fd << " to 0 failed");
- UTIL_THROW_IF(-1 == ftruncate(fd, size), ErrnoException, "ftruncate on fd " << fd << " to " << size << " failed");
+ ResizeOrThrow(fd, 0);
+ ResizeOrThrow(fd, size);
return MapOrThrow(size, true, kFileFlags, false, fd, 0);
}
diff --git a/util/portability.cc b/util/portability.cc
deleted file mode 100644
index 2efd74cba..000000000
--- a/util/portability.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-
-#include <stdlib.h>
-#include <errno.h>
-#include "util/portability.hh"
-
-#ifdef WIN32
-
-int RUSAGE_SELF = 0;
-
-int sysconf(int) { return 0; }
-int msync(void*, int, int) { return 0; }
-int munmap(void *, int) { return 0; }
-void *mmap(void*, int, int, int, FD, OFF_T) { return 0; }
-int write(int, const void *, int) {return 0; }
-
-//FILE *popen(const char*, const char*) { return 0; }
-//int pclose(FILE *) { return 0; }
-int close(FD fd) { return 0; }
-
-
-// to be implemented by boost
-int mkdtemp(const char*) { return 0; }
-
-// done
-long lrint(float x)
-{
- long ret = (long) x;
- return ret;
-}
-
-float strtof(const char *begin, char **end)
-{
- double ret = strtod(begin, end);
- return (float) ret;
-}
-
-
-int ftruncate (FD hfile, unsigned int size)
-{
- unsigned int curpos;
- /*
- HANDLE hfile;
-
- if (fd < 0)
- {
- errno = EBADF;
- return -1;
- }
-
- hfile = (HANDLE) _get_osfhandle (fd);
- */
- curpos = SetFilePointer (hfile, 0, NULL, FILE_CURRENT);
- if (curpos == ~0
- || SetFilePointer (hfile, size, NULL, FILE_BEGIN) == ~0
- || !SetEndOfFile (hfile))
- {
- int error = GetLastError ();
- switch (error)
- {
- case ERROR_INVALID_HANDLE:
- errno = EBADF;
- break;
- default:
- errno = EIO;
- break;
- }
- return -1;
- }
- return 0;
-}
-
-#endif
-
-
diff --git a/util/portability.hh b/util/portability.hh
deleted file mode 100644
index acbc01922..000000000
--- a/util/portability.hh
+++ /dev/null
@@ -1,115 +0,0 @@
-
-#pragma once
-
-#include <assert.h>
-#include <stdint.h>
-
-#ifdef WIN32
-
-#include <windows.h>
-#include <direct.h>
-#include <io.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/stat.h>
-#include "util/getopt.hh"
-
-#undef max
-#undef min
-
-typedef HANDLE FD;
-
-const FD kBadFD = INVALID_HANDLE_VALUE;
-
-typedef int ssize_t;
-
-#define _SC_PAGE_SIZE 1
-#define MS_SYNC 1
-
-int sysconf(int);
-int msync(void*, int, int);
-int ftruncate(FD, unsigned int);
-
-long lrint(float);
-
-//inline int getrusage(int, struct rusage*) { return 0; }
-//extern int RUSAGE_SELF;
-
-typedef __int64 OFF_T;
-//#define OFF_T __int64
-
-#ifndef S_ISDIR
-#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
-#endif
-
-#ifndef S_ISREG
-#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
-#endif
-
-int mkdtemp(const char*);
-int munmap(void *, int);
-void *mmap(void*, int, int, int, FD, OFF_T);
-
-#define PROT_READ 1
-#define PROT_WRITE 1
-#define MAP_FAILED (void*) 0x1
-#define MAP_SHARED 1
-#define MAP_ANON 1
-#define MAP_PRIVATE 1
-#define S_IRUSR 1
-#define S_IROTH 1
-#define S_IRGRP 1
-
-int write(int, const void *, int);
-#define S_IRUSR 1
-#define S_IWUSR 1
-
-//const char *strerror_r(int, const char *buf, int);
-
-float strtof(const char *begin, char **end);
-//FILE *popen(const char*, const char*);
-//int pclose(FILE *);
-int close(FD fd);
-
-#define dup(x) _dup(x)
-#define rmdir(x) _rmdir(x)
-#define strerror_r(errNum, buffer, numberOfElements) strerror_s(buffer, numberOfElements);
-
-#else // assume UNIX OS
-
-#include <stdint.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-typedef int FD;
-const FD kBadFD = -1;
-
-typedef off_t OFF_T;
-
-#endif
-
-#ifdef __GNUC__
-#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
-#else
-#ifdef _WIN32
-#define UTIL_FUNC_NAME __FUNCTION__
-#else
-#define UTIL_FUNC_NAME NULL
-#endif
-#endif
-
-/* Bit-level packing routines */
-#ifdef __APPLE__
- #include <architecture/byte_order.h>
-#elif __linux__
- #include <endian.h>
-#elif WIN32
- // nothing
-#else
- #include <arpa/nameser_compat.h>
-#endif
-
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh
index 8122d69c5..f466cebc9 100644
--- a/util/probing_hash_table.hh
+++ b/util/probing_hash_table.hh
@@ -18,27 +18,33 @@ class ProbingSizeException : public Exception {
~ProbingSizeException() throw() {}
};
+// std::identity is an SGI extension :-(
+struct IdentityHash {
+ template <class T> T operator()(T arg) const { return arg; }
+};
+
/* Non-standard hash table
* Buckets must be set at the beginning and must be greater than maximum number
- * of elements, else an infinite loop happens.
+ * of elements, else it throws ProbingSizeException.
* Memory management and initialization is externalized to make it easier to
* serialize these to disk and load them quickly.
* Uses linear probing to find value.
* Only insert and lookup operations.
*/
-template <class PackingT, class HashT, class EqualT = std::equal_to<typename PackingT::Key> > class ProbingHashTable {
+template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class ProbingHashTable {
public:
- typedef PackingT Packing;
- typedef typename Packing::Key Key;
- typedef typename Packing::MutableIterator MutableIterator;
- typedef typename Packing::ConstIterator ConstIterator;
-
+ typedef EntryT Entry;
+ typedef typename Entry::Key Key;
+ typedef const Entry *ConstIterator;
+ typedef Entry *MutableIterator;
typedef HashT Hash;
typedef EqualT Equal;
+ public:
static std::size_t Size(std::size_t entries, float multiplier) {
- return std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries))) * Packing::kBytes;
+ std::size_t buckets = std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries)));
+ return buckets * sizeof(Entry);
}
// Must be assigned to later.
@@ -49,9 +55,9 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
{}
ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal())
- : begin_(Packing::FromVoid(start)),
- buckets_(allocated / Packing::kBytes),
- end_(begin_ + (allocated / Packing::kBytes)),
+ : begin_(reinterpret_cast<MutableIterator>(start)),
+ buckets_(allocated / sizeof(Entry)),
+ end_(begin_ + buckets_),
invalid_(invalid),
hash_(hash_func),
equal_(equal_func),
@@ -62,11 +68,10 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
{}
template <class T> MutableIterator Insert(const T &t) {
- if (++entries_ >= buckets_)
- UTIL_THROW(ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
#ifdef DEBUG
assert(initialized_);
#endif
+ UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
if (++i == end_) { i = begin_; }
@@ -84,7 +89,7 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
if (equal_(got, key)) { out = i; return true; }
if (equal_(got, invalid_)) return false;
if (++i == end_) i = begin_;
- }
+ }
}
template <class Key> bool Find(const Key key, ConstIterator &out) const {
diff --git a/util/probing_hash_table_test.cc b/util/probing_hash_table_test.cc
index ff2f5af31..ef68e5f22 100644
--- a/util/probing_hash_table_test.cc
+++ b/util/probing_hash_table_test.cc
@@ -1,6 +1,6 @@
#include "util/probing_hash_table.hh"
-#include "util/key_value_packing.hh"
+#include <stdint.h>
#define BOOST_TEST_MODULE ProbingHashTableTest
#include <boost/test/unit_test.hpp>
@@ -9,17 +9,34 @@
namespace util {
namespace {
-typedef AlignedPacking<char, uint64_t> Packing;
-typedef ProbingHashTable<Packing, boost::hash<char> > Table;
+struct Entry {
+ unsigned char key;
+ typedef unsigned char Key;
+
+ unsigned char GetKey() const {
+ return key;
+ }
+
+ uint64_t GetValue() const {
+ return value;
+ }
+
+ uint64_t value;
+};
+
+typedef ProbingHashTable<Entry, boost::hash<unsigned char> > Table;
BOOST_AUTO_TEST_CASE(simple) {
char mem[Table::Size(10, 1.2)];
memset(mem, 0, sizeof(mem));
Table table(mem, sizeof(mem));
- Packing::ConstIterator i = Packing::ConstIterator();
+ const Entry *i = NULL;
BOOST_CHECK(!table.Find(2, i));
- table.Insert(Packing::Make(3, 328920));
+ Entry to_ins;
+ to_ins.key = 3;
+ to_ins.value = 328920;
+ table.Insert(to_ins);
BOOST_REQUIRE(table.Find(3, i));
BOOST_CHECK_EQUAL(3, i->GetKey());
BOOST_CHECK_EQUAL(static_cast<uint64_t>(328920), i->GetValue());
diff --git a/util/sorted_uniform.hh b/util/sorted_uniform.hh
index 0391189f0..7700d9e64 100644
--- a/util/sorted_uniform.hh
+++ b/util/sorted_uniform.hh
@@ -122,99 +122,6 @@ template <class Iterator, class Accessor> Iterator BinaryBelow(
return begin - 1;
}
-// To use this template, you need to define a Pivot function to match Key.
-template <class PackingT> class SortedUniformMap {
- public:
- typedef PackingT Packing;
- typedef typename Packing::ConstIterator ConstIterator;
- typedef typename Packing::MutableIterator MutableIterator;
-
- struct Accessor {
- public:
- typedef typename Packing::Key Key;
- const Key &operator()(const ConstIterator &i) const { return i->GetKey(); }
- Key &operator()(const MutableIterator &i) const { return i->GetKey(); }
- };
-
- // Offer consistent API with probing hash.
- static std::size_t Size(std::size_t entries, float /*ignore*/ = 0.0) {
- return sizeof(uint64_t) + entries * Packing::kBytes;
- }
-
- SortedUniformMap()
-#ifdef DEBUG
- : initialized_(false), loaded_(false)
-#endif
- {}
-
- SortedUniformMap(void *start, std::size_t /*allocated*/) :
- begin_(Packing::FromVoid(reinterpret_cast<uint64_t*>(start) + 1)),
- end_(begin_), size_ptr_(reinterpret_cast<uint64_t*>(start))
-#ifdef DEBUG
- , initialized_(true), loaded_(false)
-#endif
- {}
-
- void LoadedBinary() {
-#ifdef DEBUG
- assert(initialized_);
- assert(!loaded_);
- loaded_ = true;
-#endif
- // Restore the size.
- end_ = begin_ + *size_ptr_;
- }
-
- // Caller responsible for not exceeding specified size. Do not call after FinishedInserting.
- template <class T> void Insert(const T &t) {
-#ifdef DEBUG
- assert(initialized_);
- assert(!loaded_);
-#endif
- *end_ = t;
- ++end_;
- }
-
- void FinishedInserting() {
-#ifdef DEBUG
- assert(initialized_);
- assert(!loaded_);
- loaded_ = true;
-#endif
- std::sort(begin_, end_);
- *size_ptr_ = (end_ - begin_);
- }
-
- // Don't use this to change the key.
- template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
-#ifdef DEBUG
- assert(initialized_);
- assert(loaded_);
-#endif
- return SortedUniformFind<MutableIterator, Accessor, Pivot64>(begin_, end_, key, out);
- }
-
- // Do not call before FinishedInserting.
- template <class Key> bool Find(const Key key, ConstIterator &out) const {
-#ifdef DEBUG
- assert(initialized_);
- assert(loaded_);
-#endif
- return SortedUniformFind<ConstIterator, Accessor, Pivot64>(Accessor(), ConstIterator(begin_), ConstIterator(end_), key, out);
- }
-
- ConstIterator begin() const { return begin_; }
- ConstIterator end() const { return end_; }
-
- private:
- typename Packing::MutableIterator begin_, end_;
- uint64_t *size_ptr_;
-#ifdef DEBUG
- bool initialized_;
- bool loaded_;
-#endif
-};
-
} // namespace util
#endif // UTIL_SORTED_UNIFORM__
diff --git a/util/sorted_uniform_test.cc b/util/sorted_uniform_test.cc
index 4aa4c8aad..ac7a0bfc5 100644
--- a/util/sorted_uniform_test.cc
+++ b/util/sorted_uniform_test.cc
@@ -1,7 +1,5 @@
#include "util/sorted_uniform.hh"
-#include "util/key_value_packing.hh"
-
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int.hpp>
#include <boost/random/variate_generator.hpp>
@@ -17,74 +15,86 @@
namespace util {
namespace {
-template <class Map, class Key, class Value> void Check(const Map &map, const boost::unordered_map<Key, Value> &reference, const Key key) {
+template <class KeyT, class ValueT> struct Entry {
+ typedef KeyT Key;
+ typedef ValueT Value;
+
+ Key key;
+ Value value;
+
+ Key GetKey() const {
+ return key;
+ }
+
+ Value GetValue() const {
+ return value;
+ }
+
+ bool operator<(const Entry<Key,Value> &other) const {
+ return key < other.key;
+ }
+};
+
+template <class KeyT> struct Accessor {
+ typedef KeyT Key;
+ template <class Value> Key operator()(const Entry<Key, Value> *entry) const {
+ return entry->GetKey();
+ }
+};
+
+template <class Key, class Value> void Check(const Entry<Key, Value> *begin, const Entry<Key, Value> *end, const boost::unordered_map<Key, Value> &reference, const Key key) {
typename boost::unordered_map<Key, Value>::const_iterator ref = reference.find(key);
- typename Map::ConstIterator i = typename Map::ConstIterator();
+ typedef const Entry<Key, Value> *It;
+ // g++ can't tell that require will crash and burn.
+ It i = NULL;
+ bool ret = SortedUniformFind<It, Accessor<Key>, Pivot64>(Accessor<Key>(), begin, end, key, i);
if (ref == reference.end()) {
- BOOST_CHECK(!map.Find(key, i));
+ BOOST_CHECK(!ret);
} else {
- // g++ can't tell that require will crash and burn.
- BOOST_REQUIRE(map.Find(key, i));
+ BOOST_REQUIRE(ret);
BOOST_CHECK_EQUAL(ref->second, i->GetValue());
}
}
-typedef SortedUniformMap<AlignedPacking<uint64_t, uint32_t> > TestMap;
-
BOOST_AUTO_TEST_CASE(empty) {
- char buf[TestMap::Size(0)];
- TestMap map(buf, TestMap::Size(0));
- map.FinishedInserting();
- TestMap::ConstIterator i;
- BOOST_CHECK(!map.Find(42, i));
-}
-
-BOOST_AUTO_TEST_CASE(one) {
- char buf[TestMap::Size(1)];
- TestMap map(buf, sizeof(buf));
- Entry<uint64_t, uint32_t> e;
- e.Set(42,2);
- map.Insert(e);
- map.FinishedInserting();
- TestMap::ConstIterator i = TestMap::ConstIterator();
- BOOST_REQUIRE(map.Find(42, i));
- BOOST_CHECK(i == map.begin());
- BOOST_CHECK(!map.Find(43, i));
- BOOST_CHECK(!map.Find(41, i));
+ typedef const Entry<uint64_t, float> T;
+ const T *i;
+ bool ret = SortedUniformFind<const T*, Accessor<uint64_t>, Pivot64>(Accessor<uint64_t>(), (const T*)NULL, (const T*)NULL, (uint64_t)10, i);
+ BOOST_CHECK(!ret);
}
template <class Key> void RandomTest(Key upper, size_t entries, size_t queries) {
typedef unsigned char Value;
- typedef SortedUniformMap<AlignedPacking<Key, unsigned char> > Map;
- boost::scoped_array<char> buffer(new char[Map::Size(entries)]);
- Map map(buffer.get(), entries);
boost::mt19937 rng;
boost::uniform_int<Key> range_key(0, upper);
boost::uniform_int<Value> range_value(0, 255);
boost::variate_generator<boost::mt19937&, boost::uniform_int<Key> > gen_key(rng, range_key);
boost::variate_generator<boost::mt19937&, boost::uniform_int<unsigned char> > gen_value(rng, range_value);
+ typedef Entry<Key, Value> Ent;
+ std::vector<Ent> backing;
boost::unordered_map<Key, unsigned char> reference;
- Entry<Key, unsigned char> ent;
+ Ent ent;
for (size_t i = 0; i < entries; ++i) {
Key key = gen_key();
unsigned char value = gen_value();
if (reference.insert(std::make_pair(key, value)).second) {
- ent.Set(key, value);
- map.Insert(Entry<Key, unsigned char>(ent));
+ ent.key = key;
+ ent.value = value;
+ backing.push_back(ent);
}
}
- map.FinishedInserting();
+ std::sort(backing.begin(), backing.end());
// Random queries.
for (size_t i = 0; i < queries; ++i) {
const Key key = gen_key();
- Check<Map, Key, unsigned char>(map, reference, key);
+ Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, key);
}
typename boost::unordered_map<Key, unsigned char>::const_iterator it = reference.begin();
for (size_t i = 0; (i < queries) && (it != reference.end()); ++i, ++it) {
- Check<Map, Key, unsigned char>(map, reference, it->second);
+ Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, it->second);
}
}
diff --git a/util/tokenize_piece.hh b/util/tokenize_piece.hh
index 413bda0b9..c7e1c8633 100644
--- a/util/tokenize_piece.hh
+++ b/util/tokenize_piece.hh
@@ -1,6 +1,7 @@
#ifndef UTIL_TOKENIZE_PIECE__
#define UTIL_TOKENIZE_PIECE__
+#include "util/exception.hh"
#include "util/string_piece.hh"
#include <boost/iterator/iterator_facade.hpp>
@@ -8,63 +9,25 @@
#include <algorithm>
#include <iostream>
-/* Usage:
- *
- * for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) {
- * std::cout << *i << "\n";
- * }
- *
- */
-
namespace util {
-// Tokenize a StringPiece using an iterator interface. boost::tokenizer doesn't work with StringPiece.
-template <char d> class PieceIterator : public boost::iterator_facade<PieceIterator<d>, const StringPiece, boost::forward_traversal_tag> {
+// Thrown on dereference when out of tokens to parse
+class OutOfTokens : public Exception {
public:
- // Default construct is end, which is also accessed by kEndPieceIterator;
- PieceIterator() {}
-
- explicit PieceIterator(const StringPiece &str)
- : after_(str) {
- increment();
- }
+ OutOfTokens() throw() {}
+ ~OutOfTokens() throw() {}
+};
- bool operator!() const {
- return after_.data() == 0;
- }
- operator bool() const {
- return after_.data() != 0;
- }
+class SingleCharacter {
+ public:
+ explicit SingleCharacter(char delim) : delim_(delim) {}
- static PieceIterator<d> end() {
- return PieceIterator<d>();
+ StringPiece Find(const StringPiece &in) const {
+ return StringPiece(std::find(in.data(), in.data() + in.size(), delim_), 1);
}
private:
- friend class boost::iterator_core_access;
-
- void increment() {
- const char *start = after_.data();
- for (; (start != after_.data() + after_.size()) && (d == *start); ++start) {}
- if (start == after_.data() + after_.size()) {
- // End condition.
- after_.clear();
- return;
- }
- const char *finish = start;
- for (; (finish != after_.data() + after_.size()) && (d != *finish); ++finish) {}
- current_ = StringPiece(start, finish - start);
- after_ = StringPiece(finish, after_.data() + after_.size() - finish);
- }
-
- bool equal(const PieceIterator &other) const {
- return after_.data() == other.after_.data();
- }
-
- const StringPiece &dereference() const { return current_; }
-
- StringPiece current_;
- StringPiece after_;
+ char delim_;
};
class MultiCharacter {
@@ -95,7 +58,7 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
public:
TokenIter() {}
- TokenIter(const StringPiece &str, const Find &finder) : after_(str), finder_(finder) {
+ template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
increment();
}
@@ -130,6 +93,7 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
}
const StringPiece &dereference() const {
+ UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
return current_;
}
diff --git a/util/tokenize_piece_test.cc b/util/tokenize_piece_test.cc
index e07ebcf5e..d856018fb 100644
--- a/util/tokenize_piece_test.cc
+++ b/util/tokenize_piece_test.cc
@@ -9,53 +9,7 @@
namespace util {
namespace {
-BOOST_AUTO_TEST_CASE(simple) {
- PieceIterator<' '> it("single spaced words.");
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("single"), *it);
- ++it;
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("spaced"), *it);
- ++it;
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("words."), *it);
- ++it;
- BOOST_CHECK(!it);
-}
-
-BOOST_AUTO_TEST_CASE(null_delimiter) {
- const char str[] = "\0first\0\0second\0\0\0third\0fourth\0\0\0";
- PieceIterator<'\0'> it(StringPiece(str, sizeof(str) - 1));
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("first"), *it);
- ++it;
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("second"), *it);
- ++it;
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("third"), *it);
- ++it;
- BOOST_REQUIRE(it);
- BOOST_CHECK_EQUAL(StringPiece("fourth"), *it);
- ++it;
- BOOST_CHECK(!it);
-}
-
-BOOST_AUTO_TEST_CASE(null_entries) {
- const char str[] = "\0split\0\0 \0me\0 ";
- PieceIterator<' '> it(StringPiece(str, sizeof(str) - 1));
- BOOST_REQUIRE(it);
- const char first[] = "\0split\0\0";
- BOOST_CHECK_EQUAL(StringPiece(first, sizeof(first) - 1), *it);
- ++it;
- BOOST_REQUIRE(it);
- const char second[] = "\0me\0";
- BOOST_CHECK_EQUAL(StringPiece(second, sizeof(second) - 1), *it);
- ++it;
- BOOST_CHECK(!it);
-}
-
-/*BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
+BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
const char str[] = "nodelimit at all";
TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
BOOST_REQUIRE(it);
@@ -79,7 +33,7 @@ BOOST_AUTO_TEST_CASE(remove_empty) {
const char str[] = "|||";
TokenIter<MultiCharacter, true> it(str, MultiCharacter("|||"));
BOOST_CHECK(!it);
-}*/
+}
BOOST_AUTO_TEST_CASE(remove_empty_keep) {
const char str[] = " |||";