Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2011-11-17 16:49:55 +0400
committer Kenneth Heafield <github@kheafield.com> 2011-11-17 16:49:55 +0400
commit 72a4c8a0d34529086b91c016ce32f0b03f9778a1 (patch)
tree 1520b39aa01e77dda85b57f42749e992b42a886d /util/file_piece.hh
parent 07a8558c02fe46b08734c8479b58ad0f9e3a1a3c (diff)
Move kenlm up one level, simplify compilation
Diffstat (limited to 'util/file_piece.hh')
-rw-r--r-- util/file_piece.hh 126
1 files changed, 126 insertions, 0 deletions
diff --git a/util/file_piece.hh b/util/file_piece.hh
new file mode 100644
index 000000000..b81ac0e20
--- /dev/null
+++ b/util/file_piece.hh
@@ -0,0 +1,126 @@
+#ifndef UTIL_FILE_PIECE__
+#define UTIL_FILE_PIECE__
+
+#include "util/ersatz_progress.hh"
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/have.hh"
+#include "util/mmap.hh"
+#include "util/string_piece.hh"
+
+#include <cstddef>
+#include <string>
+
+#include <stdint.h>
+
+namespace util {
+
+// Exception type for number-parsing failures (purpose inferred from the
+// name; the code that throws it is defined outside this header).
+class ParseNumberException : public Exception {
+ public:
+  // Non-throwing destructor, defined inline.
+  ~ParseNumberException() throw() {}
+
+  // Builds the exception from the text that was being parsed.  Declared
+  // non-throwing; how `value` is reported is decided by the out-of-line
+  // definition.
+  explicit ParseNumberException(StringPiece value) throw();
+};
+
+// Exception type for gz-related errors (purpose inferred from the name; the
+// code that throws it is defined outside this header).
+class GZException : public Exception {
+ public:
+  // Default construction: no associated handle, declared non-throwing.
+  GZException() throw() {}
+
+  // Construction from an opaque handle.  Presumably the gz file handle whose
+  // error state is reported -- confirm against the out-of-line definition.
+  explicit GZException(void *file);
+
+  // Non-throwing destructor, defined inline.
+  ~GZException() throw() {}
+};
+
+extern const bool kSpaces[256];
+
+// Sequential reader that hands out characters, delimited tokens, lines and
+// numbers from a file or file descriptor.  Tokens are returned as StringPiece
+// views into an internal buffer rather than as copies.
+//
+// Memory backing the returned StringPiece may vanish on the next call.
+class FilePiece {
+ public:
+  // Opens the named file.
+  //   show_progress: optional stream handed to the progress display; NULL by
+  //                  default.
+  //   min_buffer:    buffer size hint in bytes; 32 MB default.
+  explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432);
+  // Takes ownership of fd.  name is used for messages.  The remaining
+  // parameters are as for the constructor above.
+  explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432);
+
+  ~FilePiece();
+
+  // Returns the next character and advances past it.  When the buffered data
+  // is exhausted, Shift() is asked for more; EndOfFileException is thrown if
+  // at_end_ is set once Shift() returns.
+  // NOTE(review): the at_end_ test does not look at whether Shift() supplied
+  // further bytes -- confirm Shift() only leaves at_end_ set when nothing
+  // remains to be read.
+  char get() {
+    if (position_ == position_end_) {
+      Shift();
+      if (at_end_) throw EndOfFileException();
+    }
+    return *(position_++);
+  }
+
+  // Skips leading delimiters, then returns the token that runs up to the next
+  // delimiter or end of file.  Leaves the delimiter, if any, to be returned
+  // by get().  Delimiters are the bytes whose entry in `delim` is true; the
+  // default table is kSpaces (described in the original comments as the
+  // isspace() set).
+  StringPiece ReadDelimited(const bool *delim = kSpaces) {
+    SkipSpaces(delim);
+    return Consume(FindDelimiterOrEOF(delim));
+  }
+
+  // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
+  // It is similar to getline in that way.
+  StringPiece ReadLine(char delim = '\n');
+
+  // Numeric readers, defined out of line.
+  float ReadFloat();
+  double ReadDouble();
+  long int ReadLong();
+  unsigned long int ReadULong();
+
+  // Advances position_ past every byte whose entry in `delim` is true,
+  // asking Shift() for more data whenever the buffered data runs out.
+  // Returns with position_ on the first non-delimiter byte, or with
+  // position_ == position_end_ if Shift() could not supply more data.
+  void SkipSpaces(const bool *delim = kSpaces) {
+    for (; ; ++position_) {
+      if (position_ == position_end_) {
+        Shift();
+        // Shift() may return without new data at end of input; stop here
+        // rather than dereference one past the end of the buffer.
+        if (position_ == position_end_) return;
+      }
+      if (!delim[static_cast<unsigned char>(*position_)]) return;
+    }
+  }
+
+  // Position of the read cursor: its distance from the start of data_ plus
+  // mapped_offset_.
+  uint64_t Offset() const {
+    return position_ - data_.begin() + mapped_offset_;
+  }
+
+  // Returns the stored file name.
+  const std::string &FileName() const { return file_name_; }
+
+ private:
+  // Setup helper, defined out of line (presumably shared by both
+  // constructors -- confirm in the implementation file).
+  void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
+
+  // Generic numeric parser, defined out of line (presumably the backend of
+  // the Read* functions above).
+  template <class T> T ReadNumber();
+
+  // Returns the bytes in [position_, to) as a StringPiece and moves the
+  // cursor to `to`.
+  StringPiece Consume(const char *to) {
+    StringPiece ret(position_, to - position_);
+    position_ = to;
+    return ret;
+  }
+
+  // Defined out of line; its result is passed to Consume() by ReadDelimited()
+  // as the end of the current token.
+  const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
+
+  // Called when position_ reaches position_end_ to make more data available.
+  void Shift();
+  // Backends to Shift().
+  void MMapShift(uint64_t desired_begin);
+
+  void TransitionToRead();
+  void ReadShift();
+
+  // position_ is the read cursor and position_end_ the end of the currently
+  // readable bytes.  last_space_ is maintained outside this header.
+  const char *position_, *last_space_, *position_end_;
+
+  scoped_fd file_;
+  const uint64_t total_size_;
+  const uint64_t page_;
+
+  std::size_t default_map_size_;
+  // Added to the cursor's distance from data_.begin() by Offset().
+  uint64_t mapped_offset_;
+
+  // Order matters: file_ should always be destroyed after this.
+  // (Members are destroyed in reverse declaration order, so data_ must stay
+  // declared after file_.)
+  scoped_memory data_;
+
+  // Checked by get() after Shift() to decide whether to throw
+  // EndOfFileException.
+  bool at_end_;
+  bool fallback_to_read_;
+
+  ErsatzProgress progress_;
+
+  // Returned by FileName().
+  std::string file_name_;
+
+#ifdef HAVE_ZLIB
+  // Opaque handle, present only when built with zlib support.
+  void *gz_file_;
+#endif // HAVE_ZLIB
+};
+
+} // namespace util
+
+#endif // UTIL_FILE_PIECE__