Welcome to mirror list, hosted at ThFree Co, Russian Federation.

arpa_io.cc « filter « lm - github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: add610aab000b2835d19de36b0f2840ffc50aafe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include "arpa_io.hh"
#include "../../util/file_piece.hh"
#include "../../util/string_stream.hh"

#include <iostream>
#include <ostream>
#include <string>
#include <vector>

#include <cctype>
#include <cerrno>
#include <cstring>

namespace lm {

// Constructs the exception and streams `message` into it as its text.
// Declared throw(): the constructor promises not to let an exception escape.
ARPAInputException::ARPAInputException(const StringPiece &message) throw() {
  *this << message;
}

// Constructs the exception with text of the form
//   <message> in line <line>
// where `line` is the input line the caller wants reported.
ARPAInputException::ARPAInputException(const StringPiece &message, const StringPiece &line) throw() {
  *this << message;
  *this << " in line ";
  *this << line;
}

// Out-of-line destructor with an empty body; declared throw() (non-throwing).
ARPAInputException::~ARPAInputException() throw() {}

// Emits the ARPA count header to `out` at its current position:
//
//   <blank line>
//   \data\
//   ngram 1=<number[0]>
//   ngram 2=<number[1]>
//   ...
//   <blank line>
//
// This function never seeks; positioning `out` is the caller's job.
template <class Stream> void WriteCounts(Stream &out, const std::vector<uint64_t> &number) {
  out << "\n\\data\\\n";
  // N-gram orders are 1-based in the header, so pre-increment before printing.
  unsigned int order = 0;
  for (std::vector<uint64_t>::const_iterator count = number.begin(); count != number.end(); ++count) {
    ++order;
    out << "ngram " << order << "=" << *count << '\n';
  }
  out << '\n';
}

size_t SizeNeededForCounts(const std::vector<uint64_t> &number) {
  util::StringStream stream;
  WriteCounts(stream, number);
  return stream.str().size();
}

// Returns true when every character of `line` is whitespace according to
// isspace() in the current C locale. An empty `line` yields true.
bool IsEntirelyWhiteSpace(const StringPiece &line) {
  const char *const chars = line.data();
  const size_t length = static_cast<size_t>(line.size());
  for (size_t i = 0; i < length; ++i) {
    // The <cctype> classifiers are only defined for EOF and values
    // representable as unsigned char.  Where plain char is signed, bytes
    // >= 0x80 (e.g. UTF-8) are negative, so convert before classifying.
    if (!isspace(static_cast<unsigned char>(chars[i]))) return false;
  }
  return true;
}

// Creates the output file `name` via util::CreateOrThrow and attaches the
// output stream to it, passing `buffer_size` through to the stream.
// file_ is built from file_backing_.get(), so file_backing_ has to be
// declared before file_ in the class for this initialization to be valid.
ARPAOutput::ARPAOutput(const char *name, size_t buffer_size)
  : file_backing_(util::CreateOrThrow(name)),
    file_(file_backing_.get(), buffer_size) {
}

// Writes `reserve` newline characters at the current position of the output.
// NOTE(review): presumably this is the placeholder that Finish() later
// overwrites with the real count header -- confirm against callers.
void ARPAOutput::ReserveForCounts(std::streampos reserve) {
  std::streampos written = 0;
  while (written < reserve) {
    file_ << '\n';
    written += std::streampos(1);
  }
}

// Opens the section for n-grams of order `length` by writing the
// "\<length>-grams:" heading line, then resets the per-section entry counter.
void ARPAOutput::BeginLength(unsigned int length) {
  file_ << '\\' << length;
  file_ << "-grams:" << '\n';
  fast_counter_ = 0;
}

// Closes the section for order `length`: writes the blank line that ends the
// section and records the number of entries counted since BeginLength.
// `length` is 1-based and must be at least 1; with 0 the index below would
// wrap around and fall outside counts_.
void ARPAOutput::EndLength(unsigned int length) {
  file_ << '\n';
  // Grow the per-order table on demand so slot length-1 exists.
  if (counts_.size() < length) counts_.resize(length);
  counts_[length - 1] = fast_counter_;
}

// Completes the ARPA file.  The order of the steps matters:
//   1. append the closing "\end\" marker after the last section,
//   2. seek back to offset 0,
//   3. write the \data\ count header there from counts_,
//   4. flush buffered output.
// NOTE(review): step 3 overwrites whatever is at the start of the file;
// presumably ReserveForCounts() was called first with at least
// SizeNeededForCounts(counts_) bytes -- confirm against callers.
void ARPAOutput::Finish() {
  file_ << "\\end\\\n";
  // Rewind so the header lands at the beginning of the file.
  file_.seekp(0);
  WriteCounts(file_, counts_);
  file_.flush();
}

} // namespace lm