Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/kenlm
diff options
context:
space:
mode:
author: heafield <heafield@1f5c12ca-751b-0410-a591-d2e778427230> 2011-08-03 23:46:19 +0400
committer: heafield <heafield@1f5c12ca-751b-0410-a591-d2e778427230> 2011-08-03 23:46:19 +0400
commit: 61974ad75e12450c74e8b2e95322769d950a22b2 (patch)
tree: ac8f972660f8db55780f3566a117223b238e7a35 /kenlm
parent: 36db0ffe489c94e9838f17d0f05b772b01777af5 (diff)
Minor fixes. One for David Chiang who has files without initial newlines.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4108 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'kenlm')
-rwxr-xr-x  kenlm/compile.sh  2
-rw-r--r--  kenlm/lm/read_arpa.cc  6
2 files changed, 4 insertions, 4 deletions
diff --git a/kenlm/compile.sh b/kenlm/compile.sh
index 35c9e28ef..0786df130 100755
--- a/kenlm/compile.sh
+++ b/kenlm/compile.sh
@@ -7,7 +7,7 @@
set -e
-for i in util/{bit_packing,ersatz_progress,exception,file_piece,murmur_hash,scoped,mmap} lm/{binary_format,config,lm_exception,model,quantize,read_arpa,search_hashed,search_trie,trie,virtual_interface,vocab}; do
+for i in util/{bit_packing,ersatz_progress,exception,file_piece,murmur_hash,scoped,mmap} lm/{bhiksha,binary_format,config,lm_exception,model,quantize,read_arpa,search_hashed,search_trie,trie,virtual_interface,vocab}; do
g++ -I. -O3 -DNDEBUG $CXXFLAGS -c $i.cc -o $i.o
done
g++ -I. -O3 -DNDEBUG $CXXFLAGS lm/build_binary.cc {lm,util}/*.o -lz -o build_binary
diff --git a/kenlm/lm/read_arpa.cc b/kenlm/lm/read_arpa.cc
index 060a97ea0..455bc4ba8 100644
--- a/kenlm/lm/read_arpa.cc
+++ b/kenlm/lm/read_arpa.cc
@@ -31,15 +31,15 @@ const char kBinaryMagic[] = "mmap lm http://kheafield.com/code";
void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
number.clear();
StringPiece line;
- if (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
+ while (IsEntirelyWhiteSpace(line = in.ReadLine())) {}
+ if (line != "\\data\\") {
if ((line.size() >= 2) && (line.data()[0] == 0x1f) && (static_cast<unsigned char>(line.data()[1]) == 0x8b)) {
UTIL_THROW(FormatLoadException, "Looks like a gzip file. If this is an ARPA file, pipe " << in.FileName() << " through zcat. If this already in binary format, you need to decompress it because mmap doesn't work on top of gzip.");
}
if (static_cast<size_t>(line.size()) >= strlen(kBinaryMagic) && StringPiece(line.data(), strlen(kBinaryMagic)) == kBinaryMagic)
UTIL_THROW(FormatLoadException, "This looks like a binary file but got sent to the ARPA parser. Did you compress the binary file or pass a binary file where only ARPA files are accepted?");
- UTIL_THROW(FormatLoadException, "First line was \"" << line.data() << "\" not blank");
+ UTIL_THROW(FormatLoadException, "first non-empty line was \"" << line << "\" not \\data\\.");
}
- if ((line = in.ReadLine()) != "\\data\\") UTIL_THROW(FormatLoadException, "second line was \"" << line << "\" not \\data\\.");
while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
if (line.size() < 6 || strncmp(line.data(), "ngram ", 6)) UTIL_THROW(FormatLoadException, "count line \"" << line << "\"doesn't begin with \"ngram \"");
// So strtol doesn't go off the end of line.