Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/lm
diff options
context:
space:
mode:
author: Kenneth Heafield <kenlm@kheafield.com> 2012-03-11 21:47:38 +0400
committer: Kenneth Heafield <kenlm@kheafield.com> 2012-03-11 21:47:38 +0400
commit: 175b7aaf495963a8dd08525094073db06686adf8 (patch)
tree: c279eee1e5a3d3d7d417ec747091e184c418b7bf /lm
parent: 0fc56ef7b602134b387b264c4b1ffe13e7ac40f8 (diff)
KenLM c1dba12
- Reject NaNs
- Fix ChartState hashing (unused in Moses)
- Expose CreateOrThrow
- Minor portability improvement in getopt
Diffstat (limited to 'lm')
-rw-r--r-- lm/left.hh | 2
-rw-r--r-- lm/read_arpa.cc | 7
-rw-r--r-- lm/read_arpa.hh | 28
3 files changed, 23 insertions, 14 deletions
diff --git a/lm/left.hh b/lm/left.hh
index 41f71f849..a07f98038 100644
--- a/lm/left.hh
+++ b/lm/left.hh
@@ -112,7 +112,7 @@ inline size_t hash_value(const ChartState &state) {
size_t hashes[2];
hashes[0] = hash_value(state.left);
hashes[1] = hash_value(state.right);
- return util::MurmurHashNative(hashes, sizeof(size_t), state.full);
+ return util::MurmurHashNative(hashes, sizeof(size_t) * 2, state.full);
}
template <class M> class RuleScore {
diff --git a/lm/read_arpa.cc b/lm/read_arpa.cc
index 05f761be6..be6565992 100644
--- a/lm/read_arpa.cc
+++ b/lm/read_arpa.cc
@@ -7,6 +7,7 @@
#include <vector>
#include <ctype.h>
+#include <math.h>
#include <string.h>
#include <stdint.h>
@@ -93,7 +94,11 @@ void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
case '\t':
weights.backoff = in.ReadFloat();
if (weights.backoff == ngram::kExtensionBackoff) weights.backoff = ngram::kNoExtensionBackoff;
- if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
+ {
+ int float_class = fpclassify(weights.backoff);
+ UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << weights.backoff);
+ }
+ UTIL_THROW_IF((in.get() != '\n'), FormatLoadException, "Expected newline after backoff");
break;
case '\n':
weights.backoff = ngram::kNoExtensionBackoff;
diff --git a/lm/read_arpa.hh b/lm/read_arpa.hh
index ab996bde7..25648d3fb 100644
--- a/lm/read_arpa.hh
+++ b/lm/read_arpa.hh
@@ -10,6 +10,8 @@
#include <iosfwd>
#include <vector>
+#include <math.h>
+
namespace lm {
void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number);
@@ -29,20 +31,26 @@ class PositiveProbWarn {
explicit PositiveProbWarn(WarningAction action) : action_(action) {}
- void Warn(float prob);
+ float ReadProb(util::FilePiece &f) {
+ float prob = f.ReadFloat();
+ UTIL_THROW_IF(f.get() != '\t', FormatLoadException, "Expected tab after probability");
+ UTIL_THROW_IF(isnan(prob), FormatLoadException, "NaN probability");
+ if (prob > 0.0) {
+ Warn(prob);
+ prob = 0.0;
+ }
+ return prob;
+ }
private:
+ void Warn(float prob);
+
WarningAction action_;
};
template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
try {
- float prob = f.ReadFloat();
- if (prob > 0.0) {
- warn.Warn(prob);
- prob = 0.0;
- }
- if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability");
+ float prob = warn.ReadProb(f);
ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))];
value.prob = prob;
ReadBackoff(f, value);
@@ -64,11 +72,7 @@ template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc
// Return true if a positive log probability came out.
template <class Voc, class Weights> void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights, PositiveProbWarn &warn) {
try {
- weights.prob = f.ReadFloat();
- if (weights.prob > 0.0) {
- warn.Warn(weights.prob);
- weights.prob = 0.0;
- }
+ weights.prob = warn.ReadProb(f);
for (WordIndex *vocab_out = reverse_indices + n - 1; vocab_out >= reverse_indices; --vocab_out) {
*vocab_out = vocab.Index(f.ReadDelimited(kARPASpaces));
}