github.com/moses-smt/mosesdecoder.git
Diffstat (limited to 'lm/builder/pipeline.hh')
-rw-r--r--  lm/builder/pipeline.hh  9
1 file changed, 9 insertions(+), 0 deletions(-)
diff --git a/lm/builder/pipeline.hh b/lm/builder/pipeline.hh
index 4395622ed..09e1a4d52 100644
--- a/lm/builder/pipeline.hh
+++ b/lm/builder/pipeline.hh
@@ -1,6 +1,7 @@
 #ifndef LM_BUILDER_PIPELINE_H
 #define LM_BUILDER_PIPELINE_H
 
+#include "lm/builder/adjust_counts.hh"
 #include "lm/builder/initial_probabilities.hh"
 #include "lm/builder/header_info.hh"
 #include "lm/lm_exception.hh"
@@ -19,6 +20,8 @@ struct PipelineConfig {
   util::stream::SortConfig sort;
   InitialProbabilitiesConfig initial_probs;
   util::stream::ChainConfig read_backoffs;
+
+  // Include a header in the ARPA with some statistics?
   bool verbose_header;
 
   // Estimated vocabulary size. Used for sizing CorpusCount memory and
@@ -34,6 +37,12 @@ struct PipelineConfig {
   // n-gram count thresholds for pruning. 0 values means no pruning for
   // corresponding n-gram order
   std::vector<uint64_t> prune_thresholds; //mjd
+
+  // What to do with discount failures.
+  DiscountConfig discount;
+
+  // Compute collapsed q values instead of probability and backoff
+  bool output_q;
 
   /* Computing the perplexity of LMs with different vocabularies is hard. For
    * example, the lowest perplexity is attained by a unigram model that
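
For orientation, below is a minimal sketch of how a caller might set the two fields this commit adds to PipelineConfig. The field names (discount, output_q, verbose_header) are taken from the diff above, and the lm::builder namespace matches the repository; the helper function MakeConfig and the concrete values are illustrative assumptions, and the members of DiscountConfig (declared in the newly included lm/builder/adjust_counts.hh) are not shown in this diff, so it is left default-constructed here.

// Hypothetical usage sketch; not part of the commit itself.
#include "lm/builder/pipeline.hh"

lm::builder::PipelineConfig MakeConfig() {
  lm::builder::PipelineConfig config;

  // Added by this commit: what to do with discount failures.  The
  // DiscountConfig type comes from lm/builder/adjust_counts.hh; its
  // members are not visible in this diff, so no fields are set here.
  // config.discount = ...;

  // Added by this commit: emit collapsed q values instead of the usual
  // probability/backoff pairs.
  config.output_q = false;

  // Pre-existing option documented by this commit's new comment: include
  // a header in the ARPA output with some statistics.
  config.verbose_header = true;

  return config;
}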