Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author kpu <github@kheafield.com> 2010-10-26 00:52:26 +0400
committer kpu <github@kheafield.com> 2010-10-26 00:52:26 +0400
commit fb59a8beb6f7d622906145d55c09fedf2d662604 (patch)
tree a9e6365c954c529c84ba2439f91c21133888cc57 /lm/enumerate_vocab.hh
parent 8bb02c7c32e9c3734fcc04779e9465ee27cac12a (diff)
Add unimplemented config option to enumerate the vocabulary. Minor namespace change.
git-svn-id: file:///dev/shm/somefilter.svn@436 e102df66-1e2e-11dd-9b44-c24451a4db5e
Diffstat (limited to 'lm/enumerate_vocab.hh')
-rw-r--r-- lm/enumerate_vocab.hh 24
1 files changed, 24 insertions, 0 deletions
diff --git a/lm/enumerate_vocab.hh b/lm/enumerate_vocab.hh
new file mode 100644
index 0000000..3b1cd09
--- /dev/null
+++ b/lm/enumerate_vocab.hh
@@ -0,0 +1,24 @@
+#ifndef LM_ENUMERATE_VOCAB__
+#define LM_ENUMERATE_VOCAB__
+
+#include "util/string_piece.hh"
+
+namespace lm {
+namespace ngram {
+
+/* Callback interface for receiving the actual strings in the vocabulary.
+ * Inherit from this class, implement Add, and put a pointer in
+ * Config.enumerate_vocab. Add is called once per vocabulary word; index
+ * starts at 0 and increases by 1 each time. Members are public so callers
+ * can invoke Add and destroy implementations through a base pointer. */
+class EnumerateVocab {
+  public:
+    virtual ~EnumerateVocab() {}
+    virtual void Add(WordIndex index, const StringPiece &str) = 0;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_ENUMERATE_VOCAB__
+