Add unimplemented config option to enumerate the vocabulary. Minor namespace change.

git-svn-id: file:///dev/shm/somefilter.svn@436 e102df66-1e2e-11dd-9b44-c24451a4db5e
author: kpu <github@kheafield.com> 2010-10-26 00:52:26 +0400
committer: kpu <github@kheafield.com> 2010-10-26 00:52:26 +0400
commit: fb59a8beb6f7d622906145d55c09fedf2d662604 (patch)
tree: a9e6365c954c529c84ba2439f91c21133888cc57 /lm/enumerate_vocab.hh
parent: 8bb02c7c32e9c3734fcc04779e9465ee27cac12a (diff)
1 files changed, 24 insertions, 0 deletions
diff --git a/lm/enumerate_vocab.hh b/lm/enumerate_vocab.hh
new file mode 100644
index 0000000..3b1cd09
--- /dev/null
+++ b/lm/enumerate_vocab.hh
@@ -0,0 +1,24 @@
+#ifndef LM_ENUMERATE_VOCAB__
+#define LM_ENUMERATE_VOCAB__
+
+#include "util/string_piece.hh"
+
+namespace lm {
+namespace ngram {
+
+/* Callback interface for reading the vocabulary strings: inherit from this
+ * class, implement Add, then put a pointer in Config.enumerate_vocab.
+ * Add is called once per vocabulary word; index starts at 0 and increases
+ * by 1 each time.  Members are public so the class can be derived from.
+ * NOTE(review): WordIndex must be declared before this point - confirm. */
+class EnumerateVocab {
+ public:
+  virtual ~EnumerateVocab() {}
+  virtual void Add(WordIndex index, const StringPiece &str) = 0;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_ENUMERATE_VOCAB__
+
author	kpu <github@kheafield.com>	2010-10-26 00:52:26 +0400
committer	kpu <github@kheafield.com>	2010-10-26 00:52:26 +0400
commit	fb59a8beb6f7d622906145d55c09fedf2d662604 (patch)
tree	a9e6365c954c529c84ba2439f91c21133888cc57 /lm/enumerate_vocab.hh
parent	8bb02c7c32e9c3734fcc04779e9465ee27cac12a (diff)