Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h')
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h46
1 files changed, 43 insertions, 3 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
index 14bf6cdad..ab7f96bf0 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
@@ -7,6 +7,8 @@
#include "ug_typedefs.h"
#include "tpt_tokenindex.h"
#include <iostream>
+#include "util/exception.hh"
+#include "moses/Util.h"
//#include <cassert>
// #include "ug_bv_iter.h"
@@ -60,10 +62,15 @@ namespace ugdiss
// TSA_tree_iterator(TSA_tree_iterator const& other);
TSA_tree_iterator(TSA<Token> const* s);
+ TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other);
TSA_tree_iterator(TSA<Token> const* r, id_type const* s, size_t const len);
// TSA_tree_iterator(TSA<Token> const* s, Token const& t);
TSA_tree_iterator(TSA<Token> const* s,
Token const* kstart,
+ size_t const len,
+ bool full_match_only=true);
+ TSA_tree_iterator(TSA<Token> const* s,
+ Token const* kstart,
Token const* kend,
bool full_match_only=true);
// TSA_tree_iterator(TSA<Token> const* s,
@@ -150,9 +157,12 @@ namespace ugdiss
double approxOccurrenceCount(int p=-1) const
{
assert(root);
+ if (p < 0) p += lower.size();
double ret = arrayByteSpanSize(p)/root->aveIndexEntrySize();
- assert(ret < root->corpus->numTokens());
if (ret < 25) ret = rawCnt(p);
+ UTIL_THROW_IF2(ret > root->corpus->numTokens(), "[" << HERE << "] "
+ << "Word count mismatch.");
+ assert(ret <= root->corpus->numTokens());
return ret;
}
@@ -320,6 +330,18 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
+ TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other)
+ : root(s)
+ {
+ Token const* x = other.getToken(0);
+ for (size_t i = 0; i < other.size() && this->extend(x->id()); ++i)
+ x = x->next();
+ };
+
+
+
+ template<typename Token>
+ TSA_tree_iterator<Token>::
TSA_tree_iterator
(TSA<Token> const* r,
id_type const* s,
@@ -385,6 +407,25 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
+ size_t const len, bool full_match_only)
+ : root(s)
+ {
+ if (!root) return;
+ size_t i = 0;
+ for (; i < len && kstart && extend(*kstart); ++i)
+ kstart = kstart->next();
+ if (full_match_only && i != len)
+ {
+ lower.clear();
+ upper.clear();
+ }
+ };
+
+ // DEPRECATED: DO NOT USE. Use the one that takes the length
+ // instead of kend.
+ template<typename Token>
+ TSA_tree_iterator<Token>::
+ TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
Token const* kend, bool full_match_only)
: root(s)
{
@@ -561,8 +602,7 @@ namespace ugdiss
TSA_tree_iterator<Token>::
rawCnt(int p) const
{
- if (p < 0)
- p = lower.size()+p;
+ if (p < 0) p += lower.size();
assert(p>=0);
if (lower.size() == 0) return root->getCorpusSize();
return root->rawCnt(lower[p],upper[p]);