Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/UG/mm/ug_im_ttrack.h')
-rw-r--r-- moses/TranslationModel/UG/mm/ug_im_ttrack.h 34
1 files changed, 32 insertions, 2 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
index 05066c922..0c6e4afbf 100644
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@@ -16,6 +16,9 @@
#include "tpt_tokenindex.h"
#include "ug_ttrack_base.h"
#include "tpt_tokenindex.h"
+#include "util/exception.hh"
+#include "moses/Util.h"
+
// #include "ug_vocab.h"
// define the corpus buffer size (in sentences) and the
@@ -49,6 +52,8 @@ namespace ugdiss
typename boost::shared_ptr<imTtrack<Token> >
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
+ void m_check_token_count(); // debugging function
+
public:
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
@@ -70,6 +75,22 @@ namespace ugdiss
};
template<typename Token>
+ void
+ imTtrack<Token>::
+ m_check_token_count()
+ { // sanity check: sums s.size() over every sentence in *myData and hands a mismatch with numToks to UTIL_THROW_IF2
+ size_t check = 0;
+ BOOST_FOREACH(vector<Token> const& s, *myData)
+ check += s.size(); // running total of tokens actually stored
+ UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
+ << " Wrong token count after appending sentence!"
+ << " Counted " << check << " but expected "
+ << this->numToks << " in a total of " << myData->size()
+ << " sentences.");
+
+ }
+
+ template<typename Token>
Token const*
imTtrack<Token>::
sntStart(size_t sid) const // return pointer to beginning of sentence
@@ -111,9 +132,9 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL)
+ : numToks(0)
{
myData.reset(new vector<vector<Token> >());
- numToks = 0;
string line,w;
size_t linectr=0;
boost::unordered_map<string,id_type> H;
@@ -135,6 +156,7 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(size_t reserve)
+ : numToks(0)
{
myData.reset(new vector<vector<Token> >());
if (reserve) myData->reserve(reserve);
@@ -143,9 +165,9 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
+ : numToks(0) // zeroed in the initializer list; the loop in the body then adds v.size() for each sentence in *d
{
myData = d;
- numToks = 0;
BOOST_FOREACH(vector<Token> const& v, *d)
numToks += v.size();
}
@@ -171,6 +193,9 @@ namespace ugdiss
shared_ptr<imTtrack<TOKEN> >
append(shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
{
+#if 1
+ if (crp) crp->m_check_token_count();
+#endif
shared_ptr<imTtrack<TOKEN> > ret;
if (crp == NULL)
{
@@ -185,6 +210,11 @@ namespace ugdiss
}
else ret = crp;
ret->myData->push_back(snt);
+ ret->numToks += snt.size();
+
+#if 1
+ ret->m_check_token_count();
+#endif
return ret;
}