1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
#include "ug_im_bitext.h"
namespace Moses
{
namespace bitext
{
template<>
sptr<imBitext<L2R_Token<SimpleWordId> > >
imBitext<L2R_Token<SimpleWordId> >::
add(vector<string> const& s1,
vector<string> const& s2,
vector<string> const& aln) const
{
typedef L2R_Token<SimpleWordId> TKN;
assert(s1.size() == s2.size() && s1.size() == aln.size());
#ifndef NDEBUG
size_t first_new_snt = this->T1 ? this->T1->size() : 0;
#endif
sptr<imBitext<TKN> > ret;
{
boost::unique_lock<boost::shared_mutex> guard(m_lock);
ret.reset(new imBitext<TKN>(*this));
}
// we add the sentences in separate threads (so it's faster)
boost::thread thread1(snt_adder<TKN>(s1,*ret->V1,ret->myT1,ret->myI1));
// thread1.join(); // for debugging
boost::thread thread2(snt_adder<TKN>(s2,*ret->V2,ret->myT2,ret->myI2));
BOOST_FOREACH(string const& a, aln)
{
istringstream ibuf(a);
ostringstream obuf;
uint32_t row,col; char c;
while (ibuf >> row >> c >> col)
{
UTIL_THROW_IF2(c != '-', "[" << HERE << "] "
<< "Error in alignment information:\n" << a);
ugdiss::binwrite(obuf,row);
ugdiss::binwrite(obuf,col);
}
// important: DO NOT replace the two lines below this comment by
// char const* x = obuf.str().c_str(), as the memory x is pointing
// to is freed immediately upon deconstruction of the string object.
string foo = obuf.str();
char const* x = foo.c_str();
vector<char> v(x,x+foo.size());
ret->myTx = append(ret->myTx, v);
}
thread1.join();
thread2.join();
ret->Tx = ret->myTx;
ret->T1 = ret->myT1;
ret->T2 = ret->myT2;
ret->I1 = ret->myI1;
ret->I2 = ret->myI2;
#ifndef NDEBUG
// sanity check
for (size_t i = first_new_snt; i < ret->T1->size(); ++i)
{
size_t slen1 = ret->T1->sntLen(i);
size_t slen2 = ret->T2->sntLen(i);
char const* p = ret->Tx->sntStart(i);
char const* q = ret->Tx->sntEnd(i);
size_t k;
while (p < q)
{
p = binread(p,k);
assert(p);
assert(p < q);
assert(k < slen1);
p = binread(p,k);
assert(p);
assert(k < slen2);
}
}
#endif
return ret;
}
}
}
|