Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ug_mm_2d_table.h « mm « UG « TranslationModel « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 0ae16895b631301215457f0248ea3f283bfd480b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// -*- c++ -*-
// (c) 2007-2012 Ulrich Germann
#ifndef __ug_mm_2d_table_h
#define __ug_mm_2d_table_h
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/shared_ptr.hpp>
#include <vector>
#include <map>
#include "tpt_typedefs.h"
#include "tpt_pickler.h"
#include "ug_typedefs.h"
#include "util/exception.hh"
namespace bio=boost::iostreams;
namespace ugdiss
{
  // using namespace std;
  /// Read-only view of a sparse two-dimensional table that lives in a
  /// memory-mapped file.
  ///
  /// Every row is a contiguous run of Cells ordered by column id; a row
  /// index with numRows+1 entries gives the cell offset at which each row
  /// begins.  Two arrays of marginals (one value per row, one per column)
  /// are stored after the index.
  ///
  /// @tparam OFFSET integer type of the row index entries
  /// @tparam ID     integer type of row and column ids
  /// @tparam VAL    type of the cell values
  /// @tparam INIT   INIT(0) yields the value reported for absent entries
  template<typename OFFSET, typename ID, typename VAL, typename INIT>
  class
  mm2dTable
  {
  public:
    /// One stored table entry: a column id and the value at that column.
    struct Cell
    {
      ID   id;
      VAL val;

      // Comparison against a bare id; lets std::lower_bound search a row
      // for a column id.
      bool
      operator<(ID const otherId) const
      {
        return id < otherId;
      }

      // Cells are ordered by column id.
      bool
      operator<(Cell const& other) const
      {
        return id < other.id;
      }

      /// Comparator that orders cells from largest to smallest value.
      struct SortDescendingByValue
      {
        bool operator()(Cell const& a, Cell const& b) const
        {
          return a.val > b.val;
        }
      };
    };

    /// Half-open range [start,stop) of the cells of one row.
    struct Row
    {
      Cell const* start;
      Cell const* stop;
      /// Value stored for column @p key, or INIT(0) if there is none.
      VAL operator[](ID key) const;
    };

    Cell const* data;    // first cell of the table
    VAL  const* M1;      // row marginals, numRows entries
    VAL const* M2;       // column marginals
    OFFSET const* index; // cell offset of each row start, numRows+1 entries
    ID numRows;
    ID numCols;
    // Shared so that copies of the table keep the mapping alive.
    boost::shared_ptr<bio::mapped_file_source> file;

    /// Marginal of row @p key; INIT(0) if @p key is not a valid row.
    VAL m1(ID key) const
    {
      return (key < numRows) ? M1[key] : INIT(0);
    }

    /// Marginal of column @p key; INIT(0) if @p key is not a valid column.
    VAL m2(ID key) const
    {
      return (key < numCols) ? M2[key] : INIT(0);
    }


    /// Map the file @p fname and point the members into it.
    void open(std::string fname);

    /// Drop this object's reference to the mapping and return to the empty
    /// state, so that no pointer into a released mapping is kept around.
    void close()
    {
      file.reset();
      data  = NULL;
      M1    = NULL;
      M2    = NULL;
      index = NULL;
      numRows = 0;
      numCols = 0;
    }

    /// Cell range of row @p key.
    Row operator[](ID key) const;

    /// Create an empty table, or open @p fname right away if it is
    /// non-empty.  All members start out zeroed so that m1() and m2() on a
    /// table that was never opened report INIT(0) instead of reading
    /// indeterminate values.
    mm2dTable(std::string const fname="")
      : data(NULL), M1(NULL), M2(NULL), index(NULL), numRows(0), numCols(0)
    {
      if (!fname.empty()) open(fname);
    }

    ~mm2dTable() { file.reset(); }
  };

  /// Return the cell range of row @p key.
  ///
  /// For a valid row the range is [data+index[key], data+index[key+1]).
  /// For any @p key that is not a valid row the result is an empty range
  /// (start == stop), so lookups in it yield INIT(0).
  template<typename OFFSET, typename ID, typename VAL, typename INIT>
  typename mm2dTable<OFFSET,ID,VAL,INIT>::Row
  mm2dTable<OFFSET,ID,VAL,INIT>::
  operator[](ID key) const
  {
    Row ret;
    if (key < numRows)
      {
        ret.start = data+index[key];
        ret.stop  = data+index[key+1];
      }
    else
      {
        // The index holds numRows+1 entries; the last one marks the end of
        // the cell array.  Anchor the empty range there: index[key+1] would
        // read beyond the index for an out-of-range key.
        ret.start = ret.stop = data+index[numRows];
      }
    return ret;
  }

  /// Look up the value stored for column @p key in this row.
  ///
  /// The cells in [start,stop) are searched with a binary search on the
  /// column id, so they are expected to be ordered by id.
  ///
  /// @return the matching cell's value, or INIT(0) when the row is empty or
  ///         has no cell with that id
  template<typename OFFSET, typename ID, typename VAL, typename INIT>
  VAL
  mm2dTable<OFFSET,ID,VAL,INIT>::
  Row::
  operator[](ID key) const
  {
    if (start != stop)
      {
        // First cell whose id is not smaller than key (Cell::operator<(ID)).
        Cell const* const hit = std::lower_bound(start,stop,key);
        if (hit != stop && hit->id == key)
          return hit->val;
      }
    return INIT(0);
  }

  /// Memory-map the table file @p fname and point the members into it.
  ///
  /// Layout as read here, starting at the beginning of the file:
  ///   - a filepos_type holding the byte offset of the row index,
  ///   - the number of rows and the number of columns, one id_type each,
  ///   - the cells.
  /// The row index (numRows+1 entries of type OFFSET) starts at the stored
  /// offset and is followed by numRows row marginals and then the column
  /// marginals.
  ///
  /// No consistency checks are made on the file contents.
  ///
  /// @throws util::Exception if the file is not readable or the mapping is
  ///         not open after the attempt to open it
  template<typename OFFSET, typename ID, typename VAL, typename INIT>
  void
  mm2dTable<OFFSET,ID,VAL,INIT>::
  open(std::string fname)
  {
    if (access(fname.c_str(),R_OK))
      {
        std::ostringstream msg;
        msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
            << "file '" << fname << "' is not accessible." << std::endl;
        std::string foo = msg.str();
        UTIL_THROW(util::Exception,foo.c_str());
      }
    file.reset(new bio::mapped_file_source());
    file->open(fname);
    if (!file->is_open())
      {
        std::ostringstream msg;
        msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
            << "Opening file '" << fname << "' failed." << std::endl;
        std::string foo = msg.str();
        UTIL_THROW(util::Exception,foo.c_str());
      }
    char const* p = file->data();
    filepos_type offset = *reinterpret_cast<filepos_type const*>(p);
    index = reinterpret_cast<OFFSET const*>(p+offset); p += sizeof(offset);
    // The two counts are written as id_type, so read them as id_type and
    // convert; reading through ID would pick up the wrong bytes whenever ID
    // and id_type differ in size.
    numRows = ID(*reinterpret_cast<id_type const*>(p)); p += sizeof(id_type);
    numCols = ID(*reinterpret_cast<id_type const*>(p)); p += sizeof(id_type);
    data = reinterpret_cast<Cell const*>(p);
    // The marginals follow directly after the numRows+1 index entries.
    M1 = reinterpret_cast<VAL const*>(index+numRows+1);
    M2 = M1+numRows;
  }

  template<
    typename OFFSET, // integer type of file offsets
    typename ID,     // integer type of column ids
    typename VAL,    // type of cell values
    typename INIT,   // INIT(0) initializes default values
    typename ICONT   // inner container type
    >
  void
  write_mm_2d_table(std::ostream& out, std::vector<ICONT> const& T,
                    std::vector<VAL> const* m1    = NULL,
                    std::vector<VAL> const* m2    = NULL)
  {
    assert(T.size());
    typedef typename ICONT::const_iterator iter;

    // compute marginals if necessary
    std::vector<VAL> m1x,m2x;
    if (!m1)
      {
        m1x.resize(T.size(),INIT(0));
        for (size_t r = 0; r < T.size(); ++r)
          for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
            m1x[r] = m1x[r] + c->second;
        m1 = &m1x;
      }
    if (!m2)
      {
        for (size_t r = 0; r < T.size(); ++r)
          for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
            {
              while (c->first >= m2x.size())
                m2x.push_back(INIT(0));
              m2x[c->first] = m2x[c->first] + c->second;
            }
        m2 = &m2x;
      }

    filepos_type idxOffset=0;
    numwrite(out,idxOffset); // place holder, we'll return here at the end
    numwrite(out,id_type(m1->size())); // number of rows
    numwrite(out,id_type(m2->size())); // number of columns

    // write actual table
    std::vector<OFFSET> index;
    size_t ctr =0;
    index.reserve(m1->size()+1);
    for (ID r = 0; r < ID(T.size()); ++r)
      {
        //index.push_back(out.tellp());
        index.push_back(ctr);
        ID lastId = 0;
        if (T.at(r).size())
          lastId = T.at(r).begin()->first;
        for (typename ICONT::const_iterator c = T.at(r).begin();
             c != T.at(r).end(); ++c)
          {
            ctr++;
            assert(c->first >= lastId);
            lastId = c->first;
            typename mm2dTable<OFFSET,ID,VAL,INIT>::Cell item;
            item.id  = c->first;
            item.val = c->second;
            out.write(reinterpret_cast<char const*>(&item),sizeof(item));
          }
      }
    // index.push_back(out.tellp());
    index.push_back(ctr);
    idxOffset=out.tellp();

    // write index
    for (size_t i = 0; i < index.size(); ++i)
      {
        OFFSET o = index[i]; // (index[i]-index[0])/sizeof(VAL);
        out.write(reinterpret_cast<char*>(&o),sizeof(OFFSET));
      }

    // write marginals
    out.write(reinterpret_cast<char const*>(&(*m1)[0]),m1->size()*sizeof(VAL));
    out.write(reinterpret_cast<char const*>(&(*m2)[0]),m2->size()*sizeof(VAL));

    out.seekp(0);
    numwrite(out,idxOffset);
  }
}
#endif