Welcome to mirror list, hosted at ThFree Co, Russian Federation.

nl-hmmloop.h « include « rvtl « hhmm « synlm « contrib - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c476b4271e7f15a6a9dcecd7586aac64f487a80d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
///////////////////////////////////////////////////////////////////////////////
//                                                                           //
// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
//                                                                           //
//    ModelBlocks is free software: you can redistribute it and/or modify    //
//    it under the terms of the GNU General Public License as published by   //
//    the Free Software Foundation, either version 3 of the License, or      //
//    (at your option) any later version.                                    //
//                                                                           //
//    ModelBlocks is distributed in the hope that it will be useful,         //
//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
//    GNU General Public License for more details.                           //
//                                                                           //
//    You should have received a copy of the GNU General Public License      //
//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
//                                                                           //
//    ModelBlocks developers designate this particular file as subject to    //
//    the "Moses" exception as provided by ModelBlocks developers in         //
//    the LICENSE file that accompanies this code.                           //
//                                                                           //
///////////////////////////////////////////////////////////////////////////////

#ifndef _NL_HMMLOOP_
#define _NL_HMMLOOP_
#include <list>
#include <string>
#include <boost/thread/thread.hpp>
#include <boost/thread/mutex.hpp>
#include <boost/bind.hpp>
#include "nl-prob.h"
#include "nl-safeids.h"
#include "nl-beam.h"

typedef int Frame;

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//  NullBackDat - default empty back-pointer data; can replace with word or sem relation
//
////////////////////////////////////////////////////////////////////////////////

// Placeholder back-pointer payload.  It records nothing: it can be built from
// any Y (the argument is ignored), writes nothing, prints nothing and always
// reports the empty string.
//
// Standard-library names are spelled with std:: so that this header does not
// depend on a using-directive leaking in from another include.
template <class Y>
class NullBackDat {
  static const std::string sDummy;                  // the empty string returned by getString()
  char dummy_data_member_to_avoid_compile_warning;  // filler member, always set to 0
 public:
  // Default payload.
  NullBackDat ( )          : dummy_data_member_to_avoid_compile_warning(0) { }
  // Payload "for" a given Y; the value itself is not looked at.
  // Deliberately not explicit, so existing implicit conversions from Y keep working.
  NullBackDat ( const Y& ) : dummy_data_member_to_avoid_compile_warning(0) { }
  // Writes nothing to the given file.
  void write ( FILE* ) const { }
  // Always returns the empty string.
  std::string getString ( ) const { return sDummy; }
  // Inserts nothing into the stream and returns it unchanged.
  friend std::ostream& operator<< ( std::ostream& os, const NullBackDat& ) { return os; }
};
template <class Y>
const std::string NullBackDat<Y>::sDummy ( "" );


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//  Index - pointer to source in previous beam heap
//
////////////////////////////////////////////////////////////////////////////////

// Position of a hypothesis within a beam: an Id<int> that can be built from a
// plain int and stepped forward with pre-increment.
class Index : public Id<int> {
 public:
  // Leaves the value as Id<int>'s own default constructor sets it.
  Index ( ) { }
  // Wraps beam position i.
  Index ( int i ) { set(i); }
  // Pre-increment: move to the next position and return this object.
  Index& operator++ ( ) {
    set(toInt()+1);
    return *this;
  }
};


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//  TrellNode - node in viterbi trellis
//
////////////////////////////////////////////////////////////////////////////////

template <class S, class B>
class TrellNode {
 private:

  // Data members...
  Index   indSource;
  B       backptrData;
  S       sId;
  LogProb lgprMax;

 public:

  // Constructor / destructor methods...
  TrellNode ( ) { }
  TrellNode ( const Index& indS, const S& sI, const B& bDat, LogProb lgpr)
    { indSource=indS; sId=sI; lgprMax=lgpr; backptrData=bDat; /* fo = -1; */ }

  // Specification methods...
  const Index& setSource  ( ) const { return indSource; }
  const B&     setBackData( ) const { return backptrData; }
  const S&     setId      ( ) const { return sId; }
  LogProb&     setScore   ( )       { return lgprMax; }

  // Extraction methods...
  bool operator== ( const TrellNode<S,B>& tnsb ) const { return(sId==tnsb.sId); }
//  size_t       getHashKey ( ) const { return sId.getHashKey(); }
  const Index& getSource  ( ) const { return indSource; }
  const B&     getBackData( ) const { return backptrData; }
  const S&     getId      ( ) const { return sId; }
  LogProb      getLogProb ( ) const { return lgprMax; }
  LogProb      getScore   ( ) const { return lgprMax; }

  // Input / output methods...
  friend ostream& operator<< ( ostream& os, const TrellNode& tn ) { return os<<tn.indSource<<","<<tn.backptrData<<","<<tn.sId<<","<<tn.lgprMax; }
};


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//  HMMLoop
//
////////////////////////////////////////////////////////////////////////////////

// Beam-search recognizer over a transition model MY and an observation model
// MX.  The trellis is addressed by frame number modulo LOOP_LENGTH (see
// getTrellNode / setTrellNode), so only LOOP_LENGTH frames are held at a time.
//   S - hidden-state type stored in each trellis node (defaults to MY::RandVarType)
//   B - back-pointer payload type (defaults to NullBackDat)
template <class MY, class MX, class S=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
class HMMLoop {
 private:
  // (source beam position, back-pointer payload) pair carried through the beam.
  typedef std::pair<Index,B> IB;

  // Data members...
  MY modY;                                                      // transition model
  MX modX;                                                      // observation model
  SafeArray2D<Id<Frame>,Id<int>,TrellNode<S,B> > aatnTrellis;   // [frame slot][beam position]
  const int BEAM_WIDTH;                                         // beam positions per frame
  const int LOOP_LENGTH;                                        // frame slots in the trellis
  Frame  frameLast;                                             // number of the most recent frame
  int    iNextNode;

 public:
  // Static member variables...
  static bool OUTPUT_QUIET;
  static bool OUTPUT_NOISY;
  static bool OUTPUT_VERYNOISY;
//  static int  BEAM_WIDTH;

  // Constructor / destructor methods...
  HMMLoop ( int, const char*[], int, int, const S& ) ;

  // Specification methods...
//  void init         ( int, int, const S& ) ;
//  void init         ( int, int, SafeArray1D<Id<int>,pair<S,LogProb> >* );
  const TrellNode<S,B>& update       ( const typename MX::RandVarType& ) ;
  // Read-only access to beam position i of frame t (frame taken modulo LOOP_LENGTH).
  const TrellNode<S,B>& getTrellNode ( Frame t, Index i ) {
    return aatnTrellis.get ( t%LOOP_LENGTH, i );
  }
  // Writable access to beam position i of frame t (frame taken modulo LOOP_LENGTH).
  TrellNode<S,B>&       setTrellNode ( Frame t, Index i ) {
    return aatnTrellis.set ( t%LOOP_LENGTH, i );
  }

 /*
  void updateSerial ( const typename MX::RandVarType& ) ;
  void updatePara   ( const typename MX::RandVarType& ) ;
  void each         ( const typename MX::RandVarType&, Beam<LogProb,S,IB>&, SafeArray1D<Id<int>,std::pair<std::pair<S,IB>,LogProb> >& ) ;
  // Extraction methods...
  const TrellNode<S,B>& getTrellNode ( int i ) const { return aatnTrellis.get(frameLast,i); }
  int getBeamUsed ( int ) const ;
  // Input / output methods...
  void writeMLS  ( FILE* ) const ;
  void writeMLS  ( FILE*, const S& ) const ;
  double getCurrSum(int) const;
  void writeCurr ( FILE*, int ) const ;
  void writeCurrSum ( FILE*, int ) const ;
  void gatherElementsInBeam( SafeArray1D<Id<int>,pair<S,LogProb> >* result, int f ) const;
  void writeCurrEntropy ( FILE*, int ) const;
  //void writeCurrDepths ( FILE*, int ) const;
  void writeFoll ( FILE*, int, int, const typename MX::RandVarType& ) const ;
  void writeFollRanked ( FILE*, int, int, const typename MX::RandVarType&, bool ) const ;
  std::list<string> getMLS() const;
  std::list<TrellNode<S,B> > getMLSnodes() const;
  std::list<string> getMLS(const S&) const;
  std::list<TrellNode<S,B> > getMLSnodes(const S&) const;
 */
};

// Verbosity flags, all off by default.
template <class MY, class MX, class S, class B>
bool HMMLoop<MY,MX,S,B>::OUTPUT_QUIET     = false;
template <class MY, class MX, class S, class B>
bool HMMLoop<MY,MX,S,B>::OUTPUT_NOISY     = false;
template <class MY, class MX, class S, class B>
bool HMMLoop<MY,MX,S,B>::OUTPUT_VERYNOISY = false;
//template <class MY, class MX, class S, class B> int  HMMLoop<MY,MX,S,B>::BEAM_WIDTH       = 1;


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

// Builds the recognizer and runs it to completion.
//
// The constructor does all of the work: it loads the models from the files
// named in apsArgs[1..nArgs-1], allocates a trellis of l frame slots by w beam
// positions, seeds it with sInit, then reads observations from stdin one at a
// time, calls update() for each and prints the node update() returns.  It only
// returns once the input loop ends.
//
//   nArgs, apsArgs - argument count and vector; apsArgs[0] is skipped
//   w              - beam width (must be at least 1)
//   l              - loop length, i.e. number of frame slots (must be at least 1)
//   sInit          - hidden state placed at beam position 0 of the initial frame
//
// Terminates the process with exit code 1 if w or l is not positive or if a
// model file cannot be opened.
template <class MY, class MX, class S, class B>
HMMLoop<MY,MX,S,B>::HMMLoop ( int nArgs, const char* apsArgs[], int w, int l, const S& sInit )
  : BEAM_WIDTH(w), LOOP_LENGTH(l), frameLast(l), iNextNode(0) {

  // Refuse degenerate dimensions before doing anything else: LOOP_LENGTH is
  // used as a modulus when addressing the trellis and beam position 0 is
  // written unconditionally below, so neither may be zero or negative...
  if ( BEAM_WIDTH < 1 || LOOP_LENGTH < 1 ) {
    cout<<"ERROR: beam width ("<<BEAM_WIDTH<<") and loop length ("<<LOOP_LENGTH<<") must both be positive!\n";
    cout<<"Terminating process with failure code 1.\n";
    exit(1);
  }

  // For each model file in command line arguments...
  for ( int iArg=1; iArg<nArgs; iArg++ ) {

    // Try to open model file...
    FILE* pf = fopen(apsArgs[iArg],"r");
    // Complain if can't open model file...
    if ( NULL == pf ) {
      cout<<"ERROR: can't open file '"<<apsArgs[iArg]<<"'!\n";
      cout<<"Terminating process with failure code 1.\n";
      exit(1);
    }

    // Initialize stream buffer and line number...
    IStreamSource iss(pf);
    int linenum=0;

    cout<<"Reading file '"<<apsArgs[iArg]<<"'...\n";

    // For each line of input...
    // (is1 receives the stream position after whichever parse attempt was made last.)
    for ( IStream is(iss),is1; IStream()!=is; is=is1,iss.compress() ) {

      // Increment line number...
      linenum++;
      // Count off every 100K lines...
      if (linenum%100000==0) cout<<"  Reading line "<<linenum<<"...\n";

      // Try to read each line into each model...
      // The attempts run in order -- '#' comment, modY entry, modX entry -- and
      // stop at the first whose result differs from IStream().  Only when all
      // three compare equal to IStream() is the line read as a raw string so
      // that it can be reported.
      String s;
      if ( (is1=(is>>"#">>s>>"\n")) == IStream() &&
           (is1=(is>>modY>>  "\n")) == IStream() &&
           (is1=(is>>modX>>  "\n")) == IStream() &&
           (is1=(is>>s   >>  "\n")) != IStream() )
        // Complain if bad format...
        cout<<"  ERROR in '"<<apsArgs[iArg]<<"', line "<<linenum<<": can't process '"<<s<<"'!\n";
    }
    cout<<"Done reading file '"<<apsArgs[iArg]<<"'.\n";
    fclose(pf);
  }
  cout<<"Done reading all model files.\n";
  //modY.dump(cout,"Y");
  //modX.dump(cout,"X");

  // Alloc trellis...
  aatnTrellis.init(LOOP_LENGTH,BEAM_WIDTH);
  // The initial frame is numbered LOOP_LENGTH (frameLast was initialized to it
  // above), which maps to frame slot 0.
  // Set initial element at first time slice...
  setTrellNode(frameLast,0) = TrellNode<S,B> ( Index(0), sInit, B(), 0 ) ;

  cout<<"Begin processing input...\n";
  IStreamSource iss(stdin);
  typename MX::RandVarType x;

  // For each frame...
  for ( IStream is(iss); is!=IStream(); iss.compress() ) {

//    // Show beam...
//    cout<<"-----BEAM:t="<<frameLast-LOOP_LENGTH<<"-----\n";
//    for(int i=0;i<BEAM_WIDTH;i++)
//      cout<<getTrellNode(frameLast,i)<<"\n";
//    cout<<"--------------\n";

    // Read spectrum (as frame audio)...
    is=is>>x;
    // NOTE(review): update() below still runs when this read leaves `is` equal
    // to IStream(), in which case x holds whatever it held before -- confirm
    // against IStream's end-of-input behaviour whether that produces one extra
    // frame at the end of the input.

//    // Show spectrum...
//    cout<<frameLast-2*LOOP_LENGTH+1<<" "<<x<<"\n";
//    // Show spectrum with bin numbers...
//    cout<<frameLast-2*LOOP_LENGTH+1;
//    for(int i=0; i<NUM_FREQUENCIES; i++)
//      cout<<((i==0)?' ':',')<<i<<":"<<x.get(i);
//    cout<<"\n";

    // Update trellis...
    const TrellNode<S,B>& tn = update(x);

    // Show recognized hidden variable values...
    // (The number printed is frameLast-2*LOOP_LENGTH+1, i.e. the frame of the
    // returned node counted from the initial frame.)
    cout<<frameLast-2*LOOP_LENGTH+1<<":'"<<tn<<"'\n";
    cout.flush();
  }
  cout<<"Done processing input.\n";
}


////////////////////////////////////////////////////////////////////////////////

template <class A, class B>
inline bool outRank ( const quad<A,B,LogProb,Id<int> >& a1,
                      const quad<A,B,LogProb,Id<int> >& a2 ) { return (a1.third>a2.third); }

// Advances the trellis by one frame for observation x.
//
// A best-first search over queue elements proposes transitions out of the
// previous frame's beam.  Each completed element at the top of the queue is
// offered to a beam of BEAM_WIDTH entries until the beam reports full or the
// queue runs dry.  The sorted beam is then copied into frame frameLast, keeping
// only hypotheses that trace back to the same node of frame
// frameLast-LOOP_LENGTH+1 as the best hypothesis does; the remaining beam
// positions are reset to empty nodes.
//
// Returns that common origin node, i.e. the node at frame
// frameLast-LOOP_LENGTH+1 from which the best new hypothesis descends.
template <class MY, class MX, class S, class B>
const TrellNode<S,B>& HMMLoop<MY,MX,S,B>::update ( const typename MX::RandVarType& x ) {

  // Increment frame counter...
  frameLast++;

  // Init beam for new frame...
  Beam<LogProb,S,IB> btn(BEAM_WIDTH);
  SafeArray1D<Id<int>,std::pair<std::pair<S,IB>,LogProb> > atnSorted (BEAM_WIDTH);

  // Queue element layout:
  //   first  - beam position of the transition source in the previous frame
  //   second - iterator value of the transition model
  //   third  - score accumulated so far
  //   fourth - variable position at which the element was forked off
  typedef quad<int,typename MY::IterVal,LogProb,Id<int> > SHPI;
  Heap < SHPI, outRank<int,typename MY::IterVal > > ashpiQueue;
  SHPI shpi, shpiTop;
  int aCtr;

  // Seed the queue with the first transition out of beam position 0...
  ashpiQueue.clear();
  shpi.first = 0;
  shpi.third  = getTrellNode(frameLast-1,shpi.first).getScore();
  shpi.third *= modY.setIterProb ( shpi.second, getTrellNode(frameLast-1,shpi.first).getId(), aCtr=-1 );   // , x, aCtr=-1 );
  shpi.fourth = -1;
  ashpiQueue.enqueue(shpi);

  bool bFull=false;

  // For each ranked value of transition destination...
  while ( !bFull && ashpiQueue.getSize()>0 ) {
    // Iterate A* (best-first) search until a complete path is at the top of the queue...
    while ( ashpiQueue.getSize() > 0 && ashpiQueue.getTop().fourth < MY::IterVal::NUM_ITERS ) {
      // Remove top...
      shpiTop = ashpiQueue.dequeueTop();
      // Fork off (try to advance each elementary variable a)...
      for ( int a=shpiTop.fourth.toInt(); a<=MY::IterVal::NUM_ITERS; a++ ) {
        // Copy top into new queue element...
        shpi = shpiTop;
        // At variable position -1, advance beam element for transition source...
        if ( a == -1 ) {
          shpi.first++;
          // The previous frame holds only BEAM_WIDTH positions; once the last
          // one has been forked there is no further source to advance to, and
          // indexing past it would read outside that frame's beam.
          if ( shpi.first >= BEAM_WIDTH ) continue;
        }
        // Incorporate prob from transition source...
        shpi.third = getTrellNode(frameLast-1,shpi.first).getScore();
        // (Sources whose score does not exceed a default-constructed LogProb are dropped.)
        if ( shpi.third > LogProb() ) {
          // Try to advance variable at position a and return probability (subsequent variables set to first, probability ignored)...
          shpi.third *= modY.setIterProb ( shpi.second, getTrellNode(frameLast-1,shpi.first).getId(), aCtr=a );   // , x, aCtr=a );
          // At end of variables, incorporate observation probability...
          if ( a == MY::IterVal::NUM_ITERS && shpi.fourth != MY::IterVal::NUM_ITERS )
            shpi.third *= modX.getProb ( x, S(shpi.second) );
          // Record variable position at which this element was forked off...
          shpi.fourth = a;
          if ( shpi.third > LogProb() ) {
            // If valid, add to queue...
            ashpiQueue.enqueue(shpi);
          }
        }
      }
    }

    // Add best transition (top of queue)...
    if ( ashpiQueue.getSize() > 0 ) {
      S s ( ashpiQueue.getTop().second );
      bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,B(ashpiQueue.getTop().second)), ashpiQueue.getTop().third );
      ashpiQueue.dequeueTop();
    }
  }

  btn.sort(atnSorted);

  // Copy sorted beam to trellis...
  Index iOriginOfBest;
  int j=0;
  for(int i=0;i<BEAM_WIDTH;i++) {
    const std::pair<std::pair<S,IB>,LogProb>* tn1 = &atnSorted.get(i);
    Index iOrigin = tn1->first.second.first;
    // Determine origin at beginning of loop...
    // (Follow source pointers back from frame frameLast-1 until iOrigin is a
    // beam position in frame frameLast-LOOP_LENGTH+1.)
    for ( Frame t=frameLast-1; t>frameLast-LOOP_LENGTH+1; t-- )
      iOrigin = getTrellNode(t,iOrigin).getSource();
    // The first (best-ranked) entry fixes the origin all others must share...
    if ( 0 == i ) iOriginOfBest = iOrigin;
    // If new hypothesis has same origin, add to beam...
    if ( iOriginOfBest == iOrigin ) {
      setTrellNode(frameLast,j++)=TrellNode<S,B>(tn1->first.second.first,
                                                 tn1->first.first,
                                                 tn1->first.second.second,
                                                 tn1->second);
    }
  }
  // Clear out rest of beam...
  for ( ; j<BEAM_WIDTH; j++ )
    setTrellNode(frameLast,j) = TrellNode<S,B>();

  ////modY.update();

  return getTrellNode(frameLast-LOOP_LENGTH+1,iOriginOfBest);
}




#endif //_NL_HMMLOOP_