Welcome to mirror list, hosted at ThFree Co, Russian Federation.

PrefixTreeMap.h « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 35556b4cbf8bbda3008c47c53ae7a591681aa1e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#ifndef PREFIX_TREE_MAP_H
#define PREFIX_TREE_MAP_H

#include<vector>
#include<climits>
#include<iostream>

#include "PrefixTree.h"
#include "File.h"
#include "LVoc.h"
#include "ObjectPool.h"



// Shorthand names for the template instantiations used by the declarations
// in this header.

// PrefixTreeF instantiated with LabelId and OFF_T.
typedef PrefixTreeF<LabelId,OFF_T> PTF;
// FilePtr instantiated with PTF.
typedef FilePtr<PTF>               CPT;
// Vector of CPT entries; PrefixTreeMap stores one of these as m_Data.
typedef std::vector<CPT>           Data;
// LVoc instantiated with std::string; PrefixTreeMap keeps a vector of
// pointers to these as m_Voc.
typedef LVoc<std::string>          WordVoc;

/**
 * Value type bundling a list of phrases with a list of score vectors.
 *
 * Copy construction, copy assignment and destruction are intentionally left
 * to the compiler. Both members are standard containers, so the generated
 * operations copy and destroy them member-wise, which is exactly what the
 * previous hand-written copy constructor and empty destructor did. Not
 * declaring them also lets compilers that support move semantics generate
 * move operations, so containers of GenericCandidate can relocate elements
 * without deep-copying the nested vectors.
 */
class GenericCandidate {
public:
  typedef std::vector<IPhrase>              PhraseList;
  typedef std::vector< std::vector<float> > ScoreList;

public:
  /// Creates a candidate with no phrases and no scores.
  GenericCandidate() {
  }

  /// Creates a candidate holding copies of the phrase list p and score list s.
  GenericCandidate(const PhraseList& p, const ScoreList& s)
    : m_PhraseList(p), m_ScoreList(s) {
  }

public:
  /// Number of entries in the phrase list.
  size_t NumPhrases() const {
    return m_PhraseList.size();
  }

  /// Number of entries in the score list (i.e. number of score vectors).
  size_t NumScores() const {
    return m_ScoreList.size();
  }

  /// Returns phrase i; throws std::out_of_range if i >= NumPhrases().
  const IPhrase& GetPhrase(unsigned int i) const {
    return m_PhraseList.at(i);
  }

  /// Returns score vector i; throws std::out_of_range if i >= NumScores().
  const std::vector<float>& GetScore(unsigned int i) const {
    return m_ScoreList.at(i);
  }

  // Declared here, defined elsewhere. NOTE(review): presumably these
  // (de)serialise the candidate from/to the given stream -- confirm against
  // the implementation file.
  void readBin(FILE* f);
  void writeBin(FILE* f) const;

private:
  PhraseList m_PhraseList;
  ScoreList  m_ScoreList;
};

/*
class PPtr {
 public:
  typedef unsigned IndexType;
 public:
  PPtr(PTF const* p, IndexType i, bool isRoot)
    : m_Ptr(p), m_Index(i), m_IsRoot(isRoot){
  };
  ~PPtr(){
  };
};
*/

/**
 * Plain handle made of a tree pointer, an index and a root flag.
 *
 * PrefixTreeMap hands these out from GetRoot() and Extend().
 */
struct PPimp {
  PTF const* p;    // tree this handle refers to; may be null (see isValid)
  unsigned   idx;  // index that isValid() compares against p->size()
  bool       root; // true when the handle was created as a root handle

  /// Stores the three values as given; performs no validation.
  PPimp(PTF const* tree, unsigned index, bool isRootHandle)
    : p(tree), idx(index), root(isRootHandle)
  {
  }

  /// Returns the root flag supplied at construction.
  bool isRoot() const
  {
    return root;
  }

  /// Returns the stored tree pointer (possibly null).
  PTF const* ptr() const
  {
    return p;
  }

  /// A root handle is always valid; any other handle needs a non-null tree
  /// pointer and an index below that tree's size().
  bool isValid() const
  {
    if (root) {
      return true;
    }
    if (!p) {
      return false;
    }
    return idx < p->size();
  }
};


/**
 * A std::vector of GenericCandidate extended with two stream-based member
 * functions. All vector operations are available through public inheritance.
 */
class Candidates : public std::vector<GenericCandidate>
{
  typedef std::vector<GenericCandidate> MyBase;

public:
  /// Creates an empty list.
  Candidates()
    : MyBase()
  {
  }

  // Declared here, defined elsewhere. NOTE(review): presumably binary
  // load/store of the whole list on the given stream -- confirm against the
  // implementation file.
  void readBin(FILE* f);
  void writeBin(FILE* f) const;
};

/**
 * Owner of two C file handles, a vector of CPT tree pointers, a set of
 * vocabularies and a pool of PPimp handles.
 *
 * The object is non-copyable: the destructor closes m_FileSrc / m_FileTgt and
 * calls FreeMemory(), so a member-wise copy would make two objects release
 * the same handles.
 */
class PrefixTreeMap {
 public:
  /// Starts with no open files and sets the PTF default value to InvalidOffT.
  /// Note that PTF::setDefault is a static call, so it affects every PTF user.
  PrefixTreeMap() : m_FileSrc(0), m_FileTgt(0) {
    PTF::setDefault(InvalidOffT);
  }

  /// Closes whichever file handles are open, then calls FreeMemory().
  ~PrefixTreeMap() {
    if(m_FileSrc) {fClose(m_FileSrc);}
    if(m_FileTgt) {fClose(m_FileTgt);}
    FreeMemory();
  }

 public:
  // Defined out of line; its value is not visible in this header.
  static const LabelId MagicWord;

 public:
  // The member functions below are only declared here. Their exact behaviour
  // lives in the implementation file; the comments state no more than the
  // signatures show.

  /// Invoked by the destructor; may also be called directly.
  void FreeMemory();

  /// Takes a file-name stem and an optional vocabulary count (default -1).
  /// NOTE(review): meaning of the int return value is not visible here.
  int Read(const std::string& fileNameStem, int numVocs = -1);

  /// Writes results into *cands for a phrase key or for an existing handle.
  void GetCandidates(const IPhrase& key, Candidates* cands);
  void GetCandidates(const PPimp& p, Candidates* cands);

  /// Conversions between string and LabelId representations; voc selects
  /// the vocabulary (presumably an index into m_Voc -- TODO confirm).
  std::vector< std::string const * > ConvertPhrase(const IPhrase& p, unsigned int voc) const;
  IPhrase ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const;
  LabelId ConvertWord(const std::string& w, unsigned int voc) const;
  std::string ConvertWord(LabelId w, unsigned int voc) const;

 public: //low level
  /// NOTE(review): the returned handles presumably come from m_PtrPool and
  /// remain owned by this object -- confirm before deleting them.
  PPimp* GetRoot();
  PPimp* Extend(PPimp* p, LabelId wi);

  /// Convenience overload: converts w to a LabelId with vocabulary voc and
  /// forwards to Extend(PPimp*, LabelId). The word is taken by const
  /// reference to avoid copying the string on every call.
  PPimp* Extend(PPimp* p, const std::string& w, size_t voc){
    // ConvertWord takes an unsigned int; make the narrowing explicit.
    return Extend(p, ConvertWord(w, static_cast<unsigned int>(voc)));
  }

 private:
  // Non-copyable (declared, never defined): see the class comment.
  PrefixTreeMap(const PrefixTreeMap&);
  PrefixTreeMap& operator=(const PrefixTreeMap&);

 private:
  Data  m_Data;
  FILE* m_FileSrc;  // null until opened; closed by the destructor
  FILE* m_FileTgt;  // null until opened; closed by the destructor

  std::vector<WordVoc*> m_Voc;
  ObjectPool<PPimp>     m_PtrPool;
};

#endif //PREFIX_TREE_MAP_H