Welcome to mirror list, hosted at ThFree Co, Russian Federation.

LVoc.h « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c81ccb7cfd62123b6ab59ec57c5e1d54caef6ecb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#ifndef moses_LVoc_h
#define moses_LVoc_h

#include <cassert>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

/// Numeric identifier assigned to a vocabulary entry.
typedef unsigned LabelId;
/// Value returned by LVoc::index() for a key that is not in the vocabulary.
/// Only declared here; the definition lives in another translation unit.
extern const LabelId InvalidLabelId;
/// Only declared here and not referenced in this header.
/// NOTE(review): presumably the id reserved for an "empty" label -- confirm
/// against the translation unit that defines it.
extern const LabelId Epsilon;

/// A sequence of label ids.
typedef std::vector<LabelId> IPhrase;

/** class used in phrase-based binary phrase-table.
 *  @todo vocab?
 *  A = type of things to numberize, ie, std::string
 *  B = map type to use, might consider using hash_map for better performance
 *
 *  Maintains a two-way mapping between keys and ids: the map `m` goes from
 *  key to id, the vector `data` goes from id back to key.  Ids handed out by
 *  add() are consecutive, starting at 0, in order of first insertion.
 *
 *  Key must be default-constructible, copyable and streamable with
 *  operator<< / operator>> for Write() / Read() to work.
 */
template<typename A,typename B=std::map<A,LabelId> >
class LVoc
{
  typedef A Key;
  typedef B M;
  typedef std::vector<Key> V;
  M m;     // key -> id
  V data;  // id -> key
public:
  /// Creates an empty vocabulary.
  LVoc() {}

  /// Returns true if k has an id in this vocabulary.
  bool isKnown(const Key& k) const {
    return m.find(k)!=m.end();
  }

  /// Returns the id of k, or InvalidLabelId if k is unknown.
  LabelId index(const Key& k) const {
    typename M::const_iterator i=m.find(k);
    return i!=m.end()? i->second : InvalidLabelId;
  }

  /// Returns the id of k, inserting k with the next free id (the current
  /// number of entries) if it was not present yet.
  LabelId add(const Key& k) {
    // The candidate id is only used when the insertion actually happens.
    const LabelId candidate=static_cast<LabelId>(data.size());
    std::pair<typename M::iterator,bool> p
    =m.insert(std::make_pair(k,candidate));
    if(p.second) data.push_back(k);
    assert(static_cast<size_t>(p.first->second)<data.size());
    return p.first->second;
  }

  /// Returns the key stored under id i.  i must be a valid id (asserted).
  Key const& symbol(LabelId i) const {
    assert(static_cast<size_t>(i)<data.size());
    return data[i];
  }

  /// Iteration over the keys in id order.
  typedef typename V::const_iterator const_iterator;
  const_iterator begin() const {
    return data.begin();
  }
  const_iterator end() const {
    return data.end();
  }

  /// Writes the vocabulary to the file fname (see the stream overload for
  /// the format).  Throws std::ios_base::failure if the file cannot be
  /// opened or written.
  void Write(const std::string& fname) const {
    std::ofstream out(fname.c_str());
    // Little-known fact: ofstream tracks failures but does not, by default,
    // report them.  You have to tell it to, or check for errors yourself.
    // If opening already failed, enabling the mask throws right here.
    out.exceptions(std::ios_base::failbit | std::ios_base::badbit);
    Write(out);
    // Make sure the file is flushed, so that any errors are reported.  If we
    // flush implicitly in the destructor, it won't be able to throw
    // exceptions.
    out.close();
  }

  /// Writes one "<id> <key>" line per entry, highest id first.  With that
  /// order, Read() grows its table to full size on the very first line.
  void Write(std::ostream& out) const {
    // Unsigned-safe countdown: test before decrementing so that an empty
    // vocabulary never wraps around.
    for(size_t i=data.size(); i-- > 0; )
      out<<i<<' '<<data[i]<<'\n';
  }

  /// Reads entries from the file fname.  If the file cannot be opened,
  /// nothing is read and no error is reported.
  void Read(const std::string& fname) {
    std::ifstream in(fname.c_str());
    Read(in);
  }

  /// Reads "<id> <key>" lines from in and merges them into the vocabulary.
  /// Lines that do not parse are skipped, and so are lines whose id cannot
  /// be represented as a usable LabelId.  Ids that are never mentioned but
  /// lie below the largest id read are filled with default-constructed keys.
  void Read(std::istream& in) {
    // An id i requires data.size()==i+1, and sizes must themselves fit in a
    // LabelId (add() hands them out as ids), so i must stay below the max.
    // This also rejects negative numbers, which stream extraction into an
    // unsigned type turns into huge values: without the check, resize(i+1)
    // could wrap to 0 and data[i] would write out of bounds.
    const size_t limit=std::numeric_limits<LabelId>::max();
    Key k;
    size_t i;
    std::string line;
    while(std::getline(in,line)) {
      std::istringstream is(line);
      if(!(is>>i>>k)) continue;
      if(i>=limit) continue;
      if(i>=data.size()) data.resize(i+1);
      data[i]=k;
      m[k]=static_cast<LabelId>(i);
    }
  }
};

#endif