Welcome to mirror list, hosted at ThFree Co, Russian Federation.

NonTermContextProperty.cpp « PP « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3126e90d9635e1a076ae62261abd4d878e6d3ff4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include "moses/PP/NonTermContextProperty.h"
#include <string>
#include <cassert>
#include "moses/Util.h"
#include "moses/FactorCollection.h"

using namespace std;

namespace Moses
{
// Default constructor. Nothing is initialised explicitly here; the body is
// intentionally empty.
NonTermContextProperty::NonTermContextProperty() {}

// Destructor. Performs no manual cleanup: the only statement that was ever
// here is the disabled call preserved below.
NonTermContextProperty::~NonTermContextProperty()
{
  // RemoveAllInColl(m_probStores);
}

void NonTermContextProperty::ProcessValue(const std::string &value)
{
  vector<string> toks;
  Tokenize(toks, value);

  FactorCollection &fc = FactorCollection::Instance();

  size_t numNT = Scan<size_t>(toks[0]);
  m_probStores.resize(numNT);

  size_t ind = 1;
  while (ind < toks.size()) {
    vector<const Factor *> factors;

    for (size_t nt = 0; nt < numNT; ++nt) {
      size_t ntInd = Scan<size_t>(toks[ind]);
      assert(nt == ntInd);
      ++ind;

      for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
        //cerr << "toks[" << ind << "]=" << toks[ind] << endl;
        const Factor *factor = fc.AddFactor(toks[ind], false);
        factors.push_back(factor);
        ++ind;
      }
    }

    // done with the context. Just get the count and put it all into data structures
    // cerr << "count=" << toks[ind] << endl;
    float count = Scan<float>(toks[ind]);
    ++ind;

    for (size_t i = 0; i < factors.size(); ++i) {
      size_t ntInd = i / 4;
      size_t contextInd = i % 4;
      const Factor *factor = factors[i];
      AddToMap(ntInd, contextInd, factor, count);
    }
  }
}

/**
 * Add a count for a context word to the store of one non-terminal.
 *
 * @param ntIndex  index of the non-terminal; m_probStores is grown if needed
 *                 so that this index is valid.
 * @param index    context position, forwarded to ProbStore::AddToMap.
 * @param factor   the context word.
 * @param count    amount to add.
 */
void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
{
  // Grow only when the index is out of range. The previous test (<=) shrank
  // the vector for in-range indices, discarding the stores of every higher
  // non-terminal, and failed to grow it for indices beyond the current size.
  if (ntIndex >= m_probStores.size()) {
    m_probStores.resize(ntIndex + 1);
  }

  ProbStore &probStore = m_probStores[ntIndex];
  probStore.AddToMap(index, factor, count);
}

/**
 * Look up the smoothed probability of a context word for one non-terminal.
 *
 * @param ntInd           index into m_probStores; an out-of-range value is
 *                        reported through UTIL_THROW_IF2.
 * @param contextInd      context position, forwarded to the store.
 * @param factor          the context word.
 * @param smoothConstant  smoothing constant, forwarded to the store.
 * @return whatever ProbStore::GetProb returns for the selected store.
 */
float NonTermContextProperty::GetProb(size_t ntInd,
                                      size_t contextInd,
                                      const Factor *factor,
                                      float smoothConstant) const
{
  UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
  return m_probStores[ntInd].GetProb(contextInd, factor, smoothConstant);
}

//////////////////////////////////////////

void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
{
  Map &map = m_vec[index];

  Map::iterator iter = map.find(factor);
  if (iter == map.end()) {
    map[factor] = count;
  } else {
    float &currCount = iter->second;
    currCount += count;
  }

  m_totalCount += count;
}


/**
 * Smoothed relative frequency of `factor` at context position `contextInd`:
 * GetCount(...) divided by GetTotalCount(...).
 *
 * NOTE(review): no guard against a zero denominator is present; confirm
 * callers always pass a positive smoothConstant or query non-empty stores.
 */
float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
    const Factor *factor,
    float smoothConstant) const
{
  const float numerator = GetCount(contextInd, factor, smoothConstant);
  const float denominator = GetTotalCount(contextInd, smoothConstant);
  return numerator / denominator;
}

float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
    const Factor *factor,
    float smoothConstant) const
{
  const Map &map = m_vec[contextInd];

  float count = smoothConstant;
  Map::const_iterator iter = map.find(factor);
  if (iter == map.end()) {
    // nothing
  } else {
    count += iter->second;
  }

  return count;
}

float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
{
  const Map &map = m_vec[contextInd];
  return m_totalCount + smoothConstant * map.size();
}


} // namespace Moses