Welcome to mirror list, hosted at ThFree Co, Russian Federation.

Word.cpp « moses2 - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: f272f7cdcc135aa10a42ea11939b94157ddb54ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*
 * Word.cpp
 *
 *  Created on: 23 Oct 2015
 *      Author: hieu
 */
#include <boost/functional/hash_fwd.hpp>
#include <sstream>
#include <vector>
#include "Word.h"
#include "System.h"
#include "legacy/Util2.h"
#include "util/murmur_hash.hh"

using namespace std;

namespace Moses2
{

/**
 * Default constructor: every factor slot starts out unset.
 *
 * Hands m_factors, MAX_NUM_FACTORS and a NULL value to the Init helper
 * (presumably a plain fill of the array -- confirm against its definition).
 */
Word::Word()
{
  const Factor *const unsetFactor = NULL;
  Init<const Factor*>(m_factors, MAX_NUM_FACTORS, unsetFactor);
}

/**
 * Copy constructor: duplicates the factor pointers of another word.
 *
 * Only the pointers are copied, so both words refer to the same Factor
 * objects afterwards.
 *
 * @param copy the word whose factor slots are duplicated.
 */
Word::Word(const Word &copy)
{
  for (size_t slot = 0; slot < MAX_NUM_FACTORS; ++slot) {
    m_factors[slot] = copy.m_factors[slot];
  }
}

Word::~Word()
{
  // Intentionally empty: nothing is released here. The Factor pointers stored
  // in m_factors come from FactorCollection::AddFactor (see CreateFromString)
  // and are presumably owned by that collection -- TODO confirm.
}

/**
 * Populates this word from a '|'-separated factor string.
 *
 * The string is split on '|'; the i-th token is registered with the vocabulary
 * via AddFactor and the returned Factor pointer is stored in m_factors[i].
 * Slots without a corresponding token are set to NULL.
 *
 * @param vocab  factor collection that the tokens are added to / looked up in.
 * @param system passed through unchanged to FactorCollection::AddFactor.
 * @param str    the word text, with factors separated by '|'.
 *
 * If str contains more than MAX_NUM_FACTORS tokens, only the first
 * MAX_NUM_FACTORS are stored and the remainder are ignored.
 */
void Word::CreateFromString(FactorCollection &vocab, const System &system,
                            const std::string &str)
{
  vector<string> toks = Tokenize(str, "|");

  // m_factors has exactly MAX_NUM_FACTORS slots. Clamp the number of factors
  // we store so that an input with too many '|'-separated fields cannot write
  // past the end of the array.
  const size_t capacity = static_cast<size_t>(MAX_NUM_FACTORS);
  const size_t numFactors = (toks.size() < capacity) ? toks.size() : capacity;

  for (size_t i = 0; i < numFactors; ++i) {
    m_factors[i] = vocab.AddFactor(toks[i], system, false);
  }

  // null the rest
  for (size_t i = numFactors; i < capacity; ++i) {
    m_factors[i] = NULL;
  }
}

/**
 * Hashes the whole word.
 *
 * The hash is computed with util::MurmurHashNative over the raw bytes of all
 * MAX_NUM_FACTORS factor pointers, using a seed of 0. Because the pointer
 * values themselves are hashed, two words hash alike when every slot holds
 * the same Factor pointer.
 *
 * @return the hash value.
 */
size_t Word::hash() const
{
  const uint64_t hashSeed = 0;
  const size_t numBytes = sizeof(Factor*) * MAX_NUM_FACTORS;
  return util::MurmurHashNative(m_factors, numBytes, hashSeed);
}

size_t Word::hash(const std::vector<FactorType> &factors) const
{
  size_t seed = 0;
  for (size_t i = 0; i < factors.size(); ++i) {
    FactorType factorType = factors[i];
    const Factor *factor = m_factors[factorType];
    boost::hash_combine(seed, factor);
  }
  return seed;
}


/**
 * Three-way comparison of two words.
 *
 * The factor pointer arrays are compared byte-for-byte with memcmp, so the
 * ordering depends on the pointer values stored in the slots, not on the
 * text of the factors.
 *
 * @param compare the word to compare against.
 * @return a negative value, zero, or a positive value, as returned by memcmp.
 */
int Word::Compare(const Word &compare) const
{
  const size_t numBytes = sizeof(Factor*) * MAX_NUM_FACTORS;
  return memcmp(m_factors, compare.m_factors, numBytes);
}

/**
 * Strict ordering on words, defined through Compare().
 *
 * @param compare the right-hand operand.
 * @return true when Compare(compare) yields a negative value.
 */
bool Word::operator<(const Word &compare) const
{
  return Compare(compare) < 0;
}

std::string Word::Debug(const System &system) const
{
  stringstream out;
  bool outputAlready = false;
  for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) {
    const Factor *factor = m_factors[i];
    if (factor) {
      if (outputAlready) {
        out << "|";
      }
      out << *factor;
      outputAlready = true;
    }
  }

  return out.str();
}

/**
 * Writes the word to a stream using the configured output factor order.
 *
 * The factors listed in system.options.output.factor_order are streamed in
 * that order, separated by "|". Nothing is written when the factor order is
 * empty.
 *
 * @param system supplies the output factor order.
 * @param out    destination stream.
 *
 * Each listed factor is dereferenced, so the corresponding slot of m_factors
 * must be non-NULL.
 */
void Word::OutputToStream(const System &system, std::ostream &out) const
{
  const std::vector<FactorType> &factorTypes = system.options.output.factor_order;

  // A single loop starting at 0 (instead of special-casing factorTypes[0])
  // avoids indexing into an empty vector.
  for (size_t i = 0; i < factorTypes.size(); ++i) {
    if (i > 0) {
      out << "|";
    }
    const Factor *factor = m_factors[ factorTypes[i] ];
    out << *factor;
  }
}

/**
 * Returns the text of the selected factors joined with "|".
 *
 * @param factorTypes indices into m_factors to output, in order. Must be
 *                    non-empty (checked with assert).
 * @return the GetString() value of each selected factor, separated by "|".
 *
 * Each selected factor is dereferenced, so the corresponding slot of
 * m_factors must be non-NULL.
 */
std::string Word::GetString(const FactorList &factorTypes) const
{
  assert(factorTypes.size());
  std::stringstream ret;

  ret << m_factors[factorTypes[0]]->GetString();
  for (size_t i = 1; i < factorTypes.size(); ++i) {
    FactorType factorType = factorTypes[i];
    // Stream the factor's text, as done for the first factor. Streaming the
    // Factor pointer itself would print a memory address.
    ret << "|" << m_factors[factorType]->GetString();
  }
  return ret.str();
}

}