Welcome to mirror list, hosted at ThFree Co, Russian Federation.

pcfg.cc « syntax-common « phrase-extract - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
blob: 3efe04218b178edf170c20c49322eb3263a24fb7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include "pcfg.h"

#include <cassert>

#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>

#include "syntax-common/exception.h"

namespace MosesTraining {
namespace Syntax {

// Stores `score` as the value associated with `key` in rules_, overwriting
// whatever value was previously stored under that key.
void Pcfg::Add(const Key &key, double score) {
  double &slot = rules_[key];
  slot = score;
}

bool Pcfg::Lookup(const Key &key, double &score) const {
  Map::const_iterator p = rules_.find(key);
  if (p == rules_.end()) {
    return false;
  }
  score = p->second;
  return true;
}

// Reads rules from `input`, one per line, in the format
//
//   LHS ||| RHS_1 RHS_2 ... RHS_n ||| score
//
// Leading and trailing whitespace of each field is trimmed and the RHS field
// is split on runs of whitespace.  Each symbol string is passed to
// vocab.Insert() (LHS first, then the RHS symbols from left to right); the
// returned values, in that order, make up the rule's key.  The rule is then
// stored via Add().
//
// Throws Exception if a line does not contain both "|||" delimiters.  The
// score is converted with boost::lexical_cast<double>, so a malformed score
// is reported by that facility rather than by Exception.
void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
  const std::string kDelimiter("|||");

  // These buffers are declared outside the loop so that their storage can be
  // reused from one line to the next.
  std::string line;
  std::string lhs;
  std::string rhs;
  std::string score_text;
  std::vector<std::string> rhs_symbols;
  Key key;

  while (std::getline(input, line)) {
    // Find the two field separators.
    const std::size_t first = line.find(kDelimiter);
    if (first == std::string::npos) {
      throw Exception("missing first delimiter");
    }
    const std::size_t rhs_begin = first + kDelimiter.size();
    const std::size_t second = line.find(kDelimiter, rhs_begin);
    if (second == std::string::npos) {
      throw Exception("missing second delimiter");
    }

    // Cut the line into its three fields.
    lhs.assign(line, 0, first);
    boost::trim(lhs);
    rhs.assign(line, rhs_begin, second - rhs_begin);
    boost::trim(rhs);
    score_text.assign(line, second + kDelimiter.size(), std::string::npos);
    boost::trim(score_text);

    // Tokenize the RHS.
    rhs_symbols.clear();
    boost::split(rhs_symbols, rhs, boost::algorithm::is_space(),
                 boost::algorithm::token_compress_on);

    // Build the key: LHS symbol followed by the RHS symbols.
    key.clear();
    key.reserve(rhs_symbols.size() + 1);
    key.push_back(vocab.Insert(lhs));
    for (std::size_t i = 0; i < rhs_symbols.size(); ++i) {
      key.push_back(vocab.Insert(rhs_symbols[i]));
    }

    // The score is parsed only after the symbols have been inserted into the
    // vocabulary.
    Add(key, boost::lexical_cast<double>(score_text));
  }
}

// Writes every rule to `output`, one rule per line, in the format
//
//   LHS ||| RHS_1 RHS_2 ... RHS_n ||| score
//
// The first element of a rule's key is written as the LHS and the remaining
// elements as the RHS; each element is converted to its printed form with
// vocab.Lookup().  The score is written with the stream's current formatting
// settings.
//
// Precondition: no rule has an empty key (checked with assert, so the check
// is compiled out when NDEBUG is defined).
void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
  for (const_iterator p = begin(); p != end(); ++p) {
    const Key &key = p->first;
    // Add() accepts any key, but a rule needs at least an LHS symbol:
    // dereferencing key.begin() on an empty key is undefined behaviour.
    assert(!key.empty());
    std::vector<std::size_t>::const_iterator q = key.begin();
    // Named key_end so that it does not shadow the member function end()
    // used in the loop header.
    const std::vector<std::size_t>::const_iterator key_end = key.end();
    output << vocab.Lookup(*q) << " |||";
    for (++q; q != key_end; ++q) {
      output << " " << vocab.Lookup(*q);
    }
    // '\n' instead of std::endl: same bytes, but the stream is not flushed
    // after every single rule.
    output << " ||| " << p->second << '\n';
  }
}

}  // namespace Syntax
}  // namespace MosesTraining