Welcome to mirror list, hosted at ThFree Co, Russian Federation.

phraselm.cpp « memscore « contrib - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 0f94f43265d18e0005eb1dbeb4010671af1a9d7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// memscore - in-memory phrase scoring for Statistical Machine Translation
// Christian Hardmeier, FBK-irst, Trento, 2010
// $Id$

#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <n_gram.h>
#include <lmtable.h>

#include "phrasetable.h"
#include "phraselm.h"

// Binds this statistic to a phrase list.
//
// Remembers the address of pilist for the later score computation and asks
// the list to register data for this statistic, keeping the returned index
// in score_idx_.
// NOTE(review): the argument 1 is presumably the number of data slots
// requested -- confirm against PhraseInfoList::register_data.
void PhraseLanguageModel::attach(PhraseInfoList &pilist)
{
  PhraseInfoList *const attached_list = &pilist;
  phrase_info_list_ = attached_list;
  score_idx_ = attached_list->register_data(1);
}

void PhraseLanguageModel::compute_statistic()
{
  compute_lmscores(*phrase_info_list_, false);
}

// Scores every phrase in phrase_info_list with the language model read from
// lmfile_ and stores the result in each phrase's data slot score_idx_.
//
// For each phrase, its words are pushed one at a time into an n-gram and the
// lm.clprob() values are summed. The sum is interpreted as a base-10
// logarithm, so the stored score is 10^sum. When closed_world is true, every
// stored score is afterwards divided by the sum of all scores in the list.
//
// Parameters:
//   phrase_info_list  phrases to score; their data slot score_idx_ is written.
//   closed_world      if true, normalise the scores by their total.
//
// Throws std::runtime_error if the language model file cannot be opened.
// May be called only once per object (asserted through computation_done_).
void PhraseLanguageModel::compute_lmscores(PhraseInfoList &phrase_info_list, bool closed_world)
{
  // Check the precondition before paying for loading the language model.
  assert(!computation_done_);

  lmtable lm;
  std::ifstream lmstream(lmfile_.c_str());
  if(!lmstream)
    throw std::runtime_error(std::string("Cannot open language model file: ") + lmfile_.c_str());

  lm.load(lmstream, lmfile_.c_str(), NULL, 0);
  // NOTE(review): presumably a very large penalty so that phrases containing
  // out-of-vocabulary words get a vanishing score -- confirm against IRSTLM.
  lm.setlogOOVpenalty(10000000);

  Score marginal_score = .0;
  for(PhraseInfoList::iterator pit = phrase_info_list.begin(); pit != phrase_info_list.end(); ++pit) {
    PhraseInfo &pi = *pit;
    ngram ng(lm.getDict());
    Score lmscore = 0;
    for(PhraseText::const_string_iterator wit = pi.get_phrase().string_begin(); wit != pi.get_phrase().string_end(); ++wit) {
      ng.pushw(wit->c_str());
      lmscore += lm.clprob(ng);
    }

    // std::pow(10, x) is the portable spelling of the GNU-only exp10(x).
    pi.data(score_idx_) = std::pow(10.0, lmscore);
    marginal_score += pi.data(score_idx_);
  }

  if(closed_world) {
    // NOTE(review): if every score is zero, marginal_score is zero
    // and this division does not yield a meaningful probability -- confirm
    // whether callers need a guard here.
    for(PhraseInfoList::iterator pit = phrase_info_list.begin(); pit != phrase_info_list.end(); ++pit) {
      PhraseInfo &pi = *pit;
      pi.data(score_idx_) /= marginal_score;
    }
  }

  computation_done_ = true;
}

void ClosedPhraseLanguageModel::compute_statistic()
{
  compute_lmscores(*phrase_info_list_, true);
}