blob: 0f94f43265d18e0005eb1dbeb4010671af1a9d7d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
// memscore - in-memory phrase scoring for Statistical Machine Translation
// Christian Hardmeier, FBK-irst, Trento, 2010
// $Id$
#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <n_gram.h>
#include <lmtable.h>
#include "phrasetable.h"
#include "phraselm.h"
// Binds this statistic to a phrase list.
//
// Remembers the address of `pilist` for the later scoring pass and asks the
// list for one data slot via register_data(1); the value it returns is kept
// in score_idx_ and later used as the argument to PhraseInfo::data().
void PhraseLanguageModel::attach(PhraseInfoList &pilist)
{
  this->phrase_info_list_ = &pilist;
  this->score_idx_ = this->phrase_info_list_->register_data(1);
}
// Scores the attached phrase list without closed-world normalisation:
// forwards to compute_lmscores() with closed_world set to false.
void PhraseLanguageModel::compute_statistic()
{
  const bool closed_world = false;
  compute_lmscores(*phrase_info_list_, closed_world);
}
// Scores every phrase in `phrase_info_list` with the language model read from
// lmfile_ and stores the result in each phrase's data slot score_idx_.
//
// For each phrase, its words are pushed one at a time into an n-gram and the
// values returned by lmtable::clprob() are summed. The sum is interpreted as a
// base-10 logarithm and converted to a linear score with 10^sum.
//
// Parameters:
//   phrase_info_list  phrases to score; each entry's slot score_idx_ is
//                     overwritten.
//   closed_world      if true, every score is afterwards divided by the sum of
//                     all scores in the list. The division is skipped when
//                     that sum is not positive, so the scores are left
//                     unchanged instead of turning into NaN.
//
// Throws std::runtime_error if the language model file cannot be opened.
// May only run once per object; this is asserted through computation_done_.
void PhraseLanguageModel::compute_lmscores(PhraseInfoList &phrase_info_list, bool closed_world)
{
  // Check the precondition before touching the language model file.
  assert(!computation_done_);

  std::ifstream lmstream(lmfile_.c_str());
  if(!lmstream)
    throw std::runtime_error(std::string("Cannot open language model file: ") + lmfile_.c_str());

  lmtable lm;
  lm.load(lmstream, lmfile_.c_str(), NULL, 0);
  // NOTE(review): the meaning of this argument is defined by irstlm's
  // lmtable::setlogOOVpenalty - confirm against the irstlm documentation.
  lm.setlogOOVpenalty(10000000);

  Score marginal_score = .0;
  for(PhraseInfoList::iterator it = phrase_info_list.begin(); it != phrase_info_list.end(); ++it) {
    PhraseInfo &pi = *it;

    // A fresh n-gram per phrase, so no history carries over between phrases.
    ngram ng(lm.getDict());
    Score lmscore = 0;
    for(PhraseText::const_string_iterator word = pi.get_phrase().string_begin();
        word != pi.get_phrase().string_end(); ++word) {
      ng.pushw(word->c_str());
      lmscore += lm.clprob(ng);
    }

    // std::pow is used instead of the non-standard GNU exp10().
    pi.data(score_idx_) = std::pow(10.0, lmscore);
    marginal_score += pi.data(score_idx_);
  }

  // Normalise only when the total is positive; dividing by zero would
  // replace every score with NaN.
  if(closed_world && marginal_score > 0) {
    for(PhraseInfoList::iterator it = phrase_info_list.begin(); it != phrase_info_list.end(); ++it) {
      PhraseInfo &pi = *it;
      pi.data(score_idx_) /= marginal_score;
    }
  }

  computation_done_ = true;
}
// Scores the attached phrase list with closed-world normalisation:
// forwards to compute_lmscores() with closed_world set to true, which
// divides each score by the sum of all scores in the list.
void ClosedPhraseLanguageModel::compute_statistic()
{
  const bool closed_world = true;
  compute_lmscores(*phrase_info_list_, closed_world);
}
|