1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelImplementation_h
#define moses_LanguageModelImplementation_h
#include <string>
#include <vector>
#include "moses/Factor.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Word.h"
#include "Base.h"
#include <boost/shared_ptr.hpp>
namespace Moses
{
class FactorCollection;
class Factor;
class Phrase;
//! Value type handed back by the language-model scoring functions.
struct LMResult {
  //! Log probability of the scored n-gram.
  float score;
  //! True when the scored word is unknown.
  bool unknown;
};
//! Abstract base class which represents a language model on a contiguous phrase.
//! Derives from LanguageModel and leaves GetValueForgotState, GetNullContextState,
//! GetBeginSentenceState and NewState pure virtual for concrete models to supply.
class LanguageModelImplementation : public LanguageModel
{
// Legacy note: "default constructor is ok".
// NOTE(review): this class only declares the string-taking constructor below,
// so the note above looks stale -- confirm before relying on it.
//
// Private helper, defined out of line. Takes the n-gram context vector by
// non-const reference plus the next word.
// NOTE(review): presumably appends the word while the context is shorter than
// the n-gram order and slides the window otherwise -- confirm in the .cpp.
void ShiftOrPush(std::vector<const Word*> &contextFactor, const Word &word) const;
protected:
// NOTE(review): presumably the path of the LM file on disk -- confirm where it is set.
std::string m_filePath;
size_t m_nGramOrder; //! max n-gram length contained in this LM
Word m_sentenceStartWord, m_sentenceEndWord; //! Contain the factors which represent the beginning and end words for this LM.
//! Usually <s> and </s>
// Protected constructor: only derived classes can construct this type.
// NOTE(review): 'line' is presumably the feature-function configuration line
// forwarded to the LanguageModel base -- confirm against Base.h.
LanguageModelImplementation(const std::string &line);
public:
//! Virtual destructor with an empty body.
virtual ~LanguageModelImplementation() {}
//! Applies one configuration setting given as a key/value pair of strings.
// Defined out of line; the set of recognised keys is not visible in this header.
void SetParameter(const std::string& key, const std::string& value);
/* Get score of n-gram. The n-gram should not be bigger than m_nGramOrder.
* Specific implementations can return State and len data to be used in hypothesis pruning.
* \param contextFactor n-gram to be scored
* \param state LM state. Input and output. state must be initialized.
*        If state isn't initialized, the original text points to "GetValueWithoutState";
*        no member of that name is declared here.
*        NOTE(review): GetValueForgotState below looks like the intended function -- confirm.
* \return an LMResult (log probability plus unknown-word flag)
*/
virtual LMResult GetValueGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const;
// Like GetValueGivenState but state may not be initialized (however it is non-NULL).
// For example, state just came from NewState(NULL).
// Pure virtual: every concrete language model must provide it.
virtual LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const = 0;
//! Get the State for a particular n-gram, written to outState. We don't care what the score is.
// This is here so models can implement a shortcut to "GetValueAndState".
// NOTE(review): no GetValueAndState is declared in this class; the name is
// probably a leftover for GetValueForgotState -- confirm.
virtual void GetState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
// Pure virtual accessor returning a pointer to a const FFState.
// NOTE(review): presumably the state for an empty (null) context -- confirm in implementations.
virtual const FFState *GetNullContextState() const = 0;
// Pure virtual accessor returning a pointer to a const FFState.
// EmptyHypothesisState() below passes this state to NewState().
virtual const FFState *GetBeginSentenceState() const = 0;
// Pure virtual factory for state objects; 'from' defaults to NULL.
// NOTE(review): presumably copies 'from' when it is non-NULL and hands ownership
// of the returned object to the caller -- confirm in implementations.
virtual FFState *NewState(const FFState *from = NULL) const = 0;
// Scores a phrase; results are delivered through the fullScore, ngramScore and
// oovCount reference parameters. Defined out of line.
// NOTE(review): exact meaning of fullScore vs. ngramScore is not visible here -- confirm.
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
// Overload taking a Hypothesis, a previous-state pointer and a score collection.
// Defined out of line.
// NOTE(review): presumably returns a newly allocated state -- confirm ownership.
FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
// Overload taking a ChartHypothesis, a feature id and a score accumulator.
// Defined out of line.
// NOTE(review): presumably returns a newly allocated state -- confirm ownership.
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;
// Helper taking pointers to a prefix score and a final score, plus a score and a
// word position. Defined out of line.
// NOTE(review): presumably decides from wordPos which of the two totals receives
// 'score' -- confirm in the .cpp.
void updateChartScore(float *prefixScore, float *finalScore, float score, size_t wordPos) const;
//! max n-gram order of LM
size_t GetNGramOrder() const {
return m_nGramOrder;
}
//! Contains the factors which represent the beginning word for this LM. Usually <s>
const Word &GetSentenceStartWord() const {
return m_sentenceStartWord;
}
//! Contains the factors which represent the end word for this LM. Usually </s>
const Word &GetSentenceEndWord() const {
return m_sentenceEndWord;
}
//! State for an empty hypothesis: NewState() applied to the begin-of-sentence state.
// The input argument is unused (its name is commented out).
const FFState* EmptyHypothesisState(const InputType &/*input*/) const {
return NewState(GetBeginSentenceState());
}
};
}
#endif
|