// moses/LM/Implementation.h

// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#ifndef moses_LanguageModelImplementation_h
#define moses_LanguageModelImplementation_h

#include <string>
#include <vector>
#include "moses/Factor.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Word.h"
#include "Base.h"

#include <boost/shared_ptr.hpp>

namespace Moses
{

class FactorCollection;
class Factor;
class Phrase;

/**
 * Value handed back by the LM scoring functions.
 * Plain aggregate: neither field is initialized by default, so callers
 * must fill in both before reading them.
 */
struct LMResult {
  float score;    //!< log probability
  bool unknown;   //!< is the word unknown?
};

//! Abstract base class which represents a language model on a contiguous phrase.
//! It is abstract because GetValueForgotState, GetNullContextState,
//! GetBeginSentenceState and NewState are pure virtual; a concrete language
//! model must derive from it and implement them.
class LanguageModelImplementation : public LanguageModel
{
  // NOTE(review): this class has no default constructor -- declaring the
  // protected constructor below suppresses the implicitly generated one.

  // Private helper, defined outside this header.  Judging by the name it appends
  // word to contextFactor, shifting out the oldest entry once the context is
  // full -- TODO confirm against the definition.
  void ShiftOrPush(std::vector<const Word*> &contextFactor, const Word &word) const;

protected:
  //! presumably the path of the LM file -- TODO confirm where it is set
  std::string	m_filePath;
  //! max n-gram length contained in this LM
  size_t			m_nGramOrder;
  //! Contain the factors which represent the beginning and end words for this LM.
  //! Usually <s> and </s>
  Word m_sentenceStartWord, m_sentenceEndWord;

  //! Protected, so it can only be invoked from derived classes.
  //! \param line presumably the configuration line describing this LM --
  //!        TODO confirm against the definition
  LanguageModelImplementation(const std::string &line);

public:

  //! Virtual so that deleting a derived model through a base pointer is well defined.
  virtual ~LanguageModelImplementation() {}

  //! Takes one key/value pair of strings; presumably one configuration
  //! parameter of the LM.  NOTE(review): the accepted keys are decided in the
  //! definition, which is not visible in this header.
  void SetParameter(const std::string& key, const std::string& value);

  /** get score of n-gram. n-gram should not be bigger than m_nGramOrder
   * Specific implementation can return State and len data to be used in hypothesis pruning
   * \param contextFactor n-gram to be scored
   * \param state LM state.  Input and output.  state must be initialized.  If state isn't
   *        initialized, use GetValueForgotState, which accepts an uninitialized state.
   * \return the LMResult (score and unknown flag) for the n-gram
   */
  virtual LMResult GetValueGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const;

  //! Like GetValueGivenState but state may not be initialized (however it is non-NULL).
  //! For example, state just came from NewState(NULL).
  //! Pure virtual: every concrete LM must implement it.
  virtual LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const = 0;

  //! get State for a particular n-gram.  We don't care what the score is.
  //! This is here so models can implement a shortcut to GetValueAndState.
  //! NOTE(review): GetValueAndState is not declared in this header; the name may
  //! be a leftover from an older version of the scoring functions -- confirm.
  //! \param contextFactor n-gram whose state is wanted
  //! \param outState receives the state
  virtual void GetState(const std::vector<const Word*> &contextFactor, FFState &outState) const;

  //! State accessors and state factory that every concrete LM must provide.
  //! NewState accepts an optional source state, from, which defaults to NULL.
  //! NOTE(review): ownership of the returned pointers is not stated here;
  //! presumably the caller owns what NewState returns -- confirm.
  virtual const FFState *GetNullContextState() const = 0;
  virtual const FFState *GetBeginSentenceState() const = 0;
  virtual FFState *NewState(const FFState *from = NULL) const = 0;

  //! Scores a phrase.  fullScore, ngramScore and oovCount are taken by non-const
  //! reference and are presumably outputs (the total score, the score of complete
  //! n-grams only, and the number of unknown words) -- TODO confirm against the definition.
  virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;

  //! Overload taking a Hypothesis together with a previous state ps and a score
  //! collection out; returns a state pointer.
  //! NOTE(review): presumably scores the hypothesis into out and returns the
  //! resulting LM state -- confirm against the definition.
  FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;

  //! Overload taking a ChartHypothesis, a feature id and a score accumulator;
  //! returns a state pointer.  NOTE(review): semantics presumably mirror the
  //! Hypothesis overload above -- confirm against the definition.
  FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;

  //! Helper for chart scoring, defined outside this header.
  //! NOTE(review): presumably adds score to *finalScore or *prefixScore depending
  //! on wordPos -- confirm against the definition.
  void updateChartScore(float *prefixScore, float *finalScore, float score, size_t wordPos) const;

  //! max n-gram order of LM
  size_t GetNGramOrder() const {
    return m_nGramOrder;
  }

  //! Contain the factors which represent the beginning and end words for this LM. Usually <s> and </s>
  const Word &GetSentenceStartWord() const {
    return m_sentenceStartWord;
  }
  const Word &GetSentenceEndWord() const {
    return m_sentenceEndWord;
  }

  //! Returns a state obtained from NewState(), seeded with the begin-of-sentence
  //! state.  The input argument is not used.
  const FFState* EmptyHypothesisState(const InputType &/*input*/) const {
    return NewState(GetBeginSentenceState());
  }

};

}

#endif