Welcome to mirror list, hosted at ThFree Co, Russian Federation.

sapt_pscore_base.h « UG « TranslationModel « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 68a491145c4e9d663db00e55b8d8299a2336bc6b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// -*- c++ -*-
// Base classes for suffix array-based phrase scorers
// written by Ulrich Germann 
#pragma once
#include <sstream>
#include <string>
#include <vector>

#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
#include "util/exception.hh"
#include "boost/format.hpp"

namespace Moses {
  namespace bitext
  {

    // abstract base class that defines the common API for phrase scorers
    template<typename Token>
    class
    PhraseScorer
    {
    protected:
      int m_index;
      int m_num_feats;
      string m_tag;
      vector<string> m_feature_names;
    public:
 
      virtual 
      void 
      operator()(Bitext<Token> const& pt, 
		 PhrasePair<Token>& pp, 
		 vector<float> * dest=NULL) 
	const = 0;

      void
      setIndex(int const i) { m_index = i; }
    
      int
      getIndex() const { return m_index; }

      int 
      fcnt() const { return m_num_feats; }
    
      vector<string> const &
      fnames() const { return m_feature_names; }

      string const &
      fname(int i) const
      { 
	if (i < 0) i += m_num_feats;
	UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
		       "Feature name index out of range at " << HERE);
	return m_feature_names.at(i); 
      }

      virtual
      bool
      isLogVal(int i) const  { return true; }; 
      // is this feature log valued? 
    
      virtual
      bool
      isIntegerValued(int i) const  { return false; }; 
      // is this feature integer valued (e.g., count features)? 

      virtual
      bool
      allowPooling() const { return true; }
      // does this feature function allow pooling of counts if 
      // there are no occurrences in the respective corpus?
      
    };

    // Base class for 'families' of phrase scorers that have a single
    // real-valued parameter. One feature is created per parameter value
    // listed in the specification string passed to init(), so one scorer
    // object can provide the same kind of feature under several parameter
    // settings.
    template<typename Token>
    class
    SingleRealValuedParameterPhraseScorerFamily
      : public PhraseScorer<Token>
    {
    protected:
      std::vector<float> m_x;  // parameter values, one per feature, in spec order

      // Parse /specs/ and set up one feature per parameter value.
      //
      // /specs/ is read as a sequence of floats, each followed by one
      // non-whitespace separator character of any kind (e.g. "0.1,0.5,1").
      // Reading stops at the first position where no float can be
      // extracted; anything after that point is ignored. Each value x is
      // appended to m_x, and a feature named "<m_tag>-<x>" (x printed with
      // two decimals) is appended to m_feature_names. m_index is reset to
      // -1 and m_num_feats is set to the number of values read.
      //
      // Throws via UTIL_THROW_IF2 if m_tag is empty, if /specs/ is empty,
      // if feature names have already been set up, or if no value at all
      // could be read from /specs/.
      virtual
      void
      init(std::string const specs)
      {
        UTIL_THROW_IF2(this->m_tag.size() == 0,
                       "m_tag must be initialized in constructor");
        UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
        UTIL_THROW_IF2(!this->m_feature_names.empty(),
                       "PhraseScorer can only be initialized once!");
        this->m_index = -1;

        std::istringstream buf(specs);
        float x;
        char separator;
        while (buf >> x)
          {
            this->m_x.push_back(x);
            std::string const fname
              = (boost::format("%s-%.2f") % this->m_tag % x).str();
            this->m_feature_names.push_back(fname);
            buf >> separator; // consume the character between two values
          }

        // A non-empty spec without a single readable number would otherwise
        // yield a scorer with zero features without any diagnostic.
        UTIL_THROW_IF2(this->m_x.empty(),
                       "no parameter value found in specification string '"
                       << specs << "'");
        this->m_num_feats = static_cast<int>(this->m_x.size());
      }
    };
  } // namespace bitext
} // namespace moses