Welcome to mirror list, hosted at ThFree Co, Russian Federation.

KBestExtractor.h « Syntax « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 15cf0e3c874a908c4babdfe3c80a9c7bc7c39779 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#pragma once

#include <cassert>

#include <queue>
#include <vector>

#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <boost/weak_ptr.hpp>

#include "moses/ScoreComponentCollection.h"
#include "moses/FF/InternalTree.h"

#include "SHyperedge.h"
#include "SVertex.h"

namespace Moses
{
namespace Syntax
{

// k-best list extractor that implements algorithm 3 from this paper:
//
//  Liang Huang and David Chiang
//  "Better k-best parsing"
//  In Proceedings of IWPT 2005
//
class KBestExtractor
{
public:
  struct KVertex;

  struct KHyperedge {
    KHyperedge(const SHyperedge &e) : shyperedge(e) {}

    const SHyperedge &shyperedge;
    boost::shared_ptr<KVertex> head;
    std::vector<boost::shared_ptr<KVertex> > tail;
  };

  struct Derivation {
    Derivation(const boost::shared_ptr<KHyperedge> &);
    Derivation(const Derivation &, std::size_t);

    boost::shared_ptr<KHyperedge> edge;
    std::vector<std::size_t> backPointers;
    std::vector<boost::shared_ptr<Derivation> > subderivations;
    ScoreComponentCollection scoreBreakdown;
    float score;
  };

  struct DerivationOrderer {
    bool operator()(const boost::weak_ptr<Derivation> &d1,
                    const boost::weak_ptr<Derivation> &d2) const {
      boost::shared_ptr<Derivation> s1(d1);
      boost::shared_ptr<Derivation> s2(d2);
      return s1->score < s2->score;
    }
  };

  struct KVertex {
    typedef std::priority_queue<boost::weak_ptr<Derivation>,
            std::vector<boost::weak_ptr<Derivation> >,
            DerivationOrderer> DerivationQueue;

    KVertex(const SVertex &v) : svertex(v), visited(false) {}

    const SVertex &svertex;
    std::vector<boost::weak_ptr<Derivation> > kBestList;
    DerivationQueue candidates;
    bool visited;
  };

  typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;

  // Extract the k-best list from the search hypergraph given the full, sorted
  // list of top-level SVertices.
  void Extract(const std::vector<boost::shared_ptr<SVertex> > &, std::size_t,
               KBestVec &);

  static Phrase GetOutputPhrase(const Derivation &);
  static TreePointer GetOutputTree(const Derivation &);

private:
  typedef boost::unordered_map<const SVertex *,
          boost::shared_ptr<KVertex> > VertexMap;

  struct DerivationHasher {
    std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
      std::size_t seed = 0;
      boost::hash_combine(seed, &(d->edge->shyperedge));
      boost::hash_combine(seed, d->backPointers);
      return seed;
    }
  };

  struct DerivationEqualityPred {
    bool operator()(const boost::shared_ptr<Derivation> &d1,
                    const boost::shared_ptr<Derivation> &d2) const {
      return &(d1->edge->shyperedge) == &(d2->edge->shyperedge) &&
             d1->backPointers == d2->backPointers;
    }
  };

  typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
          DerivationEqualityPred> DerivationSet;

  boost::shared_ptr<KVertex> FindOrCreateVertex(const SVertex &);
  void GetCandidates(boost::shared_ptr<KVertex>, std::size_t);
  void LazyKthBest(boost::shared_ptr<KVertex>, std::size_t, std::size_t);
  void LazyNext(KVertex &, const Derivation &, std::size_t);

  VertexMap m_vertexMap;
  DerivationSet m_derivations;
};

}  // namespace Syntax
}  // namespace Moses