Welcome to mirror list, hosted at ThFree Co, Russian Federation.

LatticeMBR.h « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5fa47949d4a51e4e283f6da9d19ca6953c22f71e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*
 *  LatticeMBR.h
 *  moses-cmd
 *
 *  Created by Abhishek Arun on 26/01/2010.
 *  Copyright 2010 __MyCompanyName__. All rights reserved.
 *
 */

#ifndef moses_cmd_LatticeMBR_h
#define moses_cmd_LatticeMBR_h

#include <map>
#include <vector>
#include <set>
#include "moses/Hypothesis.h"
#include "moses/Manager.h"
#include "moses/TrellisPathList.h"



namespace Moses
{

// Forward declaration: the Path typedef below stores Edge pointers before
// the Edge class itself is defined.
class Edge;

// A lattice represented as a vector of pointers to const hypotheses.
typedef std::vector< const Moses::Hypothesis *> Lattice;
// An ordered sequence of pointers to const edges.
typedef std::vector<const Edge*> Path;
// A counter kept per path.
typedef std::map<Path, size_t> PathCounts;
// For each ngram (stored as a Phrase), the per-path counters recorded for it.
typedef std::map<Moses::Phrase, PathCounts > NgramHistory;

/**
 * A lattice edge connecting two hypotheses.
 *
 * Each edge stores its tail and head hypothesis, a score, a copy of the
 * target phrase it carries, and a table (m_ngrams) that records, per ngram,
 * how often that ngram was stored for each path (see storeNgramHistory()).
 */
class Edge
{
  const Moses::Hypothesis* m_tailNode;
  const Moses::Hypothesis* m_headNode;
  float m_score;
  Moses::TargetPhrase m_targetPhrase;
  NgramHistory m_ngrams;

public:
  /**
   * @param from          hypothesis stored as the tail node
   * @param to            hypothesis stored as the head node
   * @param score         score attached to this edge
   * @param targetPhrase  phrase carried by this edge; copied into the edge
   */
  Edge(const Moses::Hypothesis* from,
       const Moses::Hypothesis* to,
       float score,
       const Moses::TargetPhrase& targetPhrase)
    : m_tailNode(from)
    , m_headNode(to)
    , m_score(score)
    , m_targetPhrase(targetPhrase) {
    //cout << "Creating new edge from Node " << from->GetId() << ", to Node : " << to->GetId() << ", score: " << score << " phrase: " << targetPhrase << endl;
  }

  /** @return the hypothesis passed as `to` at construction. */
  const Moses::Hypothesis* GetHeadNode() const {
    return m_headNode;
  }

  /** @return the hypothesis passed as `from` at construction. */
  const Moses::Hypothesis* GetTailNode() const {
    return m_tailNode;
  }

  /** @return the score given at construction. */
  float GetScore() const {
    return m_score;
  }

  /** @return the size reported by the stored target phrase. */
  size_t GetWordsSize() const {
    return m_targetPhrase.GetSize();
  }

  /** @return the stored target phrase, exposed as a Moses::Phrase. */
  const Moses::Phrase& GetWords() const {
    return m_targetPhrase;
  }

  friend std::ostream& operator<< (std::ostream& out, const Edge& edge);

  /**
   * Defined out of line. Non-const, and takes the incoming-edge map by
   * non-const reference.
   * NOTE(review): presumably fills and returns m_ngrams - confirm against
   * the definition in the .cpp file.
   */
  const NgramHistory&  GetNgrams(  std::map<const Moses::Hypothesis*, std::vector<Edge> > & incomingEdges) ;

  /** Ordering between edges; defined out of line. */
  bool operator < (const Edge & compare) const;

  /**
   * Defined out of line.
   * NOTE(review): by its name and signature this appears to write the last
   * `lastN` words of origPhrase into targetPhrase - confirm.
   */
  void GetPhraseSuffix(const Moses::Phrase& origPhrase, size_t lastN, Moses::Phrase& targetPhrase) const;

  /**
   * Add `count` to the counter kept for (phrase, path), creating the entry
   * (value-initialised to 0) if it does not exist yet.
   *
   * `path` is only used as a lookup key and is never modified, so it is
   * taken by const reference; callers holding a non-const Path still bind.
   */
  void storeNgramHistory(const Moses::Phrase& phrase, const Path & path, size_t count = 1) {
    m_ngrams[phrase][path] += count;
  }

};

/**
 * Container for ngram scores gathered while walking the lattice.
 * Scores are addressed by a (hypothesis, ngram) pair.
 */
class NgramScores
{
public:
  /** Iterator over the (ngram pointer, score) entries kept for one node. */
  typedef std::map<const Moses::Phrase*, float>::const_iterator NodeScoreIterator;

  NgramScores() {}

  /** Combine (logsum) the given score with the one already stored for (node, ngram). */
  void addScore(const Moses::Hypothesis* node, const Moses::Phrase& ngram, float score);

  /** Start of the ngram/score range of the selected node. */
  NodeScoreIterator nodeBegin(const Moses::Hypothesis* node);

  /** End of the ngram/score range of the selected node. */
  NodeScoreIterator nodeEnd(const Moses::Hypothesis* node);

private:
  // Owned ngram phrases.
  // NOTE(review): the Phrase* keys in m_scores presumably point at elements
  // of this set - confirm in the .cpp file.
  std::set<Moses::Phrase> m_ngrams;

  // node -> (ngram pointer -> score)
  std::map<const Moses::Hypothesis*, std::map<const Moses::Phrase*, float> > m_scores;
};


/** A single lattice MBR solution: its words together with its scores. */
class LatticeMBRSolution
{
public:
  /**
   * Builds the solution by reading the words from the given path.
   * NOTE(review): the meaning of isMap is not visible in this header
   * (presumably marks the MAP hypothesis) - confirm in the .cpp file.
   */
  LatticeMBRSolution(const Moses::TrellisPath& path, bool isMap);

  /**
   * Initialise the ngram scores; defined out of line.
   * NOTE(review): presumably also sets the overall score from thetas and
   * mapWeight - confirm in the .cpp file.
   */
  void CalcScore(std::map<Moses::Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);

  // Read-only accessors; each returns the corresponding member.
  const std::vector<Moses::Word>& GetWords() const { return m_words; }
  const std::vector<float>& GetNgramScores() const { return m_ngramScores; }
  float GetMapScore() const { return m_mapScore; }
  float GetScore() const { return m_score; }

private:
  std::vector<Moses::Word> m_words;
  float m_mapScore;
  std::vector<float> m_ngramScores;
  float m_score;
};

/**
 * Strict ordering for LatticeMBRSolution objects: `a` is ordered before `b`
 * when a's score is greater, so sorting with it yields descending scores.
 *
 * The call operator is const because it keeps no state; this lets the functor
 * be invoked through const objects/references, as the standard library's
 * comparator requirements expect.
 */
struct LatticeMBRSolutionComparator {
  bool operator()(const LatticeMBRSolution& a, const LatticeMBRSolution& b) const {
    return a.GetScore() > b.GetScore();
  }
};

// Lattice pruning; defined out of line. connectedHyp, outgoingHyps and
// incomingEdges are taken by non-const reference, so the implementation may
// modify them.
// NOTE(review): the "FB" suffix presumably stands for forward-backward, and
// the unnamed Hypothesis* parameter's role is not visible here - confirm
// against the definition.
void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, std::set <const Moses::Hypothesis* > > & outgoingHyps, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges,
                    const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);

// Use the ngram scores to rerank the n-best list; returns at most n solutions
// through the `solutions` output parameter.
void getLatticeMBRNBest(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
// Calculate expected ngram counts, clipping at 1 (i.e. calculating posteriors)
// if posteriors==true. Results are delivered through finalNgramScores.
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
                           float>& finalNgramScores, bool posteriors);
// NOTE(review): `translation` is a non-const reference, presumably filled
// with the output words of `path` - confirm against the definition.
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
// NOTE(review): `allngrams` is a non-const reference, presumably receiving
// the ngrams of `sentence` mapped to their counts - confirm.
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int >  & allngrams);
// Binary predicate over two hypotheses.
// NOTE(review): by its name, orders hypotheses by ascending coverage - confirm.
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
// Lattice MBR entry point; returns the chosen words by value.
std::vector<Moses::Word> doLatticeMBR(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
// Consensus decoding entry point; returns a TrellisPath by (const) value.
const Moses::TrellisPath doConsensusDecoding(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList);
// Disabled alternative signature of doConsensusDecoding, kept for reference:
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);

}

#endif