Welcome to mirror list, hosted at ThFree Co, Russian Federation.

HypothesisStack.h « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 6132595e8f67661a34a14f9604cc220ef2d94df6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#pragma once

#include <limits>
#include <set>
#include "Hypothesis.h"

/** defines less-than relation on hypotheses.
* The particular order is not important for us, we need just to figure out
* which hypothesis are equal based on:
*   the last n-1 target words are the same
*   and the covers (source words translated) are the same
*/
class HypothesisRecombinationOrderer
{
public:
	bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
	{
		// Are the last (n-1) words the same on the target side (n for n-gram LM)?
		int ret = hypoA->NGramCompare(*hypoB);
//		int ret = hypoA->FastNGramCompare(*hypoB, m_NGramMaxOrder - 1);
		if (ret != 0)
		{
			return (ret < 0);
		}

		// same last n-grams. compare source words translated
		const WordsBitmap &bitmapA		= hypoA->GetWordsBitmap()
			, &bitmapB	= hypoB->GetWordsBitmap();
		ret = bitmapA.Compare(bitmapB);

		return (ret < 0);
	}
};

/** Stack for instances of Hypothesis, includes functions for pruning. */ 
class HypothesisStack 
{
private:
	typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
public:
	typedef _HCType::iterator iterator;
	typedef _HCType::const_iterator const_iterator;
	friend std::ostream& operator<<(std::ostream&, const HypothesisStack&);

protected:
	float m_bestScore; /**< score of the best hypothesis in collection */
	float m_worstScore; /**< score of the worse hypthesis in collection */
	float m_beamWidth; /**< minimum score due to threashold pruning */
	size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
	_HCType m_hypos; /**< contains hypotheses */
	bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */

	/** add hypothesis to stack. Prune if necessary. 
	 * Returns false if equiv hypo exists in collection, otherwise returns true
	 */
	std::pair<HypothesisStack::iterator, bool> Add(Hypothesis *hypothesis);

	//! remove hypothesis pointed to by iterator but don't delete the object
	inline void Detach(const HypothesisStack::iterator &iter)
	{
		m_hypos.erase(iter);
	}
	/** destroy all instances of Hypothesis in this collection */
	void RemoveAll();
	/** destroy Hypothesis pointed to by iterator (object pool version) */
	inline void Remove(const HypothesisStack::iterator &iter)
	{
		FREEHYPO(*iter);
		Detach(iter);
	}

public:
	//! iterators
	const_iterator begin() const { return m_hypos.begin(); }
	const_iterator end() const { return m_hypos.end(); }
	size_t size() const { return m_hypos.size(); }

	HypothesisStack();
	~HypothesisStack()
	{
		RemoveAll();
	}

	/** adds the hypo, but only if within thresholds (beamThr, stackSize).
	*	This function will recombine hypotheses silently!  There is no record
	* (could affect n-best list generation...TODO)
	* Call stack for adding hypothesis is
			AddPrune()
				Add()
					AddNoPrune()
	*/
	void AddPrune(Hypothesis *hypothesis);

	/** set maximum number of hypotheses in the collection
   * \param maxHypoStackSize maximum number (typical number: 100)
   */
	inline void SetMaxHypoStackSize(size_t maxHypoStackSize)
	{
		m_maxHypoStackSize = maxHypoStackSize;
	}
	/** set beam threshold, hypotheses in the stack must not be worse than 
    * this factor times the best score to be allowed in the stack
	 * \param beamThreshold minimum factor (typical number: 0.03)
	 */
	inline void SetBeamWidth(float beamWidth)
	{
		m_beamWidth = beamWidth;
	}
	/** return score of the best hypothesis in the stack */
	inline float GetBestScore() const
	{
		return m_bestScore;
	}
	
	/** pruning, if too large.
	 * Pruning algorithm: find a threshold and delete all hypothesis below it.
	 * The threshold is chosen so that exactly newSize top items remain on the 
	 * stack in fact, in situations where some of the hypothesis fell below 
	 * m_beamWidth, the stack will contain less items.
	 * \param newSize maximum size */
	void PruneToSize(size_t newSize);

	//! return the hypothesis with best score. Used to get the translated at end of decoding
	const Hypothesis *GetBestHypothesis() const;
	//! return all hypothesis, sorted by descending score. Used in creation of N best list
	std::vector<const Hypothesis*> GetSortedList() const;
	
	/** make all arcs in point to the equiv hypothesis that contains them. 
	* Ie update doubly linked list be hypo & arcs
	*/
	void CleanupArcList();
	
	TO_STRING();
};