Welcome to mirror list, hosted at ThFree Co, Russian Federation.

GenerationDictionary.cpp « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 12f3f819255a9ee31c17e7a58cd6562737215ed0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#include <fstream>
#include <iostream>
#include <string>
#include "GenerationDictionary.h"
#include "FactorCollection.h"
#include "Word.h"
#include "Util.h"
#include "InputFileStream.h"
#include "StaticData.h"

using namespace std;

/** Constructs an empty generation dictionary and registers it as a score
 *  producer with the ScoreIndexManager held by StaticData.
 *
 *  The base Dictionary is constructed with the argument 1 (presumably the
 *  number of score components -- confirm against Dictionary).
 */
GenerationDictionary::GenerationDictionary()
  : Dictionary(1)
{
	// StaticData only hands out the manager through a const accessor, so
	// constness has to be cast away before this producer can be registered.
	ScoreIndexManager &scoreIndexManager =
		const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager());
	scoreIndexManager.AddScoreProducer(this);
}

void GenerationDictionary::Load(const std::vector<FactorType> &input
																			, const std::vector<FactorType> &output
																			, FactorCollection &factorCollection
																			, const std::string &filePath
																			, float weight
																			, FactorDirection direction)
{	
	m_weight = weight;

	//factors	
	m_factorsUsed[Input] = new FactorTypeSet(input);
	m_factorsUsed[Output] = new FactorTypeSet(output);
	
	// data from file
	InputFileStream inFile(filePath);

  m_filename = filePath;
	string line;
	while(getline(inFile, line)) 
	{
		vector<string> token = Tokenize( line );
		
		// add each line in generation file into class
		Word inputWord, outputWord;

		// create word with certain factors filled out

		// inputs
		vector<string> factorString = Tokenize( token[0], "|" );
		for (size_t i = 0 ; i < input.size() ; i++)
		{
			FactorType factorType = input[i];
			const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
			inputWord.SetFactor(factorType, factor);
		}

		factorString = Tokenize( token[1], "|" );
		for (size_t i = 0 ; i < output.size() ; i++)
		{
			FactorType factorType = output[i];
			
			const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
			outputWord.SetFactor(factorType, factor);
		}

		float score		= TransformScore(Scan<float>(token[2]));
		
		m_collection[inputWord][outputWord] = score;					
	}
	inFile.Close();
}

/** Destroys the dictionary, deleting every entry of m_factorsUsed
 *  (Load() allocates the Input and Output entries with new).
 */
GenerationDictionary::~GenerationDictionary()
{
	size_t slot = 0;
	while (slot < m_factorsUsed.size())
	{
		delete m_factorsUsed[slot];
		++slot;
	}
}

unsigned int GenerationDictionary::GetNumScoreComponents() const
{
  return this->GetNoScoreComponents();
}

/** Returns a human-readable description of this score producer: a fixed
 *  prefix followed by the path stored in m_filename.
 */
const std::string GenerationDictionary::GetScoreProducerDescription() const
{
	std::string description("Generation score, file=");
	description += m_filename;
	return description;
}

/** Looks up the output words stored for a given input word.
 *
 *  \param factorArray factors of the word to look up; copied into a
 *                     temporary Word that is used as the map key
 *  \return pointer to the collection stored in m_collection for that word,
 *          or NULL if the word has no entry
 */
const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const
{
	// build the lookup key from the caller's factors
	Word key;
	Word::Copy(key.GetFactorArray(), factorArray);

	std::map<Word , OutputWordCollection>::const_iterator found = m_collection.find(key);
	if (found == m_collection.end())
	{ // no entry for this word
		return NULL;
	}
	return &found->second;
}