github.com/moses-smt/mosesdecoder.git

Diffstat (limited to 'moses/src/Hypothesis.cpp')
-rwxr-xr-x  moses/src/Hypothesis.cpp  532
1 file changed, 0 insertions, 532 deletions
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
deleted file mode 100755
index 0a0edf34e..000000000
--- a/moses/src/Hypothesis.cpp
+++ /dev/null
@@ -1,532 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-#include <cassert>
-#include <cstring>	// std::memcpy, used in AddArc
-#include <iostream>
-#include <limits>
-#include <vector>
-#include "TranslationOption.h"
-#include "TranslationOptionCollection.h"
-#include "DummyScoreProducers.h"
-#include "Hypothesis.h"
-#include "Util.h"
-#include "SquareMatrix.h"
-#include "LexicalReordering.h"
-#include "StaticData.h"
-#include "Input.h"
-#include "LMList.h"
-#include "hash.h"
-
-using namespace std;
-
-unsigned int Hypothesis::s_numNodes = 0;
-unsigned int Hypothesis::s_HypothesesCreated = 0;
-
-
-Hypothesis::Hypothesis(InputType const& source)
- : m_prevHypo(NULL)
- , m_targetPhrase(Output)
- , m_sourceCompleted(source.GetSize())
- , m_sourceInput(source)
- , m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
- , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
- , m_wordDeleted(false)
-#ifdef N_BEST
- , m_arcList(NULL)
-#endif
- , m_id(s_HypothesesCreated++)
-{ // used for initial seeding of the translation process
- // initialize scores
- _hash_computed = false;
- ResetScore();
-}
-
-/***
- * continue prevHypo by appending the phrases in transOpt
- */
-Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
- : m_prevHypo(&prevHypo)
- , m_targetPhrase(transOpt.GetTargetPhrase())
- , m_sourceCompleted (prevHypo.m_sourceCompleted )
- , m_sourceInput (prevHypo.m_sourceInput)
- , m_currSourceWordsRange (transOpt.GetSourceWordsRange())
-	, m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
-	                         prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetPhrase().GetSize())
- , m_totalScore(0.0f)
- , m_futureScore(0.0f)
- , m_wordDeleted(false)
- , m_id(s_HypothesesCreated++)
- , m_scoreBreakdown (prevHypo.m_scoreBreakdown)
-#ifdef N_BEST
- , m_arcList(NULL)
-#endif
-{
- // assert that we are not extending our hypothesis by retranslating something
- // that this hypothesis has already translated!
- assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
-
- _hash_computed = false;
- m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
- m_wordDeleted = transOpt.IsDeletionOption();
- m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
-}
-
-Hypothesis::~Hypothesis()
-{
-#ifdef N_BEST
- if (m_arcList) {
- RemoveAllInColl< ArcList::iterator >(*m_arcList);
- delete m_arcList;
- }
-#endif
-}
-
-#ifdef N_BEST
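-/***
- * record a recombined (losing) hypothesis in this hypothesis' arc list,
- * merging the loser's own arcs into ours, so that alternative derivations
- * remain available for n-best list extraction
- */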
-void Hypothesis::AddArc(Hypothesis *loserHypo)
-{
- if (!m_arcList) {
- if (loserHypo->m_arcList) // we don't have an arcList, but loser does
- {
- this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
- loserHypo->m_arcList = 0; // prevent a double deletion
- }
- else
- { this->m_arcList = new ArcList(); }
- } else {
- if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser
- size_t my_size = m_arcList->size();
- size_t add_size = loserHypo->m_arcList->size();
- this->m_arcList->resize(my_size + add_size, 0);
-			std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
- delete loserHypo->m_arcList;
- loserHypo->m_arcList = 0;
- } else { // loserHypo doesn't have any arcs
- // DO NOTHING
- }
- }
- m_arcList->push_back(loserHypo);
-}
-#endif
-
-/***
- * create a new hypothesis extending this one by the given translation option
- */
-Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const
-{
- return Create(*this, transOpt);
-}
-
-/***
- * factory method: create a new hypothesis extending prevHypo by the given translation option
- */
-Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
-{
- return new Hypothesis(prevHypo, transOpt);
-}
-
-/***
- * factory method: create an empty hypothesis from the source input,
- * used to seed the translation process
- */
-Hypothesis* Hypothesis::Create(InputType const& source)
-{
-	return new Hypothesis(source);
-}
-
-bool Hypothesis::IsCompatible(const Phrase &phrase) const
-{
- // make sure factors don't contradict each other
- // similar to phrase comparison
-
- if (m_currTargetWordsRange.GetWordsCount() != phrase.GetSize())
- {
- return false;
- }
- size_t hypoSize = GetSize();
-
- size_t transOptPos = 0;
- for (size_t hypoPos = hypoSize - m_currTargetWordsRange.GetWordsCount() ; hypoPos < hypoSize ; hypoPos++)
- {
- for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
- {
- FactorType factorType = static_cast<FactorType>(currFactor);
- const Factor *thisFactor = GetFactor(hypoPos, factorType)
- ,*compareFactor = phrase.GetFactor(transOptPos, factorType);
- if (thisFactor != NULL && compareFactor != NULL && thisFactor != compareFactor)
- return false;
- }
- transOptPos++;
- }
- return true;
-}
-
-#if 0
-void Hypothesis::GenerateNGramCompareKey(size_t contextSize)
-{
- struct MD5Context md5c;
-
- MD5Init(&md5c);
- size_t thisSize = this->GetSize();
- size_t effectiveContextSize = std::min(thisSize, contextSize);
- int start = thisSize - effectiveContextSize;
-
- if (m_currTargetWordsRange.GetWordsCount() > 0) // initial hypothesis check
- {
- const Hypothesis *curHyp = this;
- int curStart = 0;
-		while (start < (curStart = curHyp->m_currTargetWordsRange.GetStartPos())) {
-			for (int col = curHyp->m_currTargetWordsRange.GetEndPos(); col >= curStart; col--) {
-				MD5Update(&md5c,
-					(unsigned char*)curHyp->GetCurrFactorArray(col - curStart),
-					sizeof(FactorArray));
-			}
-			curHyp = curHyp->m_prevHypo;	// walk back one hypothesis per covered range
-		}
- for (int col = curHyp->m_currTargetWordsRange.GetEndPos(); col >= (int)start; col--) {
- MD5Update(&md5c,
- (unsigned char*)curHyp->GetCurrFactorArray(col - curStart),
- sizeof(FactorArray));
- }
- }
- MD5Final(m_compSignature, &md5c);
-}
-#endif
-
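-/**
- * compute a hash over the state relevant to recombination: for each factor
- * scored by an n-gram LM, the last n-1 target factors, plus the compressed
- * source coverage bitmap
- */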
-void Hypothesis::GenerateNGramCompareHash() const
-{
- _hash = 0xcafe5137; // random
- const size_t thisSize = GetSize();
-
- for (size_t currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
- {
- size_t ngramMax = StaticData::Instance()->GetMaxNGramOrderForFactorId(currFactor);
- if (ngramMax < 2) continue; // unigrams have no context
-
- const size_t minSize = std::min(ngramMax-1, thisSize);
- _hash = quick_hash((const char*)&minSize, sizeof(size_t), _hash);
-
- for (size_t currNGram = 1 ; currNGram <= minSize ; currNGram++)
- {
- FactorType factorType = static_cast<FactorType>(currFactor);
- const Factor *thisFactor = GetFactor(thisSize - currNGram, factorType);
- _hash = quick_hash((const char*)&thisFactor, sizeof(const Factor*), _hash);
- }
- }
- vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedReprentation();
- _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
- _hash_computed = true;
-}
-
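-/**
- * compare two hypotheses on their LM-relevant state: the last n-1 target
- * factors for each factor type, compared by factor address. A return value
- * of 0 means both hypotheses expose the same context to the language models.
- */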
-int Hypothesis::NGramCompare(const Hypothesis &compare) const
-{ // -1 = this < compare
-  // +1 = this > compare
-  //  0 = this == compare
-
- const size_t thisSize = GetSize();
- const size_t compareSize = compare.GetSize();
-
- for (size_t currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
- {
- size_t ngramMax = StaticData::Instance()->GetMaxNGramOrderForFactorId(currFactor);
- if (ngramMax < 2) continue; // unigrams have no context
-
- const size_t minSize = std::min(ngramMax-1, thisSize)
- , minCompareSize = std::min(ngramMax-1, compareSize);
- if ( minSize != minCompareSize )
- { // quick decision
- return (minSize < minCompareSize) ? -1 : 1;
- }
-
- for (size_t currNGram = 1 ; currNGram <= minSize ; currNGram++)
- {
- FactorType factorType = static_cast<FactorType>(currFactor);
- const Factor *thisFactor = GetFactor(thisSize - currNGram, factorType)
- ,*compareFactor = compare.GetFactor(compareSize - currNGram, factorType);
- // just use address of factor
- if (thisFactor < compareFactor)
- return -1;
- if (thisFactor > compareFactor)
- return 1;
- }
- }
- // identical
- return 0;
-}
-/**
- * Calculates the overall language model score by combining the scores
- * of language models generated for each of the factors. Because the factors
- * represent a variety of tag sets, and because factors with smaller tag sets
- * (such as POS instead of words) allow us to calculate richer statistics, we
- * allow a different length of n-gram to be specified for each factor.
- * \param lmListInitial LMs whose phrase-internal n-grams were already scored
- *        in the translation option; only boundary n-grams are scored here
- * \param lmListEnd LMs with no precomputed scores; every n-gram in the
- *        current phrase is scored here
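- *
- * For example, with a 3-gram LM over surface forms and a 5-gram LM over a
- * POS factor, the surface factor is scored with trigram context and the
- * POS factor with 5-gram context; each model's total is added to the score
- * breakdown as a separate feature and weighted later in CalcScore().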
- */
-void Hypothesis::CalcLMScore(const LMList &lmListInitial, const LMList &lmListEnd)
-{
- const size_t startPos = m_currTargetWordsRange.GetStartPos();
- LMList::const_iterator iterLM;
-
-	// for LMs whose scores are not precomputed in the translation option:
-	// every n-gram in the current phrase must be scored here
- for (iterLM = lmListEnd.begin() ; iterLM != lmListEnd.end() ; ++iterLM)
- {
- const LanguageModel &languageModel = **iterLM;
- FactorType factorType = languageModel.GetFactorType();
- size_t nGramOrder = languageModel.GetNGramOrder();
- float lmScore;
-
- // 1st n-gram
- vector<const Factor*> contextFactor(nGramOrder);
- size_t index = 0;
- for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
- {
- if (currPos >= 0)
- contextFactor[index++] = GetFactor(currPos, factorType);
- else
- contextFactor[index++] = languageModel.GetSentenceStart();
- }
- lmScore = languageModel.GetValue(contextFactor);
- //cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
-
- // main loop
- for (size_t currPos = startPos + 1 ; currPos <= m_currTargetWordsRange.GetEndPos() ; currPos++)
- {
- // shift all args down 1 place
- for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
- contextFactor[i] = contextFactor[i + 1];
-
- // add last factor
- contextFactor.back() = GetFactor(currPos, factorType);
-
- lmScore += languageModel.GetValue(contextFactor);
- //cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
-
- }
-
- // end of sentence
- if (m_sourceCompleted.IsComplete())
- {
- // shift all args down 1 place
- for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
- contextFactor[i] = contextFactor[i + 1];
-
- contextFactor.back() = languageModel.GetSentenceEnd();
- lmScore += languageModel.GetValue(contextFactor);
- }
- m_scoreBreakdown.PlusEquals(&languageModel, lmScore);
- }
-
-	// for LMs whose scores are precomputed in the translation option:
-	// n-grams wholly inside the phrase were already scored there, so only
-	// the n-grams spanning the start of the current phrase are scored here
- for (iterLM = lmListInitial.begin() ; iterLM != lmListInitial.end() ; ++iterLM)
- {
- const LanguageModel &languageModel = **iterLM;
- FactorType factorType = languageModel.GetFactorType();
- size_t nGramOrder = languageModel.GetNGramOrder();
- float lmScore;
-
- if(m_currTargetWordsRange.GetWordsCount() > 0) //non-empty target phrase
- {
- // 1st n-gram
- vector<const Factor*> contextFactor(nGramOrder);
- size_t index = 0;
- for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
- {
- if (currPos >= 0)
- contextFactor[index++] = GetFactor(currPos, factorType);
- else
- contextFactor[index++] = languageModel.GetSentenceStart();
- }
- lmScore = languageModel.GetValue(contextFactor);
- //cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
-
- // main loop
- size_t endPos = std::min(startPos + nGramOrder - 2
- , m_currTargetWordsRange.GetEndPos());
- for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++)
- {
- // shift all args down 1 place
- for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
- contextFactor[i] = contextFactor[i + 1];
-
- // add last factor
- contextFactor.back() = GetFactor(currPos, factorType);
-
- lmScore += languageModel.GetValue(contextFactor);
- //cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
- }
-
- // end of sentence
- if (m_sourceCompleted.IsComplete())
- {
- const size_t size = GetSize();
- contextFactor.back() = languageModel.GetSentenceEnd();
-
- for (size_t i = 0 ; i < nGramOrder - 1 ; i ++)
- {
- int currPos = size - nGramOrder + i + 1;
- if (currPos < 0)
- contextFactor[i] = languageModel.GetSentenceStart();
- else
- contextFactor[i] = GetFactor((size_t)currPos, factorType);
- }
- lmScore += languageModel.GetValue(contextFactor);
- }
- }
- else lmScore = 0; //the score associated with dropping source words is not part of the language model
-
- m_scoreBreakdown.PlusEquals(&languageModel, lmScore);
- }
-}
-
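-/**
- * score the distortion (jump) between the previous hypothesis' source range
- * and this hypothesis' source range, via the distortion score producer
- */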
-void Hypothesis::CalcDistortionScore()
-{
- const DistortionScoreProducer *dsp = StaticData::Instance()->GetDistortionScoreProducer();
- float distortionScore = dsp->CalculateDistortionScore(
- m_prevHypo->GetCurrSourceWordsRange(),
- this->GetCurrSourceWordsRange()
- );
- m_scoreBreakdown.PlusEquals(dsp, distortionScore);
-}
-
-void Hypothesis::ResetScore()
-{
- m_scoreBreakdown.ZeroAll();
- m_futureScore = m_totalScore = 0.0f;
-}
-
-/***
- * calculate the total log-linear score: the inner product of all component
- * scores with their weights, plus the estimated future cost
- */
-void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore)
-{
- // DISTORTION COST
- CalcDistortionScore();
-
- // LANGUAGE MODEL COST
- CalcLMScore(staticData.GetLanguageModel(Initial), staticData.GetLanguageModel(Other));
-
- // WORD PENALTY
- m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetWordsCount());
-
- // FUTURE COST
- CalcFutureScore(futureScore);
-
-	// LEXICAL REORDERING COST
-	LexicalReordering *lexReorder = staticData.GetLexReorder();
-	if (lexReorder) {
-		std::vector<float> lroScores;
-		lroScores.push_back(lexReorder->CalcScore(this, LexReorderType::Forward));
-		lroScores.push_back(lexReorder->CalcScore(this, LexReorderType::Backward));
-		m_scoreBreakdown.PlusEquals(lexReorder, lroScores);
-	}
-
- // TOTAL
- m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
-}
-
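-/**
- * estimate the cost of covering the rest of the source sentence: sum the
- * precomputed estimates from the futureScore matrix over every maximal
- * contiguous span of still-uncovered source words
- */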
-void Hypothesis::CalcFutureScore(const SquareMatrix &futureScore)
-{
- const size_t maxSize= numeric_limits<size_t>::max();
- size_t start = maxSize;
- m_futureScore = 0.0f;
- for(size_t currPos = 0 ; currPos < m_sourceCompleted.GetSize() ; currPos++)
- {
- if(m_sourceCompleted.GetValue(currPos) == 0 && start == maxSize)
- {
- start = currPos;
- }
- if(m_sourceCompleted.GetValue(currPos) == 1 && start != maxSize)
- {
-// m_score[ScoreType::FutureScoreEnum] += futureScore[start][currPos - 1];
- m_futureScore += futureScore.GetScore(start, currPos - 1);
- start = maxSize;
- }
- }
- if (start != maxSize)
- {
-// m_score[ScoreType::FutureScoreEnum] += futureScore[start][m_sourceCompleted.GetSize() - 1];
- m_futureScore += futureScore.GetScore(start, m_sourceCompleted.GetSize() - 1);
- }
-
- // add future costs for distortion model
- // m_score[ScoreType::FutureScoreEnum] += m_sourceCompleted.GetFutureCosts(m_currSourceWordsRange.GetEndPos()) * StaticData::Instance()->GetWeightDistortion();
-
-}
-
-
-int Hypothesis::GetId() const
-{
-	return m_id;
-}
-
-const Hypothesis* Hypothesis::GetPrevHypo() const
-{
-	return m_prevHypo;
-}
-
-/**
- * print hypothesis information for pharaoh-style logging
- */
-void Hypothesis::PrintHypothesis(const InputType &source, float weightDistortion, float weightWordPenalty) const
-{
-	cout << "creating hypothesis " << m_id << " from " << m_prevHypo->m_id << " ( ";
-	int end = m_prevHypo->m_targetPhrase.GetSize() - 1;
-	int start = end - 1;
-	if (start < 0) start = 0;
-	if (m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND) {
-		cout << "<s> ";
-	}
-	else {
-		cout << "... ";
-	}
-	if (end >= 0) {
-		WordsRange range(start, end);
-		cout << m_prevHypo->m_targetPhrase.GetSubString(range) << " ";
-	}
-	cout << ")" << endl;
-	cout << "\tbase score " << (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) << endl;
-	cout << "\tcovering " << m_currSourceWordsRange.GetStartPos() << "-" << m_currSourceWordsRange.GetEndPos()
-		<< ": " << source.GetSubString(m_currSourceWordsRange) << endl;
-	cout << "\ttranslated as: " << m_targetPhrase; // << " => translation cost " << m_score[ScoreType::PhraseTrans];
-	if (m_wordDeleted) cout << " word_deleted";
-	cout << endl;
-	cout << "\tdistance: " << GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange()); // << " => distortion cost " << (m_score[ScoreType::Distortion]*weightDistortion) << endl;
-	cout << "\tlanguage model cost "; // << m_score[ScoreType::LanguageModelScore] << endl;
-	cout << "\tword penalty "; // << (m_score[ScoreType::WordPenalty]*weightWordPenalty) << endl;
-	cout << "\tscore " << m_totalScore - m_futureScore << " + future cost " << m_futureScore << " = " << m_totalScore << endl;
-	cout << "\tunweighted feature scores: " << m_scoreBreakdown << endl;
-	//PrintLMScores();
-}
-
-TO_STRING_BODY(Hypothesis)
-
-// friend
-ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
-{
- hypothesis.ToStream(out);
- // words bitmap
- out << "[" << hypothesis.m_sourceCompleted << "] ";
-
- // scores
- out << " [0.12, " << hypothesis.GetTotalScore() << "]";
- out << " " << hypothesis.GetScoreBreakdown();
- return out;
-}
-
-