diff options
author | dowobeha <dowobeha@1f5c12ca-751b-0410-a591-d2e778427230> | 2009-02-18 21:38:36 +0300 |
---|---|---|
committer | dowobeha <dowobeha@1f5c12ca-751b-0410-a591-d2e778427230> | 2009-02-18 21:38:36 +0300 |
commit | d52e3f332fbe134365f19363703c0c9bfdc732f5 (patch) | |
tree | 28a2d87b000dc531f123455d98e498434b12ac1d | |
parent | 7aff1789c99c6d10dc25bfb71c3e63281dd79857 (diff) |
Added new class for SearchMultilane-multi
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/lane-multi@2190 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r-- | moses/SearchMulti.cpp | 630 | ||||
-rw-r--r-- | moses/SearchMulti.h | 66 | ||||
-rw-r--r-- | moses/moses.xcodeproj/project.pbxproj | 8 | ||||
-rw-r--r-- | moses/src/Manager.cpp | 9 | ||||
-rw-r--r-- | moses/src/Search.cpp | 88 | ||||
-rw-r--r-- | moses/src/Search.h | 6 | ||||
-rw-r--r-- | moses/src/TypeDef.h | 1 |
7 files changed, 769 insertions, 39 deletions
diff --git a/moses/SearchMulti.cpp b/moses/SearchMulti.cpp new file mode 100644 index 000000000..257aace96 --- /dev/null +++ b/moses/SearchMulti.cpp @@ -0,0 +1,630 @@ +#include "Timer.h" +#include "SearchMulti.h" + +namespace Moses +{ + /** + * Organizing main function + * + * /param source input sentence + * /param transOptColl collection of translation options to be used for this sentence + */ + SearchMulti::SearchMulti(const InputType &source, const InputType &source1, const TranslationOptionCollection &transOptColl, const TranslationOptionCollection &transOptColl1) + :m_source(source) + ,m_source1(source1) + //,m_hypoStackColl((source.GetSize() + 1) * (source1.GetSize() + 1)) + ,m_hypoStackColl((source.GetSize() + 1)) + ,m_initialTargetPhrase(Output) + ,m_start(clock()) + ,interrupted_flag(0) + ,m_transOptColl(transOptColl) + ,m_transOptColl1(transOptColl1) + { + VERBOSE(1, "Translating: " << m_source << + " ...and also: " << m_source1 << endl); + const StaticData &staticData = StaticData::Instance(); + + // only if constraint decoding (having to match a specified output) + long sentenceID = source.GetTranslationId(); + m_constraint = staticData.GetConstrainingPhrase(sentenceID); + m_WERLimit = staticData.GetWERLimit(); + if (m_WERLimit < 0.0f) m_WERUnlimited = true; + else m_WERUnlimited = false; + + // initialize the stacks: create data structure and set limits + std::vector < HypothesisStackNormal >::iterator iterStack; + for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) + { + HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(); + sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),staticData.GetMinHypoStackDiversity()); + sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth()); + + m_hypoStackColl[ind] = sourceHypoColl; + } + } + + SearchMulti::~SearchMulti() + { + RemoveAllInColl(m_hypoStackColl); + } + + /** + * Main decoder loop that translates a sentence by expanding + * hypotheses stack by stack, until the end of the sentence. + */ + void SearchMulti::ProcessSentence() + { + /* + if (m_constraint!=NULL && m_WERUnlimited) { + // If attempting constraint decoding with unlimited WER allowed, + // keep increasing allowed WER until a result is obtained. + for (m_WERLimit=0; GetBestHypothesis()==NULL; m_WERLimit++) { + VERBOSE(1, "WER Limit = " << m_WERLimit << endl); + AttemptProcessSentence(); + } + //VERBOSE(1, "WER Limit = " << m_WERLimit << " GetBestHypothesis()==" << *GetBestHypothesis() << endl); + } else { + AttemptProcessSentence(); + } + */ + AttemptProcessSentence(); + } + + + void SearchMulti::AttemptProcessSentence() + { + const StaticData &staticData = StaticData::Instance(); + SentenceStats &stats = staticData.GetSentenceStats(); + clock_t t=0; // used to track time for steps + + // initial seed hypothesis: nothing translated, no words produced + Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase); + m_hypoStackColl[0]->AddPrune(hypo); + + // go through each stack + std::vector < HypothesisStack* >::iterator iterStack; + for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) + { + // check if decoding ran out of time + double _elapsed_time = GetUserTime(); + if (_elapsed_time > staticData.GetTimeoutThreshold()){ + VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl); + interrupted_flag = 1; + return; + } + HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack); + + // the stack is pruned before processing (lazy pruning): + VERBOSE(3,"processing hypothesis from next stack"); + IFVERBOSE(2) { t = clock(); } + sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize()); + VERBOSE(3,std::endl); + sourceHypoColl.CleanupArcList(); + IFVERBOSE(2) { stats.AddTimeStack( clock()-t ); } + + // go through each hypothesis on the stack and try to expand it + HypothesisStackNormal::const_iterator iterHypo; + for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo) + { + Hypothesis &hypothesis = **iterHypo; + ProcessOneHypothesis(hypothesis); // expand the hypothesis + } + // some logging + IFVERBOSE(2) { OutputHypoStackSize(); } + + // this stack is fully expanded; + actual_hypoStack = &sourceHypoColl; + } + + // some more logging + IFVERBOSE(2) { staticData.GetSentenceStats().SetTimeTotal( clock()-m_start ); } + VERBOSE(2, staticData.GetSentenceStats()); + } + + + /** Find all translation options to expand one hypothesis, trigger expansion + * this is mostly a check for overlap with already covered words, and for + * violation of reordering limits. + * \param hypothesis hypothesis to be expanded upon + */ + void SearchMulti::ProcessOneHypothesis(const Hypothesis &hypothesis) + { + // since we check for reordering limits, its good to have that limit handy + int maxDistortion = StaticData::Instance().GetMaxDistortion(); + bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput; + + // no limit of reordering: only check for overlap + if (maxDistortion < 0) + { + const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap(); + const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos() + , sourceSize = m_source.GetSize(); + + for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) + { + size_t maxSize = sourceSize - startPos; + size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; + + for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) + { + // basic checks + // there have to be translation options + if (m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos)).size() == 0 || + // no overlap with existing words + hypoBitmap.Overlap(WordsRange(startPos, endPos)) || + // specified reordering constraints (set with -monotone-at-punctuation or xml) + !m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) ) + { + continue; + } + + //TODO: does this method include incompatible WordLattice hypotheses? + ExpandAllHypotheses(hypothesis, startPos, endPos); + } + } + + return; // done with special case (no reordering limit) + } + + // if there are reordering limits, make sure it is not violated + // the coverage bitmap is handy here (and the position of the first gap) + const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap(); + const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos() + , sourceSize = m_source.GetSize(); + + // MAIN LOOP. go through each possible range + for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) + { + size_t maxSize = sourceSize - startPos; + size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; + size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos); + if (isWordLattice) { + // first question: is there a path from the closest translated word to the left + // of the hypothesized extension to the start of the hypothesized extension? + // long version: is there anything to our left? is it farther left than where we're starting anyway? can we get to it? + // closestLeft is exclusive: a value of 3 means 2 is covered, our arc is currently ENDING at 3 and can start at 3 implicitly + if (closestLeft != 0 && closestLeft != startPos && !m_source.CanIGetFromAToB(closestLeft, startPos)) { + continue; + } + } + + for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) + { + // basic checks + WordsRange extRange(startPos, endPos); + // there have to be translation options + if (m_transOptColl.GetTranslationOptionList(extRange).size() == 0 || + // no overlap with existing words + hypoBitmap.Overlap(extRange) || + // specified reordering constraints (set with -monotone-at-punctuation or xml) + !m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) || // + // connection in input word lattice + (isWordLattice && !m_source.IsCoveragePossible(extRange))) + { + continue; + } + + // ask second question here: + // we already know we can get to our starting point from the closest thing to the left. We now ask the follow up: + // can we get from our end to the closest thing on the right? + // long version: is anything to our right? is it farther right than our (inclusive) end? can our end reach it? + bool leftMostEdge = (hypoFirstGapPos == startPos); + + // closest right definition: + size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos); + if (isWordLattice) { + //if (!leftMostEdge && closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) { + if (closestRight != endPos && ((closestRight + 1) < sourceSize) && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) { + continue; + } + } + + // any length extension is okay if starting at left-most edge + if (leftMostEdge) + { + ExpandAllHypotheses(hypothesis, startPos, endPos); + } + // starting somewhere other than left-most edge, use caution + else + { + // the basic idea is this: we would like to translate a phrase starting + // from a position further right than the left-most open gap. The + // distortion penalty for the following phrase will be computed relative + // to the ending position of the current extension, so we ask now what + // its maximum value will be (which will always be the value of the + // hypothesis starting at the left-most edge). If this value is less than + // the distortion limit, we don't allow this extension to be made. + WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos); + int required_distortion = + m_source.ComputeDistortionDistance(extRange, bestNextExtension); + + if (required_distortion > maxDistortion) { + continue; + } + + // everything is fine, we're good to go + ExpandAllHypotheses(hypothesis, startPos, endPos); + + } + } + } + } + + + /** + * Expand a hypothesis given a list of translation options + * \param hypothesis hypothesis to be expanded upon + * \param startPos first word position of span covered + * \param endPos last word position of span covered + */ + + void SearchMulti::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos) + { + // early discarding: check if hypothesis is too bad to build + // this idea is explained in (Moore&Quirk, MT Summit 2007) + float expectedScore = 0.0f; + if (StaticData::Instance().UseEarlyDiscarding()) + { + // expected score is based on score of current hypothesis + expectedScore = hypothesis.GetScore(); + + // add new future score estimate + expectedScore += m_transOptColl.GetFutureScore().CalcFutureScore( hypothesis.GetWordsBitmap(), startPos, endPos ); + } + + //bool foundSomething = false; + + // loop through all translation options + const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos)); + TranslationOptionList::const_iterator iter; + for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter) + { + //LS ExpandHypothesis(hypothesis, **iter, expectedScore); + if (m_constraint == NULL) { + ExpandHypothesis(hypothesis, **iter, expectedScore); + } + else if (m_constraint != NULL && m_WERLimit == 0.0f) { + if (isCompatibleWithConstraint(hypothesis, **iter) ) { + //(**iter). + ExpandHypothesis(hypothesis, **iter, expectedScore); + //foundSomething = true; + } //else { + // VERBOSE(1,"Expanding incompatible hypothesis" << endl); + // ExpandHypothesis(hypothesis, **iter, expectedScore); + //} + + } else { + TargetPhrase *curTarget = getCurrentTargetPhrase(hypothesis); + //LS float currConstraintWER = getCurrConstraintWER(hypothesis, **iter); + float currConstraintWER = getCurrConstraintWER(curTarget, **iter); + //LS VERBOSE(1, "WER==" << currConstraintWER << " for \"" << static_cast<const Phrase&>(*curTarget) << "\"" << endl); + //printf("WER: %f Limit: %f\n", currConstraintWER, m_WERLimit); + if (currConstraintWER <= m_WERLimit) + ExpandHypothesis(hypothesis, **iter, expectedScore); + } + } + /* + if (m_constraint!=NULL && !foundSomething) { + size_t start = 1 + hypothesis.GetCurrTargetWordsRange().GetEndPos(); + const WordsRange range(start, start); + const Phrase &relevantConstraint = m_constraint->GetSubString(range); + //const TargetPhrase tp(relevantConstraint); + Phrase sourcePhrase(Input); + std::string targetPhraseString("hi"); + + vector<FactorType> output = Tokenize<FactorType>("0,", ","); + //const vector<FactorType> output(); + const StaticData &staticData = StaticData::Instance(); + TargetPhrase targetPhrase(Output); + //targetPhrase.AddWord(<#const Word newWord#>) + targetPhrase.SetSourcePhrase(&sourcePhrase); + targetPhrase.CreateFromString( output, targetPhraseString, staticData.GetFactorDelimiter()); + + TranslationOption newOpt(range, targetPhrase, m_source); + + + VERBOSE(2, "Should we add \"" << relevantConstraint << "\" (" << start << "-" << start << ") " << newOpt << endl); + + ExpandHypothesis(hypothesis, newOpt, expectedScore); + } + VERBOSE(2, "Found something==" << foundSomething << " for " << startPos << "-" << endPos << endl); + */ + } + + /** + * Enforce constraint when appropriate + * \param hypothesis hypothesis to be expanded upon + * \param transOptList list of translation options to be applied + */ + + bool SearchMulti::isCompatibleWithConstraint(const Hypothesis &hypothesis, + const TranslationOption &transOpt) + { + size_t constraintSize = m_constraint->GetSize(); + size_t start = 1 + hypothesis.GetCurrTargetWordsRange().GetEndPos(); + const Phrase &transOptPhrase = transOpt.GetTargetPhrase(); + size_t transOptSize = transOptPhrase.GetSize(); + + if (transOptSize==0) { + VERBOSE(4, "Empty transOpt IS COMPATIBLE with constraint \"" << *m_constraint << "\"" << endl); + return true; + } + size_t endpoint = start + transOptSize - 1; + //size_t endpoint = start + transOptSize; + //if (endpoint > 0) endpoint = endpoint - 1; + WordsRange range(start, endpoint); + + if (endpoint >= constraintSize) { + VERBOSE(4, "Appending \"" << transOptPhrase << "\" after \"" << static_cast<const Phrase&>(hypothesis.GetTargetPhrase()) << "\" (start=" << start << ", endpoint=" << endpoint << ", transOptSize=" << transOptSize << ") would be too long for constraint \"" << *m_constraint << "\"" << endl); + return false; + } else { + const Phrase &relevantConstraint = m_constraint->GetSubString(range); + if ( ! relevantConstraint.IsCompatible(transOptPhrase) ) { + VERBOSE(4, "\"" << transOptPhrase << "\" is incompatible with \"" << relevantConstraint << "\" (" << start << "-" << endpoint << ")" << endl); + return false; + } else { + VERBOSE(4, "\"" << transOptPhrase << "\" IS COMPATBILE with \"" << relevantConstraint << "\"" << endl); + return true; + } + } + } + + TargetPhrase *SearchMulti::getCurrentTargetPhrase(const Hypothesis &hypothesis) + { + // Rebuild Target String via recursing on previous hypothesis + const Hypothesis *hypo = &hypothesis; + std::vector<Phrase> target; + + while (hypo != NULL) { + target.push_back(hypo->GetCurrTargetPhrase()); + hypo = hypo->GetPrevHypo(); + } + + TargetPhrase *targetphrase = new TargetPhrase(); + + for (int i = target.size() - 1; i >= 0; i--) { + targetphrase->Append(target[i]); + } + + return targetphrase; + } + + float SearchMulti::getCurrConstraintWER(TargetPhrase *curTarget, + const TranslationOption &transOpt) + { + + //const size_t constraintSize = m_constraint->GetSize(); + const TargetPhrase transOptPhrase = transOpt.GetTargetPhrase(); + + + TargetPhrase newTarget = TargetPhrase(*curTarget); + newTarget.Append(transOptPhrase); + + //size_t endpoint = newTarget.GetSize() - 1; + + // Account for target strings that are longer than the reference + //if (endpoint >= constraintSize) + // endpoint = constraintSize - 1; + + // Extract relevant constraint... + //WordsRange range(0, endpoint); + //const Phrase &relevantConstraint = m_constraint->GetSubString(range); + + + // Compute WER between reference and target string + //float editDistance = computeEditDistance(relevantConstraint, newTarget); + float editDistance = computeEditDistance(*m_constraint, newTarget); + float normalizedEditDistance = editDistance - (m_constraint->GetSize() - newTarget.GetSize()); + normalizedEditDistance = (normalizedEditDistance<0) ? editDistance : normalizedEditDistance; + //VERBOSE(1, "m_constraint->GetSize() - newTarget.GetSize() == " << m_constraint->GetSize() << " - " << newTarget.GetSize() << endl); + VERBOSE(2, "WER==" << normalizedEditDistance << " (" << editDistance << ") for \"" << static_cast<const Phrase&>(newTarget) << "\" with constraint \"" << *m_constraint << "\"" << endl); + + //return editDistance; + return normalizedEditDistance; + } + + + float SearchMulti::computeEditDistance(const Phrase &hypPhrase, const Phrase &constraintPhrase) const + { + const size_t len1 = hypPhrase.GetSize(), len2 = constraintPhrase.GetSize(); + vector<vector<unsigned int> > d(len1 + 1, vector<unsigned int>(len2 + 1)); + + for(int i = 0; i <= len1; ++i) d[i][0] = i; + for(int i = 0; i <= len2; ++i) d[0][i] = i; + + for(int i = 1; i <= len1; ++i) + { + for(int j = 1; j <= len2; ++j) { + WordsRange s1range(i-1, i-1); + WordsRange s2range(j-1, j-1); + int cost = hypPhrase.GetSubString(s1range).IsCompatible(constraintPhrase.GetSubString(s2range)) ? 0 : 1; + d[i][j] = std::min( std::min(d[i - 1][j] + 1, + d[i][j - 1] + 1), + d[i - 1][j - 1] + cost); + } + } + return d[len1][len2]; + } + + /** + * Expand one hypothesis with a translation option. + * this involves initial creation, scoring and adding it to the proper stack + * \param hypothesis hypothesis to be expanded upon + * \param transOpt translation option (phrase translation) + * that is applied to create the new hypothesis + * \param expectedScore base score for early discarding + * (base hypothesis score plus future score estimation) + */ + void SearchMulti::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore) + { + const StaticData &staticData = StaticData::Instance(); + SentenceStats &stats = staticData.GetSentenceStats(); + clock_t t=0; // used to track time for steps + + Hypothesis *newHypo; + if (! staticData.UseEarlyDiscarding()) + { + // simple build, no questions asked + IFVERBOSE(2) { t = clock(); } + //LS newHypo = hypothesis.CreateNext(transOpt, m_constraint); + newHypo = hypothesis.CreateNext(transOpt); + IFVERBOSE(2) { stats.AddTimeBuildHyp( clock()-t ); } + if (newHypo==NULL) return; + newHypo->CalcScore(m_transOptColl.GetFutureScore()); + //newHypo->IncrementTotalScore(bonus); + } + else + // early discarding: check if hypothesis is too bad to build + { + // worst possible score may have changed -> recompute + size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize(); + float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore(); + if (staticData.GetMinHypoStackDiversity()) + { + WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos()); + float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id ); + allowedScore = std::min( allowedScore, allowedScoreForBitmap ); + } + allowedScore += staticData.GetEarlyDiscardingThreshold(); + + // add expected score of translation option + expectedScore += transOpt.GetFutureScore(); + // TRACE_ERR("EXPECTED diff: " << (newHypo->GetTotalScore()-expectedScore) << " (pre " << (newHypo->GetTotalScore()-expectedScorePre) << ") " << hypothesis.GetTargetPhrase() << " ... " << transOpt.GetTargetPhrase() << " [" << expectedScorePre << "," << expectedScore << "," << newHypo->GetTotalScore() << "]" << endl); + //expectedScore += bonus; + // check if transOpt score push it already below limit + if (expectedScore < allowedScore) + { + IFVERBOSE(2) { stats.AddNotBuilt(); } + return; + } + + // build the hypothesis without scoring + IFVERBOSE(2) { t = clock(); } + //LS newHypo = hypothesis.CreateNext(transOpt, m_constraint); + newHypo = hypothesis.CreateNext(transOpt); + if (newHypo==NULL) return; + IFVERBOSE(2) { stats.AddTimeBuildHyp( clock()-t ); } + + // compute expected score (all but correct LM) + expectedScore = newHypo->CalcExpectedScore( m_transOptColl.GetFutureScore() ); + + // ... and check if that is below the limit + if (expectedScore < allowedScore) + { + IFVERBOSE(2) { stats.AddEarlyDiscarded(); } + FREEHYPO( newHypo ); + return; + } + + // ok, all is good, compute remaining scores + newHypo->CalcRemainingScore(); + //newHypo->IncrementTotalScore(bonus); + } + + // logging for the curious + IFVERBOSE(3) { + newHypo->PrintHypothesis(); + } + + // add to hypothesis stack + size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered(); + IFVERBOSE(2) { t = clock(); } + m_hypoStackColl[wordsTranslated]->AddPrune(newHypo); + IFVERBOSE(2) { stats.AddTimeStack( clock()-t ); } + } + + const std::vector < HypothesisStack* >& SearchMulti::GetHypothesisStacks() const + { + return m_hypoStackColl; + } + + /** + * Find best hypothesis on the last stack. + * This is the end point of the best translation, which can be traced back from here + */ + const Hypothesis *SearchMulti::GetBestHypothesis() const + { + /*LS + if (interrupted_flag == 0){ + const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back()); + return hypoColl.GetBestHypothesis(); + } + else{ + const HypothesisStackNormal &hypoColl = *actual_hypoStack; + return hypoColl.GetBestHypothesis(); + } + */ + + if (interrupted_flag == 0){ + const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back()); + + if (m_constraint != NULL) { + HypothesisStackNormal::const_iterator iter; + + const Hypothesis *bestHypo = NULL; + + + for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter) + { + const Hypothesis *hypo = *iter; + WordsRange range(0, m_constraint->GetSize() - 1); + Phrase constraint = m_constraint->GetSubString(range); + + if (hypo != NULL) { + TargetPhrase targetPhrase = TargetPhrase(hypo->GetCurrTargetPhrase()); + hypo = hypo->GetPrevHypo(); + while (hypo != NULL) { + TargetPhrase newTargetPhrase = TargetPhrase(hypo->GetCurrTargetPhrase()); + newTargetPhrase.Append(targetPhrase); + targetPhrase = newTargetPhrase; + hypo = hypo->GetPrevHypo(); + } + + if ( m_WERLimit != 0.0f ) { // is WER-constraint active? + //VERBOSE(1, "constraint : " << constraint << endl); + //VERBOSE(1, "targetPhrase: " << targetPhrase << endl); + if (computeEditDistance(constraint, targetPhrase) <= m_WERLimit) { + //VERBOSE(1, "TRUE" << endl); + if (bestHypo==NULL || (*iter)->GetTotalScore() > bestHypo->GetTotalScore()) + bestHypo = *iter; + } else { + //VERBOSE(1, "FALSE" << endl); + } + } else { + if (constraint.IsCompatible(targetPhrase) && + (bestHypo==NULL || (*iter)->GetTotalScore() > bestHypo->GetTotalScore())) + bestHypo = *iter; + } + } + } + return bestHypo; + //return NULL; + } else { + return hypoColl.GetBestHypothesis(); + } + } + else{ + const HypothesisStackNormal &hypoColl = *actual_hypoStack; + return hypoColl.GetBestHypothesis(); + } + + } + + /** + * Logging of hypothesis stack sizes + */ + void SearchMulti::OutputHypoStackSize() + { + std::vector < HypothesisStack* >::const_iterator iterStack = m_hypoStackColl.begin(); + TRACE_ERR( "Stack sizes: " << (int)(*iterStack)->size()); + for (++iterStack; iterStack != m_hypoStackColl.end() ; ++iterStack) + { + TRACE_ERR( ", " << (int)(*iterStack)->size()); + } + TRACE_ERR( endl); + } + +} + diff --git a/moses/SearchMulti.h b/moses/SearchMulti.h new file mode 100644 index 000000000..8ede8dc2a --- /dev/null +++ b/moses/SearchMulti.h @@ -0,0 +1,66 @@ +/* + * SearchMulti.h + * moses + * + * Created by Lane Schwartz on 2/15/09. + * Copyright 2009. + * + */ + + +#pragma once + +#include <vector> +#include "Search.h" +#include "HypothesisStackNormal.h" +#include "TranslationOptionCollection.h" +#include "Timer.h" + +namespace Moses +{ + + class InputType; + class TranslationOptionCollection; + + class SearchMulti: public Search + { + protected: + const InputType &m_source; + const InputType &m_source1; + std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */ + // no of elements = no of words in source + 1 + TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */ + clock_t m_start; /**< starting time, used for logging */ + size_t interrupted_flag; /**< flag indicating that decoder ran out of time (see switch -time-out) */ + HypothesisStackNormal* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/ + const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */ + const TranslationOptionCollection &m_transOptColl1; /**< pre-computed list of translation options for the phrases in this sentence */ + + // functions for creating hypotheses + void ProcessOneHypothesis(const Hypothesis &hypothesis); + void ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos); + void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore); + + bool isCompatibleWithConstraint(const Hypothesis &hypothesis, const TranslationOption &transOpt); + //LS float SearchNormal::getCurrConstraintWER(const Hypothesis &hypothesis, const TranslationOption &transOpt); + TargetPhrase* getCurrentTargetPhrase(const Hypothesis &hypothesis); + float getCurrConstraintWER(TargetPhrase *targetphrase, const TranslationOption &transOpt); + float computeEditDistance(const Phrase &hypPhrase, const Phrase &constraintPhrase) const; + + void AttemptProcessSentence(); + + public: + SearchMulti(const InputType &source, const InputType &source1, const TranslationOptionCollection &transOptColl, const TranslationOptionCollection &transOptColl1); + ~SearchMulti(); + + void ProcessSentence(); + + void OutputHypoStackSize(); + void OutputHypoStack(int stack); + + virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const; + virtual const Hypothesis *GetBestHypothesis() const; + }; + +} + diff --git a/moses/moses.xcodeproj/project.pbxproj b/moses/moses.xcodeproj/project.pbxproj index a47ebbd7a..9b71db137 100644 --- a/moses/moses.xcodeproj/project.pbxproj +++ b/moses/moses.xcodeproj/project.pbxproj @@ -133,6 +133,8 @@ 1C8CFF090AD67A9700FA22E2 /* WordsBitmap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */; }; 1C8CFF0A0AD67A9700FA22E2 /* WordsRange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */; }; 1C8CFF0B0AD67A9700FA22E2 /* WordsRange.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */; }; + 3D0200CF0F4C58E6007FCB1D /* SearchMulti.h in Headers */ = {isa = PBXBuildFile; fileRef = 3D0200CD0F4C58E6007FCB1D /* SearchMulti.h */; }; + 3D0200D00F4C58E6007FCB1D /* SearchMulti.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D0200CE0F4C58E6007FCB1D /* SearchMulti.cpp */; }; 3D5CBEEF0F2F683D004520C1 /* LanguageModelRemote.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D5CBEEB0F2F683D004520C1 /* LanguageModelRemote.cpp */; }; 3D5CBEF00F2F683D004520C1 /* LanguageModelRemote.h in Headers */ = {isa = PBXBuildFile; fileRef = 3D5CBEEC0F2F683D004520C1 /* LanguageModelRemote.h */; }; B219B8610E9382EE00EAB407 /* AlignmentElement.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B219B85B0E9382EE00EAB407 /* AlignmentElement.cpp */; }; @@ -302,6 +304,8 @@ 1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordsBitmap.h; path = src/WordsBitmap.h; sourceTree = "<group>"; }; 1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = WordsRange.cpp; path = src/WordsRange.cpp; sourceTree = "<group>"; }; 1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordsRange.h; path = src/WordsRange.h; sourceTree = "<group>"; }; + 3D0200CD0F4C58E6007FCB1D /* SearchMulti.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SearchMulti.h; sourceTree = "<group>"; }; + 3D0200CE0F4C58E6007FCB1D /* SearchMulti.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SearchMulti.cpp; sourceTree = "<group>"; }; 3D5CBEE90F2F683D004520C1 /* LanguageModelRandLM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelRandLM.cpp; path = src/LanguageModelRandLM.cpp; sourceTree = "<group>"; }; 3D5CBEEA0F2F683D004520C1 /* LanguageModelRandLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LanguageModelRandLM.h; path = src/LanguageModelRandLM.h; sourceTree = "<group>"; }; 3D5CBEEB0F2F683D004520C1 /* LanguageModelRemote.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelRemote.cpp; path = src/LanguageModelRemote.cpp; sourceTree = "<group>"; }; @@ -411,6 +415,8 @@ E21C11150DFEE88800ADAED0 /* SearchCubePruning.h */, E21C11160DFEE88800ADAED0 /* SearchNormal.cpp */, E21C11170DFEE88800ADAED0 /* SearchNormal.h */, + 3D0200CE0F4C58E6007FCB1D /* SearchMulti.cpp */, + 3D0200CD0F4C58E6007FCB1D /* SearchMulti.h */, E21C110A0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp */, E21C110B0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h */, E21C110C0DFEE86B00ADAED0 /* HypothesisStackNormal.cpp */, @@ -658,6 +664,7 @@ B2639DEB0EF199D400A67519 /* ReorderingConstraint.h in Headers */, B2639DED0EF199D400A67519 /* TranslationOptionList.h in Headers */, 3D5CBEF00F2F683D004520C1 /* LanguageModelRemote.h in Headers */, + 3D0200CF0F4C58E6007FCB1D /* SearchMulti.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -782,6 +789,7 @@ B2639DEA0EF199D400A67519 /* ReorderingConstraint.cpp in Sources */, B2639DEC0EF199D400A67519 /* TranslationOptionList.cpp in Sources */, 3D5CBEEF0F2F683D004520C1 /* LanguageModelRemote.cpp in Sources */, + 3D0200D00F4C58E6007FCB1D /* SearchMulti.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index f656619c7..0e596d5dc 100644 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -76,7 +76,6 @@ vector<TranslationOptionCollection*> * getTranslationOptionCollections(vector<In vector<TranslationOptionCollection*> * collections = new vector<TranslationOptionCollection*>(sources->size()); for (int i=0; i<sources->size(); i++) { - //TranslationOptionCollection* coll = (*sources)[i]->CreateTranslationOptionCollection(); (*collections)[i] = (*sources)[i]->CreateTranslationOptionCollection(); } @@ -97,7 +96,8 @@ vector<TranslationOptionCollection*> * getTranslationOptionCollections(vector<In Manager::Manager(vector<InputType const*> *sources, SearchAlgorithm searchAlgorithm) :m_sources(sources) ,m_transOptColls(getTranslationOptionCollections(sources)) - ,m_search(Search::CreateSearch((*((*sources)[0])), searchAlgorithm, (*(*m_transOptColls)[0]))) + //,m_search(Search::CreateSearch((*((*sources)[0])), searchAlgorithm, (*(*m_transOptColls)[0]))) + ,m_search(Search::CreateSearch(sources, searchAlgorithm, m_transOptColls)) ,m_start(clock()) ,interrupted_flag(0) { @@ -107,7 +107,10 @@ Manager::Manager(vector<InputType const*> *sources, SearchAlgorithm searchAlgori Manager::~Manager() { - delete m_transOptColls; + for (int i=0; i<m_transOptColls->size(); i++) { + delete (*m_transOptColls)[i]; + } + delete m_transOptColls; delete m_search; StaticData::Instance().CleanUpAfterSentenceProcessing(); diff --git a/moses/src/Search.cpp b/moses/src/Search.cpp index be156700f..5b8bd4cc2 100644 --- a/moses/src/Search.cpp +++ b/moses/src/Search.cpp @@ -1,34 +1,54 @@ -
-#include "SearchCubePruning.h"
-#include "SearchNormal.h"
-#include "UserMessage.h"
-
-namespace Moses
-{
-Search::Search()
-{
-// long sentenceID = m_source.GetTranslationId();
-// m_constraint = staticData.GetConstrainingPhrase(sentenceID);
-}
-
-Search *Search::CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
-{
- switch(searchAlgorithm)
- {
- case Normal:
- return new SearchNormal(source, transOptColl);
- case CubePruning:
- return new SearchCubePruning(source, transOptColl);
- case CubeGrowing:
- return NULL;
- default:
- UserMessage::Add("ERROR: search. Aborting\n");
- abort();
- return NULL;
- }
-
-}
-
-}
-
-
+ +#include "SearchCubePruning.h" +#include "SearchNormal.h" +#include "SearchMulti.h" +#include "UserMessage.h" + +namespace Moses +{ +Search::Search() +{ +// long sentenceID = m_source.GetTranslationId(); +// m_constraint = staticData.GetConstrainingPhrase(sentenceID); +} + +//Search *Search::CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl) +//{ +// switch(searchAlgorithm) +// { +// case Normal: +// return new SearchNormal(source, transOptColl); +// case CubePruning: +// return new SearchCubePruning(source, transOptColl); +// case CubeGrowing: +// return NULL; +// default: +// UserMessage::Add("ERROR: search. Aborting\n"); +// abort(); +// return NULL; +// } +// +//} + +Search *Search::CreateSearch(std::vector< InputType const* > *sources, SearchAlgorithm searchAlgorithm, std::vector< TranslationOptionCollection* > *transOptColls) +{ + switch(searchAlgorithm) + { + case Normal: + return new SearchNormal(*((*sources)[0]), *((*transOptColls)[0])); + case CubePruning: + return new SearchCubePruning(*((*sources)[0]), *((*transOptColls)[0])); + case CubeGrowing: + return NULL; + case Multi: + return new SearchMulti(*((*sources)[0]), *((*sources)[1]), *((*transOptColls)[0]), *((*transOptColls)[1])); + default: + UserMessage::Add("ERROR: search. Aborting\n"); + abort(); + return NULL; + } +} + +} + + diff --git a/moses/src/Search.h b/moses/src/Search.h index 2d4d2c387..34ac61276 100644 --- a/moses/src/Search.h +++ b/moses/src/Search.h @@ -24,8 +24,10 @@ public: {} // Factory - static Search *CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl); - + //static Search *CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl); + + static Search *CreateSearch(std::vector< InputType const* > *sources, SearchAlgorithm searchAlgorithm, std::vector< TranslationOptionCollection* > *tranOptColls); + protected: float m_WERLimit; diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h index 847f72d7a..410aea068 100644 --- a/moses/src/TypeDef.h +++ b/moses/src/TypeDef.h @@ -170,6 +170,7 @@ enum SearchAlgorithm Normal = 0 ,CubePruning = 1 ,CubeGrowing = 2 + ,Multi = 3 }; // typedef |