diff options
-rw-r--r-- | moses/InputPath.cpp | 2 | ||||
-rw-r--r-- | moses/InputPath.h | 7 | ||||
-rw-r--r-- | moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp | 12 | ||||
-rw-r--r-- | moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp | 2 | ||||
-rw-r--r-- | moses/TranslationOptionCollectionConfusionNet.cpp | 97 | ||||
-rw-r--r-- | moses/TranslationOptionCollectionConfusionNet.h | 1 |
6 files changed, 74 insertions, 47 deletions
diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp index 43b4207cf..9dda42895 100644 --- a/moses/InputPath.cpp +++ b/moses/InputPath.cpp @@ -4,7 +4,7 @@ namespace Moses { InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode - ,const ScoreComponentCollection *inputScore) + ,const ScoreComponentCollection *inputScore) :m_prevNode(prevNode) ,m_phrase(phrase) ,m_range(range) diff --git a/moses/InputPath.h b/moses/InputPath.h index 63f09bfa1..934b0fcbe 100644 --- a/moses/InputPath.h +++ b/moses/InputPath.h @@ -41,7 +41,7 @@ public: } InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode - ,const ScoreComponentCollection *inputScore); + ,const ScoreComponentCollection *inputScore); ~InputPath(); const Phrase &GetPhrase() const { @@ -62,8 +62,9 @@ public: } const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const; const void *GetPtNode(const PhraseDictionary &phraseDictionary) const; - const ScoreComponentCollection *GetInputScore() const - { return m_inputScore; } + const ScoreComponentCollection *GetInputScore() const { + return m_inputScore; + } }; diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp index b7f19d4ff..8aa4cdee6 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp @@ -234,12 +234,12 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( std::vector<float> weightT = staticData.GetWeights(&m_dictionary); targetPhraseCollection - = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec - ,m_outputFactorsVec - ,m_dictionary - ,weightT - ,m_dbWrapper.GetVocab() - ,true); + = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec + ,m_outputFactorsVec + ,m_dictionary + ,weightT + ,m_dbWrapper.GetVocab() + ,true); delete tpcollBerkeleyDb; m_cache[tpCollFilePos] = targetPhraseCollection; diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp index ada9ce685..199141eeb 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp @@ -134,7 +134,7 @@ void PhraseDictionaryOnDisk::SetTargetPhraseFromPtMatrix(const InputPathList &ph const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper); TargetPhraseCollection *targetPhrases - = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false); + = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false); node.SetTargetPhrases(*this, targetPhrases, ptNode); diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index ebe62b0dd..9f7513b3b 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -28,51 +28,76 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet // 1-word phrases for (size_t startPos = 0; startPos < size; ++startPos) { vector<InputPathList> &vec = m_targetPhrasesfromPt[startPos]; - vec.push_back(InputPathList()); - InputPathList &list = vec.back(); + vec.push_back(InputPathList()); + InputPathList &list = vec.back(); - WordsRange range(startPos, startPos); + WordsRange range(startPos, startPos); - const ConfusionNet::Column &col = input.GetColumn(startPos); - for (size_t i = 0; i < col.size(); ++i) { - const Word &word = col[i].first; - Phrase subphrase; - subphrase.AddWord(word); + const ConfusionNet::Column &col = input.GetColumn(startPos); + for (size_t i = 0; i < col.size(); ++i) { + const Word &word = col[i].first; + Phrase subphrase; + subphrase.AddWord(word); - const std::vector<float> &scores = col[i].second; - ScoreComponentCollection *inputScore = new ScoreComponentCollection(); - inputScore->Assign(inputFeature, scores); + const std::vector<float> &scores = col[i].second; + ScoreComponentCollection *inputScore = new ScoreComponentCollection(); + inputScore->Assign(inputFeature, scores); - InputPath *node = new InputPath(subphrase, range, NULL, inputScore); - list.push_back(node); + InputPath *node = new InputPath(subphrase, range, NULL, inputScore); + list.push_back(node); - } + m_phraseDictionaryQueue.push_back(node); + } } - /* - for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) { - for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) { - size_t endPos = startPos + phaseSize -1; - vector<InputPathList> &vec = m_targetPhrasesfromPt[startPos]; - - Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); - WordsRange range(startPos, endPos); - - InputPath *node; - if (range.GetNumWordsCovered() == 1) { - node = new InputPath(subphrase, range, NULL, NULL); - vec.push_back(node); - } else { - const InputPath &prevNode = GetInputPath(startPos, endPos - 1); - node = new InputPath(subphrase, range, &prevNode, NULL); - vec.push_back(node); - } - - m_phraseDictionaryQueue.push_back(node); - } + // subphrases of 2+ words + for (size_t phaseSize = 2; phaseSize <= size; ++phaseSize) { + for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) { + size_t endPos = startPos + phaseSize -1; + WordsRange range(startPos, endPos); + + vector<InputPathList> &vec = m_targetPhrasesfromPt[startPos]; + InputPathList &list = vec.back(); + + + // loop thru every previous path + const InputPathList &prevNodes = GetInputPathList(startPos, endPos - 1); + InputPathList::const_iterator iter; + for (iter = prevNodes.begin(); iter != prevNodes.end(); ++iter) { + const InputPath &prevNode = **iter; + const Phrase &prevPhrase = prevNode.GetPhrase(); + const ScoreComponentCollection *prevInputScore = prevNode.GetInputScore(); + CHECK(prevInputScore); + + // loop thru every word at this position + const ConfusionNet::Column &col = input.GetColumn(startPos); + for (size_t i = 0; i < col.size(); ++i) { + const Word &word = col[i].first; + Phrase subphrase(prevPhrase); + subphrase.AddWord(word); + + const std::vector<float> &scores = col[i].second; + ScoreComponentCollection *inputScore = new ScoreComponentCollection(*prevInputScore); + inputScore->PlusEquals(inputFeature, scores); + + InputPath *node = new InputPath(subphrase, range, NULL, inputScore); + list.push_back(node); + + m_phraseDictionaryQueue.push_back(node); + } + + } + } } - */ + +} + +InputPathList &TranslationOptionCollectionConfusionNet::GetInputPathList(size_t startPos, size_t endPos) +{ + size_t offset = endPos - startPos; + CHECK(offset < m_targetPhrasesfromPt[startPos].size()); + return m_targetPhrasesfromPt[startPos][offset]; } /* forcibly create translation option for a particular source word. diff --git a/moses/TranslationOptionCollectionConfusionNet.h b/moses/TranslationOptionCollectionConfusionNet.h index 0284c44c7..418932b85 100644 --- a/moses/TranslationOptionCollectionConfusionNet.h +++ b/moses/TranslationOptionCollectionConfusionNet.h @@ -21,6 +21,7 @@ public: protected: TargetPhraseMatrix m_targetPhrasesfromPt; /*< contains translation options */ + InputPathList &GetInputPathList(size_t startPos, size_t endPos); public: TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold); |