Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/src/PhraseDictionary.cpp')
-rwxr-xr-xmoses/src/PhraseDictionary.cpp268
1 files changed, 0 insertions, 268 deletions
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
deleted file mode 100755
index 1de4d3d79..000000000
--- a/moses/src/PhraseDictionary.cpp
+++ /dev/null
@@ -1,268 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <fstream>
-#include <string>
-#include <iterator>
-#include <sys/stat.h>
-#include "boost/filesystem/operations.hpp" // includes boost/filesystem/path.hpp
-#include "PhraseDictionary.h"
-#include "FactorCollection.h"
-#include "Word.h"
-#include "Util.h"
-#include "InputFileStream.h"
-#include "StaticData.h"
-#include "WordsRange.h"
-
-using namespace std;
-
-void PhraseDictionary::Load(const std::vector<FactorType> &input
- , const std::vector<FactorType> &output
- , FactorCollection &factorCollection
- , const string &filePath
- , const string &hashFilePath
- , const vector<float> &weight
- , size_t maxTargetPhrase
- , bool filter
- , const list< Phrase > &inputPhraseList
- , const LMList &languageModels
- , float weightWP
- , const StaticData& staticData)
-{
- m_maxTargetPhrase = maxTargetPhrase;
- m_filename = filePath;
-
- //factors
- m_factorsUsed[Input] = new FactorTypeSet(input);
- m_factorsUsed[Output] = new FactorTypeSet(output);
-
- // data from file
- InputFileStream inFile(filePath);
-
- // create hash file if necessary
- ofstream tempFile;
- string tempFilePath;
- if (filter)
- {
- CreateTempFile(tempFile, tempFilePath);
- TRACE_ERR(filePath << " -> " << tempFilePath << " -> " << hashFilePath << endl);
- }
-
- vector< vector<string> > phraseVector;
- string line, prevSourcePhrase = "";
- bool addPhrase = !filter;
- size_t count = 0;
- size_t line_num = 0;
- while(getline(inFile, line))
- {
- ++line_num;
- vector<string> tokens = TokenizeMultiCharSeparator( line , "|||" );
- if (tokens.size() != 3)
- {
- TRACE_ERR("Syntax error at " << filePath << ":" << line_num);
- abort(); // TODO- error handling
- }
-
- bool isLHSEmpty = (tokens[1].find_first_not_of(" \t", 0) == string::npos);
- if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
- TRACE_ERR(filePath << ":" << line_num << ": pt entry contains empty target, skipping\n");
- continue;
- }
-
- if (!filter)
- {
- if (tokens[0] != prevSourcePhrase)
- phraseVector = Phrase::Parse(tokens[0]);
- }
- else if (tokens[0] == prevSourcePhrase)
- { // same source phrase as prev line.
- }
- else
- {
- phraseVector = Phrase::Parse(tokens[0]);
- prevSourcePhrase = tokens[0];
-
- addPhrase = Contains(phraseVector, inputPhraseList, input);
- }
-
- if (addPhrase)
- {
- vector<float> scoreVector = Tokenize<float>(tokens[2]);
- if (scoreVector.size() != m_noScoreComponent) {
- TRACE_ERR("Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_noScoreComponent<<") of score components on line " << line_num);
- abort();
- }
-// assert(scoreVector.size() == m_noScoreComponent);
-
- // source
- Phrase sourcePhrase(Input);
- sourcePhrase.CreateFromString( input, phraseVector, factorCollection);
- //target
- TargetPhrase targetPhrase(Output);
- targetPhrase.CreateFromString( output, tokens[1], factorCollection);
-
- // component score, for n-best output
- std::vector<float> scv(scoreVector.size());
- std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);
- targetPhrase.SetScore(this, scv, weight, languageModels, weightWP);
-
- AddEquivPhrase(sourcePhrase, targetPhrase);
-
- // add to hash file
- if (filter)
- tempFile << line << endl;
- }
- count++;
- }
-
- // move temp file to hash file
- if (filter)
- {
- tempFile.close();
- using namespace boost::filesystem;
- if (!exists(path(hashFilePath, native)))
- {
- try
- {
- rename( path(tempFilePath, native) , path(hashFilePath, native) );
- }
- catch (...)
- { // copy instead
- copy_file(path(tempFilePath, native) , path(hashFilePath, native) );
- remove(tempFilePath);
- }
- }
-#ifndef _WIN32
- // change permission to let everyone use cached file
- chmod(hashFilePath.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
-#endif
- }
-}
-
-TargetPhraseCollection *PhraseDictionary::CreateTargetPhraseCollection(const Phrase &source)
-{
- const size_t size = source.GetSize();
-
- PhraseDictionaryNode *currNode = &m_collection;
- for (size_t pos = 0 ; pos < size ; ++pos)
- {
- Word word(source.GetFactorArray(pos));
- currNode = currNode->GetOrCreateChild(word);
- if (currNode == NULL)
- return NULL;
- }
-
- return currNode->CreateTargetPhraseCollection();
-}
-
-void PhraseDictionary::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
-{
- TargetPhraseCollection &phraseColl = *CreateTargetPhraseCollection(source);
- if (m_maxTargetPhrase == 0)
- { // don't need keep list sorted
- // create sub tree & put target phrase into collection
- phraseColl.push_back(targetPhrase);
- }
- else
- { // must keep list in sorted order
- TargetPhraseCollection::iterator iter;
- for (iter = phraseColl.begin() ; iter != phraseColl.end() ; ++iter)
- {
- TargetPhrase &insertPhrase = *iter;
- if (targetPhrase.GetFutureScore() < insertPhrase.GetFutureScore())
- {
- break;
- }
- }
- phraseColl.insert(iter, targetPhrase);
-
- // get rid of least probable phrase if we have enough
- if (phraseColl.size() > m_maxTargetPhrase)
- {
- phraseColl.erase(phraseColl.begin());
- }
- }
-}
-
-const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const Phrase &source) const
-{ // exactly like CreateTargetPhraseCollection, but don't create
- const size_t size = source.GetSize();
-
- const PhraseDictionaryNode *currNode = &m_collection;
- for (size_t pos = 0 ; pos < size ; ++pos)
- {
- Word word(source.GetFactorArray(pos));
- currNode = currNode->GetChild(word);
- if (currNode == NULL)
- return NULL;
- }
-
- return currNode->GetTargetPhraseCollection();
-}
-
-PhraseDictionary::~PhraseDictionary()
-{
- for (size_t i = 0 ; i < m_factorsUsed.size() ; i++)
- {
- delete m_factorsUsed[i];
- }
-}
-
-void PhraseDictionary::SetWeightTransModel(const vector<float> &weightT)
-{
- PhraseDictionaryNode::iterator iterDict;
- for (iterDict = m_collection.begin() ; iterDict != m_collection.end() ; ++iterDict)
- {
- PhraseDictionaryNode &phraseDictionaryNode = iterDict->second;
- // recursively set weights in nodes
- phraseDictionaryNode.SetWeightTransModel(this, weightT);
- }
-}
-
-bool PhraseDictionary::Contains(const vector< vector<string> > &phraseVector
- , const list<Phrase> &inputPhraseList
- , const vector<FactorType> &inputFactorType)
-{
- std::list<Phrase>::const_iterator iter;
- for (iter = inputPhraseList.begin() ; iter != inputPhraseList.end() ; ++iter)
- {
- const Phrase &inputPhrase = *iter;
- if (inputPhrase.Contains(phraseVector, inputFactorType))
- return true;
- }
- return false;
-}
-
-TO_STRING_BODY(PhraseDictionary);
-
-// friend
-ostream& operator<<(ostream& out, const PhraseDictionary& phraseDict)
-{
- const PhraseDictionaryNode &coll = phraseDict.m_collection;
- PhraseDictionaryNode::const_iterator iter;
- for (iter = coll.begin() ; iter != coll.end() ; ++iter)
- {
- const Word &word = (*iter).first;
- out << word;
- }
- return out;
-}
-