diff options
Diffstat (limited to 'OnDiskPt/Word.cpp')
-rw-r--r-- | OnDiskPt/Word.cpp | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp new file mode 100644 index 000000000..a8d4c683a --- /dev/null +++ b/OnDiskPt/Word.cpp @@ -0,0 +1,162 @@ +// $Id$ +/*********************************************************************** + Moses - factored phrase-based, hierarchical and syntactic language decoder + Copyright (C) 2009 Hieu Hoang + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include "../moses/src/Util.h" +#include "../moses/src/Word.h" +#include "Word.h" + +using namespace std; + +namespace OnDiskPt +{ + +Word::Word(const Word ©) + :m_isNonTerminal(copy.m_isNonTerminal) + ,m_factors(copy.m_factors) +{} + +Word::~Word() +{} + +void Word::CreateFromString(const std::string &inString, Vocab &vocab) +{ + if (inString.substr(0, 1) == "[" && inString.substr(inString.size() - 1, 1) == "]") { + // non-term + m_isNonTerminal = true; + } else { + m_isNonTerminal = false; + } + + m_factors.resize(1); + m_factors[0] = vocab.AddVocabId(inString); +} + +size_t Word::WriteToMemory(char *mem) const +{ + UINT64 *vocabMem = (UINT64*) mem; + + // factors + for (size_t ind = 0; ind < m_factors.size(); ind++) + vocabMem[ind] = m_factors[ind]; + + size_t size = sizeof(UINT64) * m_factors.size(); + + // is non-term + char bNonTerm = (char) m_isNonTerminal; + mem[size] = bNonTerm; + ++size; + + return size; +} + +size_t Word::ReadFromMemory(const char *mem, size_t numFactors) +{ + m_factors.resize(numFactors); + UINT64 *vocabMem = (UINT64*) mem; + + // factors + for (size_t ind = 0; ind < m_factors.size(); ind++) + m_factors[ind] = vocabMem[ind]; + + size_t memUsed = sizeof(UINT64) * m_factors.size(); + + // is non-term + char bNonTerm; + bNonTerm = mem[memUsed]; + m_isNonTerminal = (bool) bNonTerm; + ++memUsed; + + return memUsed; +} + +size_t Word::ReadFromFile(std::fstream &file, size_t numFactors) +{ + size_t memAlloc = numFactors * sizeof(UINT64) + sizeof(char); + char *mem = (char*) malloc(memAlloc); + file.read(mem, memAlloc); + + size_t memUsed = ReadFromMemory(mem, numFactors); + CHECK(memAlloc == memUsed); + free(mem); + + return memUsed; +} + +Moses::Word *Word::ConvertToMoses(Moses::FactorDirection direction + , const std::vector<Moses::FactorType> &outputFactorsVec + , const Vocab &vocab) const +{ + Moses::Word *ret = new Moses::Word(m_isNonTerminal); + + for (size_t ind = 0; ind < m_factors.size(); ++ind) { + Moses::FactorType factorType = outputFactorsVec[ind]; + UINT32 vocabId = m_factors[ind]; + const Moses::Factor *factor = vocab.GetFactor(vocabId, factorType, direction, m_isNonTerminal); + ret->SetFactor(factorType, factor); + } + + return ret; + +} + +int Word::Compare(const Word &compare) const +{ + int ret; + + if (m_isNonTerminal != compare.m_isNonTerminal) + return m_isNonTerminal ?-1 : 1; + + if (m_factors < compare.m_factors) + ret = -1; + else if (m_factors > compare.m_factors) + ret = 1; + else + ret = 0; + + return ret; +} + +bool Word::operator<(const Word &compare) const +{ + int ret = Compare(compare); + return ret < 0; +} + +bool Word::operator==(const Word &compare) const +{ + int ret = Compare(compare); + return ret == 0; +} + +std::ostream& operator<<(std::ostream &out, const Word &word) +{ + out << "["; + + std::vector<UINT64>::const_iterator iter; + for (iter = word.m_factors.begin(); iter != word.m_factors.end(); ++iter) { + out << *iter << "|"; + } + + out << (word.m_isNonTerminal ? "n" : "t"); + out << "]"; + + return out; +} +} |