move moses/src/* to moses/

author: Hieu Hoang <hieuhoang@gmail.com> 2012-11-12 23:56:18 +0400
committer: Hieu Hoang <hieuhoang@gmail.com> 2012-11-12 23:56:18 +0400
commit: 5e3ef23cef6101d2c098eb3445f562e8f595655b (patch)
tree: b8c332b6fa82bae84ea4910967a10ba1b08a7107 /moses/Word.cpp
parent: 8c785cff2b1be3cccd76ea9026f71b649762dfc3 (diff)
1 files changed, 157 insertions, 0 deletions
diff --git a/moses/Word.cpp b/moses/Word.cpp
new file mode 100644
index 000000000..c23e8de8c
--- /dev/null
+++ b/moses/Word.cpp
@@ -0,0 +1,157 @@
+// $Id$
+// vim::tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <sstream>
+#include "memory.h"
+#include "Word.h"
+#include "TypeDef.h"
+#include "StaticData.h"  // needed to determine the FactorDelimiter
+#include "util/tokenize_piece.hh"
+
+using namespace std;
+
+namespace Moses
+{
+
+// static
+/** Three-way comparison of two words.
+ *  A non-terminal orders before a terminal. Otherwise the factors are compared
+ *  position by position by pointer value; a position where either word has
+ *  no factor (NULL) is ignored.
+ *  \return -1, 0 or +1
+ */
+int Word::Compare(const Word &targetWord, const Word &sourceWord)
+{
+  const bool targetIsNonTerm = targetWord.IsNonTerminal();
+  const bool sourceIsNonTerm = sourceWord.IsNonTerminal();
+  if (targetIsNonTerm != sourceIsNonTerm) {
+    return targetIsNonTerm ? -1 : 1;
+  }
+
+  for (size_t idx = 0; idx < MAX_NUM_FACTORS; ++idx) {
+    const Factor *lhs = targetWord[idx];
+    const Factor *rhs = sourceWord[idx];
+
+    // only a position set in both words, holding different factors, decides
+    const bool bothSet = (lhs != NULL) && (rhs != NULL);
+    if (bothSet && lhs != rhs) {
+      return (lhs < rhs) ? -1 : +1;
+    }
+  }
+
+  // no deciding position found
+  return 0;
+}
+
+/** Copy factors from sourceWord into this word, but only at positions
+ *  where this word has no factor yet. Factors already set are never overwritten.
+ */
+void Word::Merge(const Word &sourceWord)
+{
+  for (unsigned int idx = 0; idx < MAX_NUM_FACTORS; ++idx) {
+    if (m_factorArray[idx] != NULL) {
+      continue; // keep what we already have
+    }
+
+    const Factor *incoming = sourceWord.m_factorArray[idx];
+    if (incoming != NULL) {
+      m_factorArray[idx] = incoming;
+    }
+  }
+}
+
+/** Build the string form of the requested factors.
+ *  \param factorType   factor types to output, in this order; at most MAX_NUM_FACTORS entries
+ *  \param endWithBlank if true, a single space is appended to the result
+ *  \return the strings of all non-NULL requested factors, joined by the
+ *          factor delimiter obtained from StaticData
+ */
+std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
+{
+  stringstream buffer;
+  CHECK(factorType.size() <= MAX_NUM_FACTORS);
+
+  const std::string &delim = StaticData::Instance().GetFactorDelimiter();
+  bool needDelim = false; // becomes true once the first factor has been written
+
+  for (vector<FactorType>::const_iterator it = factorType.begin(); it != factorType.end(); ++it) {
+    const Factor *factor = m_factorArray[*it];
+    if (factor == NULL) {
+      continue; // missing factors are skipped without emitting a delimiter
+    }
+    if (needDelim) {
+      buffer << delim;
+    }
+    needDelim = true;
+    buffer << factor->GetString();
+  }
+
+  if (endWithBlank) {
+    buffer << " ";
+  }
+  return buffer.str();
+}
+
+/** Get the string of a single factor.
+ *  \param factorType index of the factor to read
+ *  \return the factor's string, or an empty string if this word has no
+ *          factor at that position
+ */
+std::string Word::GetString(FactorType factorType) const
+{
+  const Factor *factor = m_factorArray[factorType];
+  if (factor != NULL) {
+    return factor->GetString();
+  }
+  // Returning NULL here would construct a std::string from a null
+  // const char*, which is undefined behaviour; return an empty string instead.
+  return std::string();
+}
+
+/** Populate this word's factors from a delimited string.
+ *  The string is split on the factor delimiter obtained from StaticData; the
+ *  n-th token is stored at position factorOrder[n]. If the string has fewer
+ *  tokens than factorOrder has entries, the remaining positions are left
+ *  untouched. Having more tokens than entries trips the CHECK.
+ *  \param direction     not used by this function
+ *  \param factorOrder   destination factor type for each token, in order
+ *  \param str           the delimited factor string
+ *  \param isNonTerminal stored as this word's non-terminal flag
+ */
+void Word::CreateFromString(FactorDirection direction
+                            , const std::vector<FactorType> &factorOrder
+                            , const StringPiece &str
+                            , bool isNonTerminal)
+{
+  FactorCollection &factorCollection = FactorCollection::Instance();
+
+  util::TokenIter<util::MultiCharacter> token(str, StaticData::Instance().GetFactorDelimiter());
+  size_t pos = 0;
+  while (pos < factorOrder.size() && token) {
+    m_factorArray[factorOrder[pos]] = factorCollection.AddFactor(*token);
+    ++pos;
+    ++token;
+  }
+  // every token must have been consumed
+  CHECK(!token);
+
+  // assume term/non-term same for all factors
+  m_isNonTerminal = isNonTerminal;
+}
+
+/** Set every factor of this word from sourceWord, registering each one as an
+ *  Output factor in the FactorCollection. Positions where sourceWord has no
+ *  factor get UNKNOWN_FACTOR instead. The non-terminal flag is copied from
+ *  sourceWord.
+ */
+void Word::CreateUnknownWord(const Word &sourceWord)
+{
+  FactorCollection &factorCollection = FactorCollection::Instance();
+
+  for (unsigned int idx = 0; idx < MAX_NUM_FACTORS; ++idx) {
+    const FactorType factorType = static_cast<FactorType>(idx);
+    const Factor *original = sourceWord[idx];
+
+    if (original != NULL) {
+      // reuse the source factor's surface string
+      SetFactor(factorType, factorCollection.AddFactor(Output, factorType, original->GetString()));
+    } else {
+      // nothing in the source at this position
+      SetFactor(factorType, factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR));
+    }
+  }
+
+  m_isNonTerminal = sourceWord.IsNonTerminal();
+}
+
+// NOTE(review): TO_STRING_BODY is a macro defined outside this file; presumably it
+// expands to the definition of Word::ToString() in terms of operator<< -- confirm.
+TO_STRING_BODY(Word);
+
+// friend
+/** Stream a word: every non-NULL factor, in factor-index order, joined by the
+ *  factor delimiter obtained from StaticData and followed by one space.
+ *  The text is assembled in a local buffer and written to out in one piece.
+ */
+ostream& operator<<(ostream& out, const Word& word)
+{
+  stringstream buffer;
+
+  const std::string &delim = StaticData::Instance().GetFactorDelimiter();
+  bool needDelim = false; // becomes true once the first factor has been written
+
+  for (unsigned int idx = 0; idx < MAX_NUM_FACTORS; ++idx) {
+    const Factor *factor = word.GetFactor(static_cast<FactorType>(idx));
+    if (factor == NULL) {
+      continue;
+    }
+    if (needDelim) {
+      buffer << delim;
+    }
+    needDelim = true;
+    buffer << *factor;
+  }
+
+  out << buffer.str() << " ";
+  return out;
+}
+
+}
+
author	Hieu Hoang <hieuhoang@gmail.com>	2012-11-12 23:56:18 +0400
committer	Hieu Hoang <hieuhoang@gmail.com>	2012-11-12 23:56:18 +0400
commit	5e3ef23cef6101d2c098eb3445f562e8f595655b (patch)
tree	b8c332b6fa82bae84ea4910967a10ba1b08a7107 /moses/Word.cpp
parent	8c785cff2b1be3cccd76ea9026f71b649762dfc3 (diff)