Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'v0.6.4/src/ttableDiff.hpp')
-rw-r--r-- v0.6.4/src/ttableDiff.hpp 115
1 files changed, 115 insertions, 0 deletions
diff --git a/v0.6.4/src/ttableDiff.hpp b/v0.6.4/src/ttableDiff.hpp
new file mode 100644
index 0000000..0a5f3fb
--- /dev/null
+++ b/v0.6.4/src/ttableDiff.hpp
@@ -0,0 +1,115 @@
+/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
+/*
+ * newgiza
+ * Copyright (C) Qin Gao 2007 <qing@cs.cmu.edu>
+ *
+ * newgiza is free software.
+ *
+ * You may redistribute it and/or modify it under the terms of the
+ * GNU General Public License, as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * newgiza is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with newgiza. If not, write to:
+ * The Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02110-1301, USA.
+ */
+
+
+#ifndef _TTABLEDIFF_HPP_
+#define _TTABLEDIFF_HPP_
+#include "TTables.h"
+#include <sstream>
+#include <string>
+#include "types.h"
+
+using namespace std;
+/*!
+This class collects word-pair counts and stores them in a difference
+file, in order to make GIZA parallel.
+*/
+template <class COUNT,class PROB>
+class CTTableDiff{
+private:
+    /*! Table type: maps a wordPairIds key (built as wordPairIds(e, f)) to its count. */
+    typedef hash_map<wordPairIds, COUNT, hashpair, equal_to<wordPairIds> > PairCountMap;
+
+    // The two totals below are not read or written by any method of this class.
+    INT32 noEnglishWords; // total number of unique source words
+    INT32 noFrenchWords;  // total number of unique target words
+    /*!
+    Store only the counts (no probabilities are kept here). */
+    PairCountMap ef;
+
+public:
+    /*!
+    Writes every stored entry to \a filename, one entry per text line in the
+    form "<pair.first> <pair.second> <count>", separated by single spaces.
+    NOTE(review): counts are written with the stream's default formatting;
+    no precision is set here.
+    \return SUCCESS on success, -1 if the file cannot be opened or if the
+            stream is in a failed state after the final flush.
+    */
+    INT32 SaveToFile(const char* filename){
+        ofstream ofs(filename);
+        if(!ofs.is_open())
+            return -1;
+        for(typename PairCountMap::iterator it = ef.begin(); it != ef.end(); ++it){
+            // '\n' rather than std::endl: there is no need to flush per entry.
+            ofs << it->first.first << " " << it->first.second << " "
+                << it->second << '\n';
+        }
+        // Flush once so that a buffered write error is reported to the caller
+        // instead of being lost when the stream is destroyed.
+        ofs.flush();
+        if(!ofs)
+            return -1;
+        return SUCCESS;
+    }
+
+    /*!
+    Discards the current contents, then reads entries from \a filename.
+    Each non-empty line is expected to hold three whitespace-separated
+    fields as written by SaveToFile(). Lines on which the three fields
+    cannot all be parsed are skipped. If the same pair appears on several
+    lines, the value from the last such line is kept.
+    \return SUCCESS, or -1 if the file cannot be opened (the table has
+            already been cleared at that point).
+    */
+    INT32 LoadFromFile(const char* filename){
+        ef.clear();
+        ifstream ifs(filename);
+        if(!ifs.is_open())
+            return -1;
+        string sline;
+        // Let getline() drive the loop; testing eof() beforehand does not
+        // tell whether the next read will actually deliver a line.
+        while(std::getline(ifs, sline)){
+            if(sline.empty())
+                continue;
+            istringstream ss(sline);
+            WordIndex we = 0, wf = 0;
+            COUNT ct = COUNT();
+            // Check the stream state instead of comparing against -1
+            // sentinels: since C++11 a failed numeric extraction stores 0
+            // in its target, so a line with a missing or malformed count
+            // would pass a sentinel test and be recorded with count 0.
+            if(!(ss >> we >> wf >> ct))
+                continue;
+            ef[wordPairIds(we, wf)] = ct;
+        }
+        return SUCCESS;
+    }
+
+    /*!
+    Looks up the pair (\a e, \a f) without creating an entry.
+    \return pointer to the stored count if the pair is present,
+            otherwise a null pointer.
+    */
+    COUNT * GetPtr(WordIndex e, WordIndex f){
+        typename PairCountMap::iterator i = ef.find(wordPairIds(e, f));
+        return (i != ef.end()) ? &(i->second) : 0;
+    }
+
+    /*!
+    Adds \a inc to the count of the pair (\a e, \a f). The entry is
+    accessed through operator[], so a pair that is not yet present is
+    created by that access. A zero \a inc is ignored and touches nothing.
+    */
+    void incCount(WordIndex e, WordIndex f, COUNT inc)
+    {
+        if( inc )
+            ef[wordPairIds(e, f)] += inc ;
+    }
+
+    /*!
+    Passes every stored entry to \a ttable by calling
+    ttable.incCount(pair.first, pair.second, count) once per entry.
+    \return always SUCCESS.
+    */
+    INT32 AugmentTTable(tmodel<COUNT,PROB>& ttable){
+        for(typename PairCountMap::iterator it = ef.begin(); it != ef.end(); ++it){
+            ttable.incCount(it->first.first, it->first.second, it->second);
+        }
+        return SUCCESS;
+    }
+
+};
+
+#endif // _TTABLEDIFF_HPP_