Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/giza-pp.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'GIZA++-v2/WordClasses.h')
-rw-r--r-- GIZA++-v2/WordClasses.h 95
1 files changed, 95 insertions, 0 deletions
diff --git a/GIZA++-v2/WordClasses.h b/GIZA++-v2/WordClasses.h
new file mode 100644
index 0000000..1fea083
--- /dev/null
+++ b/GIZA++-v2/WordClasses.h
@@ -0,0 +1,95 @@
+/*
+
+Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
+
+This file is part of GIZA++ ( extension of GIZA ).
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+#ifndef WordClasses_h_DEFINED
+#define WordClasses_h_DEFINED
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+
+/**
+ * Word-to-class table loaded from a text stream of "<word> <class>" lines.
+ *
+ * Class names are numbered in order of first appearance. The name "0" is
+ * pre-registered as id 0 and is also what the accessors fall back to for
+ * out-of-range word indices, unknown class names and out-of-range class ids.
+ *
+ * NOTE(review): this header uses Vector, istream, cout/cerr, assert and max
+ * without declaring or qualifying them, so it presumably relies on the
+ * including file to provide them (and a using-directive for std) -- confirm
+ * before changing include order.
+ */
+class WordClasses
+{
+ private:
+  map<string,string> Sw2c;  // word -> class name, as read by read()
+  map<string,int> Sc2int;   // class name -> class id
+  Vector<string> Sint2c;    // class id -> class name
+  Vector<int> w2c;          // word index (as returned by the mapper) -> class id
+  unsigned int classes;     // number of class ids handed out so far
+ public:
+  /** Creates a table that contains only the default class "0" (id 0). */
+  WordClasses()
+    : classes(1)
+  {
+    Sint2c.push_back("0");
+    Sc2int["0"]=0;
+  }
+
+  /**
+   * Reads whitespace-separated "<word> <class>" pairs, one per line, and
+   * rebuilds the word-index -> class-id table.
+   *
+   * Blank lines are skipped silently; a line that holds a word but no class
+   * is skipped with a warning on cerr. Previously such lines registered an
+   * empty word and/or an empty class name, and a second blank line tripped
+   * the duplicate-word assertion.
+   *
+   * @param in stream to read until getline fails.
+   * @param m  callable taking a word string and returning its integer index;
+   *           the result is used directly as an index into the table.
+   *           NOTE(review): a negative result would index out of range --
+   *           confirm the mapper never returns one.
+   * @return always true.
+   */
+  template<class MAPPER> bool read(istream&in,const MAPPER&m)
+  {
+    string sline;
+    int maxword=0;
+    while(getline(in,sline))
+    {
+      string word,wclass;
+      istringstream iline(sline);
+      if( !(iline>>word>>wclass) )
+      {
+        // word stays empty when the line held nothing but whitespace.
+        if( !word.empty() )
+          cerr << "WARNING: word '" << word << "' has no class; line ignored.\n";
+        continue;
+      }
+      maxword=max(m(word),maxword);
+      assert(Sw2c.count(word)==0);
+      Sw2c[word]=wclass;
+      // insert() reports whether the class name is new, so one lookup suffices.
+      if( Sc2int.insert(map<string,int>::value_type(wclass,int(classes))).second )
+      {
+        ++classes;
+        Sint2c.push_back(wclass);
+        assert(classes==Sint2c.size());
+      }
+    }
+    // Every index up to the largest one seen starts out in the default class 0.
+    w2c=Vector<int>(maxword+1,0);
+    for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
+      w2c[m(i->first)]=Sc2int[i->second];
+    cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
+    return true;
+  }
+
+  /** Returns the class id stored for word index w, or 0 if w is out of range. */
+  int getClass(int w)const
+  {
+    if( w>=0 && w<int(w2c.size()) )
+      return w2c[w];
+    return 0;
+  }
+
+  /**
+   * Returns the id of the class named x. If x is unknown, prints a warning
+   * on cerr and returns 0.
+   */
+  const int operator()(const string&x)const
+  {
+    map<string,int>::const_iterator i=Sc2int.find(x);
+    if( i!=Sc2int.end() )
+      return i->second;
+    cerr << "WARNING: class " << x << " not found.\n";
+    return 0;
+  }
+
+  /** Returns the name of class id cnr, or "0" if cnr is out of range. */
+  string classString(unsigned int cnr)const
+  {
+    if( cnr<Sint2c.size() )
+      return Sint2c[cnr];
+    return string("0");
+  }
+};
+
+#endif