// Reads a GIZA++ sentence file (three lines per sentence pair) and writes,
// for every source word id, the target word ids that occurred with it in the
// same pair.
//
// Usage: <prog> output vcb1 vcb2 snt12 [-counts]
//
// Output lines are "<source-id> <target-id>", or with -counts
// "<count> <source-id> <target-id>".
//
// NOTE(review): this copy of the file had lost its #include targets and
// template arguments; they were reconstructed from how the names are used
// (string-to-string vocabulary maps, a vector of int-to-int maps). Confirm
// against the upstream copy before merging.

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// One row per source word id: target word id -> number of times seen.
typedef map<int,int> CoocRow;
typedef vector<CoocRow> CoocTable;

// Loads a vocabulary stream into `voc`, keyed by the first whitespace-separated
// field of each line with the second field as value. The entry "1" -> "UNK" is
// always present. A stream that is not readable is reported on stderr and
// yields only that default entry. Lines with fewer than two fields are reported
// and skipped (previously they were reported but still stored, using whatever
// values were left over from the preceding line).
void readVoc(istream&in,map<string,string>&voc)
{
  voc["1"]="UNK";
  if( !in )
    cerr <<"Vocabulary does not exist.\n";
  string line,s1,s2;
  while(getline(in,line))
    {
      istringstream eingabe(line);
      if( !(eingabe>>s1>>s2) )
        {
          cerr << "ERROR in vocabulary '" << line << "'\n";
          continue;
        }
      voc[s1]=s2;
    }
}

// Splits a line on whitespace and converts every token with atoi, so a
// non-numeric token becomes 0 exactly as in the original parser.
static vector<int> parseIds(const string&line)
{
  vector<int> ids;
  istringstream in(line);
  string word;
  while(in>>word)
    ids.push_back(atoi(word.c_str()));
  return ids;
}

// Increments the count of every target id in `row`; returns how many new
// entries that created.
static size_t addTargets(CoocRow&row,const vector<int>&targets)
{
  const size_t before=row.size();
  for(size_t j=0; j<targets.size(); ++j)
    ++row[targets[j]];
  return row.size()-before;
}

// Writes every (source id, target id) entry of `table`, prefixed by its count
// when `counts` is set.
static void writeTable(ostream&out,const CoocTable&table,bool counts)
{
  for(size_t i=0; i<table.size(); ++i)
    for(CoocRow::const_iterator j=table[i].begin(); j!=table[i].end(); ++j)
      {
        if( counts )
          out << j->second << " " << i << " " << j->first << '\n';
        else
          out << i << " " << j->first << '\n';
      }
}

// Upper bound on buffered table entries before an intermediate dump;
// 0 disables intermediate dumps. Only set in -counts mode.
int maxElems=0;

int main(int argc,char **argv)
{
  if( argc!=5&&argc!=6 )
    {
      cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 [-counts]\n";
      cerr << "Writes the source/target word-id co-occurrences of a GIZA++ snt file.\n";
      return 1;
    }

  bool counts=false;
  if( argc==6 )
    {
      // Historical behaviour kept: any sixth argument switches on counts mode;
      // an unexpected one is reported but is not fatal. The message used to
      // print argv[6], which is the terminating null pointer when argc==6.
      if( string(argv[5])!="-counts" )
        cerr << "ERROR: wrong option " << argv[5] << endl;
      counts=true;
      maxElems=10000000;
    }

  ifstream v1(argv[2]),v2(argv[3]),t(argv[4]);
  ofstream ou(argv[1]);
  if( !t )
    {
      cerr << "ERROR: cannot open sentence file '" << argv[4] << "'\n";
      return 1;
    }
  if( !ou )
    {
      cerr << "ERROR: cannot open output file '" << argv[1] << "'\n";
      return 1;
    }

  map<string,string> voc1,voc2;
  readVoc(v1,voc1);
  // The second vocabulary is only read for its diagnostics; its contents are
  // not used below.
  readVoc(v2,voc2);

  // Sized from the first vocabulary plus slack; grown on demand below.
  const size_t initialRows=voc1.size()+1000;
  CoocTable vsi(initialRows);
  int nLine=0;
  size_t totalElems=0;

  // Each sentence pair is three lines: the first is not used here, the second
  // holds the source ids and the third the target ids.
  string line1,line2,line3;
  while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
    {
      const vector<int> l1=parseIds(line2);
      const vector<int> l2=parseIds(line3);
      if( ((++nLine)%1000)==0 )
        cerr << "line " << nLine << '\n';

      // Row 0 receives the target words of every sentence pair.
      totalElems+=addTargets(vsi[0],l2);

      for(size_t i=0; i<l1.size(); ++i)
        {
          const int id=l1[i];
          if( id<0 )
            {
              // A negative id would index before the start of the table.
              cerr << "ERROR: negative word id " << id << " in sentence pair "
                   << nLine << " ignored\n";
              continue;
            }
          if( size_t(id)>=vsi.size() )
            {
              cerr << "I have to resize: " << id << endl;
              vsi.resize(size_t(id)+1);
            }
          totalElems+=addTargets(vsi[id],l2);
        }

      // In counts mode the table is dumped and reset once it grows past
      // maxElems, so the same pair can appear on several output lines with
      // partial counts.
      if( maxElems>0 && totalElems>size_t(maxElems) )
        {
          cerr << "INFO: print out " << totalElems << " entries.\n";
          writeTable(ou,vsi,counts);
          totalElems=0;
          vsi.assign(initialRows,CoocRow());
        }
    }

  cerr << "END.\n";
  writeTable(ou,vsi,counts);
  ou.flush();
  ou.close();
  if( !ou )
    {
      cerr << "ERROR: writing '" << argv[1] << "' failed\n";
      return 1;
    }
  return 0;
}