diff options
Diffstat (limited to 'mgizapp/src/snt2plain.cpp')
-rw-r--r-- | mgizapp/src/snt2plain.cpp | 107 |
1 files changed, 49 insertions, 58 deletions
diff --git a/mgizapp/src/snt2plain.cpp b/mgizapp/src/snt2plain.cpp index 9950050..2f1998f 100644 --- a/mgizapp/src/snt2plain.cpp +++ b/mgizapp/src/snt2plain.cpp @@ -12,33 +12,30 @@ using namespace std; void readVoc(istream&in,map<string,string>&voc) { - string line,s1,s2; + string line,s1,s2; voc["1"]="UNK"; if( !in )cerr <<"Vocabulary does not exist.\n"; - while(getline(in,line)) - { - istrstream eingabe(line.c_str()); - if( !(eingabe>>s1>>s2)) - cerr << "ERROR in vocabulary '" << line << "'\n"; - voc[s1]=s2; - } + while(getline(in,line)) { + istrstream eingabe(line.c_str()); + if( !(eingabe>>s1>>s2)) + cerr << "ERROR in vocabulary '" << line << "'\n"; + voc[s1]=s2; + } } int main(int argc,char **argv) { - if( argc!=5&&argc!=6 ) - { - cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n"; - cerr << "Converts GIZA++ snt-format into plain text.\n"; - exit(1); - } + if( argc!=5&&argc!=6 ) { + cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n"; + cerr << "Converts GIZA++ snt-format into plain text.\n"; + exit(1); + } bool counts=0; - if( argc==6 ) - { - if(string(argv[5])!="-counts") - cerr << "ERROR: wrong option " << argv[5] << endl; - counts=1; - } + if( argc==6 ) { + if(string(argv[5])!="-counts") + cerr << "ERROR: wrong option " << argv[5] << endl; + counts=1; + } ifstream v1(argv[1]),v2(argv[2]),t(argv[3]); string prefix(argv[4]); string outfil1=prefix+"1.txt"; @@ -51,43 +48,37 @@ int main(int argc,char **argv) int source=0,target=0; string line1,line2,line3; int printed=0; - while(getline(t,line1)&&getline(t,line2)&&getline(t,line3)) - { - istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str()); - double count; - string word; - eingabe1>>count; - vector<string>l1,l2; - while(eingabe2>>word) - l1.push_back(word); - while(eingabe3>>word) - l2.push_back(word); - if( counts ) - cout << count << '\n'; - for(unsigned int p=0;p<l1.size();p++) - { - if(voc1.count(l1[p])==0) - { - if( printed++==0) - cerr << "ERROR: source vocabulary entry " << l1[p] << " unknown.\n"; - out1 << l1[p]<<' '; - } - else - out1 << voc1[l1[p]] << ' '; - source++; - } - for(unsigned int p=0;p<l2.size();p++) - { - if(voc2.count(l2[p])==0) - { - if( printed++ ==0) - cerr << "ERROR: target vocabulary entry " << l2[p] << " unknown.\n"; - out2 <<l2[p]<<' '; - } - out2 << voc2[l2[p]] << ' '; - target++; - } - out1<<'\n'; - out2<<'\n'; + while(getline(t,line1)&&getline(t,line2)&&getline(t,line3)) { + istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str()); + double count; + string word; + eingabe1>>count; + vector<string>l1,l2; + while(eingabe2>>word) + l1.push_back(word); + while(eingabe3>>word) + l2.push_back(word); + if( counts ) + cout << count << '\n'; + for(unsigned int p=0; p<l1.size(); p++) { + if(voc1.count(l1[p])==0) { + if( printed++==0) + cerr << "ERROR: source vocabulary entry " << l1[p] << " unknown.\n"; + out1 << l1[p]<<' '; + } else + out1 << voc1[l1[p]] << ' '; + source++; + } + for(unsigned int p=0; p<l2.size(); p++) { + if(voc2.count(l2[p])==0) { + if( printed++ ==0) + cerr << "ERROR: target vocabulary entry " << l2[p] << " unknown.\n"; + out2 <<l2[p]<<' '; + } + out2 << voc2[l2[p]] << ' '; + target++; } + out1<<'\n'; + out2<<'\n'; + } } |