/* * score_reordering.cpp * * Created by: Sara Stymne - Linköping University * Machine Translation Marathon 2010, Dublin */ #include #include #include #include #include #include #include #include #include "reordering_classes.h" using namespace std; void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier); void get_orientations(const string& pair, string& previous, string& next); int main(int argc, char* argv[]) { cerr << "Lexical Reordering Scorer\n" << "scores lexical reordering models of several types (hierarchical, phrase-based and word-based-extraction\n"; if (argc < 3) { cerr << "syntax: score_reordering extractFile smoothingValue filepath (--model \"type max-orientation (specification-strings)\" )+\n"; exit(1); } char* extractFileName = argv[1]; double smoothingValue = atof(argv[2]); string filepath = argv[3]; ifstream eFile(extractFileName); if (!eFile) { cerr << "Could not open the extract file " << extractFileName <<"for scoring of lexical reordering models\n"; exit(1); } bool smoothWithCounts = false; map modelScores; vector models; bool hier = false; bool phrase = false; bool wbe = false; string e,f,w,p,h; string prev, next; int i = 4; while (i= argc) { cerr << "score: syntax error, no model information provided to the option" << argv[i] << endl; exit(1); } istringstream is(argv[++i]); string m,t; is >> m >> t; modelScores[m] = ModelScore::createModelScore(t); if (m.compare("hier") == 0) { hier = true; } else if (m.compare("phrase") == 0) { phrase = true; } if (m.compare("wbe") == 0) { wbe = true; } if (!hier && !phrase && !wbe) { cerr << "WARNING: No models specified for lexical reordering. No lexical reordering table will be trained.\n"; return 0; } string config; //Store all models while (is >> config) { models.push_back(Model::createModel(modelScores[m],config,filepath)); } } else { cerr << "illegal option given to lexical reordering model score\n"; exit(1); } i++; } //////////////////////////////////// //calculate smoothing if (smoothWithCounts) { string line; while (getline(eFile,line)) { split_line(line,e,f,w,p,h); if (hier) { get_orientations(h, prev, next); modelScores["hier"]->add_example(prev,next); } if (phrase) { get_orientations(p, prev, next); modelScores["phrase"]->add_example(prev,next); } if (wbe) { get_orientations(w, prev, next); modelScores["wbe"]->add_example(prev,next); } } // calculate smoothing for each model for (int i=0; icreateSmoothing(smoothingValue); } //reopen eFile eFile.close(); eFile.open(extractFileName); } else { //constant smoothing for (int i=0; icreateConstSmoothing(smoothingValue); } } //////////////////////////////////// //calculate scores for reordering table string line,f_current,e_current; bool first = true; while (getline(eFile, line)) { split_line(line,f,e,w,p,h); if (first) { f_current = f; e_current = e; first = false; } else if (f.compare(f_current) != 0 || e.compare(e_current) != 0) { //fe - score for (int i=0; iscore_fe(f_current,e_current); } //reset for(map::const_iterator it = modelScores.begin(); it != modelScores.end(); ++it) { it->second->reset_fe(); } if (f.compare(f_current) != 0) { //f - score for (int i=0; iscore_f(f_current); } //reset for(map::const_iterator it = modelScores.begin(); it != modelScores.end(); ++it) { it->second->reset_f(); } } f_current = f; e_current = e; } // uppdate counts if (hier) { get_orientations(h, prev, next); modelScores["hier"]->add_example(prev,next); } if (phrase) { get_orientations(p, prev, next); modelScores["phrase"]->add_example(prev,next); } if (wbe) { get_orientations(w, prev, next); modelScores["wbe"]->add_example(prev,next); } } //Score the last phrases for (int i=0; iscore_fe(f,e); } for (int i=0; iscore_f(f); } //Zip all files for (int i=0; izipFile(); } return 0; } void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier) { int begin = 0; int end = line.find(" ||| "); foreign = line.substr(begin, end - begin); begin = end+5; end = line.find(" ||| ", begin); english = line.substr(begin, end - begin); begin = end+5; end = line.find(" | ", begin); wbe = line.substr(begin, end - begin); begin = end+3; end = line.find(" | ", begin); phrase = line.substr(begin, end - begin); begin = end+3; hier = line.substr(begin, line.size() - begin); } void get_orientations(const string& pair, string& previous, string& next) { istringstream is(pair); is >> previous >> next; }