#include #include #include #include #include #include #include #include "zlib.h" #include "reordering_classes.h" using namespace std; ModelScore::ModelScore() { for(int i=MONO; i<=NOMONO; ++i) { count_fe_prev.push_back(0); count_fe_next.push_back(0); count_f_prev.push_back(0); count_f_next.push_back(0); } } ModelScore::~ModelScore() {} ModelScore* ModelScore::createModelScore(const string& modeltype) { if (modeltype.compare("mslr") == 0) { return new ModelScoreMSLR(); } else if (modeltype.compare("msd") == 0) { return new ModelScoreMSD(); } else if (modeltype.compare("monotonicity") == 0 ) { return new ModelScoreMonotonicity(); } else if (modeltype.compare("leftright") == 0) { return new ModelScoreLR(); } else { cerr << "Illegal model type given for lexical reordering model scoring: " << modeltype << ". The allowed types are: mslr, msd, monotonicity, leftright" << endl; exit(1); } } void ModelScore::reset_fe() { for(int i=MONO; i<=NOMONO; ++i) { count_fe_prev[i] = 0; count_fe_next[i] = 0; } } void ModelScore::reset_f() { for(int i=MONO; i<=NOMONO; ++i) { count_f_prev[i] = 0; count_f_next[i] = 0; } } void ModelScore::add_example (const StringPiece& previous, const StringPiece& next, float weight) { count_fe_prev[getType(previous)]+=weight; count_f_prev[getType(previous)]+=weight; count_fe_next[getType(next)]+=weight; count_f_next[getType(next)]+=weight; } const vector& ModelScore::get_scores_fe_prev() const { return count_fe_prev; } const vector& ModelScore::get_scores_fe_next() const { return count_fe_next; } const vector& ModelScore::get_scores_f_prev() const { return count_f_prev; } const vector& ModelScore::get_scores_f_next() const { return count_f_next; } ORIENTATION ModelScore::getType(const StringPiece& s) { if (s.compare("mono") == 0) { return MONO; } else if (s.compare("swap") == 0) { return SWAP; } else if (s.compare("dright") == 0) { return DRIGHT; } else if (s.compare("dleft") == 0) { return DLEFT; } else if (s.compare("other") == 0) { return OTHER; } else if (s.compare("nomono") == 0) { return NOMONO; } else { cerr << "Illegal reordering type used: " << s << endl; exit(1); } } ORIENTATION ModelScoreMSLR::getType(const StringPiece& s) { if (s.compare("mono") == 0) { return MONO; } else if (s.compare("swap") == 0) { return SWAP; } else if (s.compare("dright") == 0) { return DRIGHT; } else if (s.compare("dleft") == 0) { return DLEFT; } else if (s.compare("other") == 0 || s.compare("nomono") == 0) { cerr << "Illegal reordering type used: " << s << " for model type mslr. You have to re-run step 5 in order to train such a model." << endl; exit(1); } else { cerr << "Illegal reordering type used: " << s << endl; exit(1); } } ORIENTATION ModelScoreLR::getType(const StringPiece& s) { if (s.compare("mono") == 0 || s.compare("dright") == 0) { return DRIGHT; } else if (s.compare("swap") == 0 || s.compare("dleft") == 0) { return DLEFT; } else if (s.compare("other") == 0 || s.compare("nomono") == 0) { cerr << "Illegal reordering type used: " << s << " for model type LeftRight. You have to re-run step 5 in order to train such a model." << endl; exit(1); } else { cerr << "Illegal reordering type used: " << s << endl; exit(1); } } ORIENTATION ModelScoreMSD::getType(const StringPiece& s) { if (s.compare("mono") == 0) { return MONO; } else if (s.compare("swap") == 0) { return SWAP; } else if (s.compare("dleft") == 0 || s.compare("dright") == 0 || s.compare("other") == 0) { return OTHER; } else if (s.compare("nomono") == 0) { cerr << "Illegal reordering type used: " << s << " for model type msd. You have to re-run step 5 in order to train such a model." << endl; exit(1); } else { cerr << "Illegal reordering type used: " << s << endl; exit(1); } } ORIENTATION ModelScoreMonotonicity::getType(const StringPiece& s) { if (s.compare("mono") == 0) { return MONO; } else if (s.compare("swap") == 0 || s.compare("dleft") == 0 || s.compare("dright") == 0 || s.compare("other") == 0 || s.compare("nomono") == 0 ) { return NOMONO; } else { cerr << "Illegal reordering type used: " << s << endl; exit(1); } } void ScorerMSLR::score(const vector& all_scores, vector& scores) const { scores.push_back(all_scores[MONO]); scores.push_back(all_scores[SWAP]); scores.push_back(all_scores[DLEFT]); scores.push_back(all_scores[DRIGHT]); } void ScorerMSD::score(const vector& all_scores, vector& scores) const { scores.push_back(all_scores[MONO]); scores.push_back(all_scores[SWAP]); scores.push_back(all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]); } void ScorerMonotonicity::score(const vector& all_scores, vector& scores) const { scores.push_back(all_scores[MONO]); scores.push_back(all_scores[SWAP]+all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]+all_scores[NOMONO]); } void ScorerLR::score(const vector& all_scores, vector& scores) const { scores.push_back(all_scores[MONO]+all_scores[DRIGHT]); scores.push_back(all_scores[SWAP]+all_scores[DLEFT]); } void ScorerMSLR::createSmoothing(const vector& scores, double weight, vector& smoothing) const { double total = accumulate(scores.begin(), scores.end(), 0); smoothing.push_back(weight*(scores[MONO]+0.1)/total); smoothing.push_back(weight*(scores[SWAP]+0.1)/total); smoothing.push_back(weight*(scores[DLEFT]+0.1)/total); smoothing.push_back(weight*(scores[DRIGHT]+0.1)/total); } void ScorerMSLR::createConstSmoothing(double weight, vector& smoothing) const { for (int i=1; i<=4; ++i) { smoothing.push_back(weight); } } void ScorerMSD::createSmoothing(const vector& scores, double weight, vector& smoothing) const { double total = accumulate(scores.begin(), scores.end(), 0); smoothing.push_back(weight*(scores[MONO]+0.1)/total); smoothing.push_back(weight*(scores[SWAP]+0.1)/total); smoothing.push_back(weight*(scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+0.1)/total); } void ScorerMSD::createConstSmoothing(double weight, vector& smoothing) const { for (int i=1; i<=3; ++i) { smoothing.push_back(weight); } } void ScorerMonotonicity::createSmoothing(const vector& scores, double weight, vector& smoothing) const { double total = accumulate(scores.begin(), scores.end(), 0); smoothing.push_back(weight*(scores[MONO]+0.1)/total); smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+scores[NOMONO]+0.1)/total); } void ScorerMonotonicity::createConstSmoothing(double weight, vector& smoothing) const { for (double i=1; i<=2; ++i) { smoothing.push_back(weight); } } void ScorerLR::createSmoothing(const vector& scores, double weight, vector& smoothing) const { double total = accumulate(scores.begin(), scores.end(), 0); smoothing.push_back(weight*(scores[MONO]+scores[DRIGHT]+0.1)/total); smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT])/total); } void ScorerLR::createConstSmoothing(double weight, vector& smoothing) const { for (int i=1; i<=2; ++i) { smoothing.push_back(weight); } } void Model::score_fe(const string& f, const string& e) { if (!fe) //Make sure we do not do anything if it is not a fe model return; fprintf(file,"%s ||| %s ||| ",f.c_str(),e.c_str()); //condition on the previous phrase if (previous) { vector scores; scorer->score(modelscore->get_scores_fe_prev(), scores); double sum = 0; for(size_t i=0; i scores; scorer->score(modelscore->get_scores_fe_next(), scores); double sum = 0; for(size_t i=0; i scores; scorer->score(modelscore->get_scores_f_prev(), scores); double sum = 0; for(size_t i=0; i scores; scorer->score(modelscore->get_scores_f_next(), scores); double sum = 0; for(size_t i=0; i 0) { gzwrite(gzfile, inbuffer, num_read); } fclose(file); gzclose(gzfile); //Remove the unzipped file remove(filename.c_str()); } void Model::split_config(const string& config, string& dir, string& lang, string& orient) { istringstream is(config); string type; getline(is, type, '-'); getline(is, orient, '-'); getline(is, dir, '-'); getline(is, lang, '-'); } Model* Model::createModel(ModelScore* modelscore, const string& config, const string& filepath) { string dir, lang, orient, filename; split_config(config,dir,lang,orient); filename = filepath + config; if (orient.compare("mslr") == 0) { return new Model(modelscore, new ScorerMSLR(), dir, lang, filename); } else if (orient.compare("msd") == 0) { return new Model(modelscore, new ScorerMSD(), dir, lang, filename); } else if (orient.compare("monotonicity") == 0) { return new Model(modelscore, new ScorerMonotonicity(), dir, lang, filename); } else if (orient.compare("leftright") == 0) { return new Model(modelscore, new ScorerLR(), dir, lang, filename); } else { cerr << "Illegal orientation type of reordering model: " << orient << "\n allowed types: mslr, msd, monotonicity, leftright\n"; exit(1); } } void Model::createSmoothing(double w) { scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev); scorer->createSmoothing(modelscore->get_scores_fe_next(), w, smoothing_next); } void Model::createConstSmoothing(double w) { scorer->createConstSmoothing(w, smoothing_prev); scorer->createConstSmoothing(w, smoothing_next); }