/** * Extract features and score statistics from nbest file, optionally merging with * those from the previous iteration. * Developed during the 2nd MT marathon. **/ #include #include #include #include #include "Data.h" #include "Scorer.h" #include "ScorerFactory.h" #include "Timer.h" #include "Util.h" using namespace std; void usage() { cerr<<"usage: extractor [options])"< 0 && referenceFile.length() == 0)) { throw runtime_error("Error: reference file is not specified; you can not score the nbest"); } vector nbestFiles; if (nbestFile.length() > 0) { Tokenize(nbestFile.c_str(), ',', &nbestFiles); } vector referenceFiles; if (referenceFile.length() > 0) { Tokenize(referenceFile.c_str(), ',', &referenceFiles); } vector prevScoreDataFiles; if (prevScoreDataFile.length() > 0) { Tokenize(prevScoreDataFile.c_str(), ',', &prevScoreDataFiles); } vector prevFeatureDataFiles; if (prevFeatureDataFile.length() > 0) { Tokenize(prevFeatureDataFile.c_str(), ',', &prevFeatureDataFiles); } if (prevScoreDataFiles.size() != prevFeatureDataFiles.size()) { throw runtime_error("Error: there is a different number of previous score and feature files"); } if (binmode) cerr << "Binary write mode is selected" << endl; else cerr << "Binary write mode is NOT selected" << endl; TRACE_ERR("Scorer type: " << scorerType << endl); // ScorerFactory sfactory; Scorer* scorer = ScorerFactory::getScorer(scorerType,scorerConfig); // load references if (referenceFiles.size() > 0) scorer->setReferenceFiles(referenceFiles); PrintUserTime("References loaded"); Data data(*scorer); // load old data for (size_t i=0; i < prevScoreDataFiles.size(); i++) { data.load(prevFeatureDataFiles.at(i), prevScoreDataFiles.at(i)); } PrintUserTime("Previous data loaded"); // computing score statistics of each nbest file for (size_t i=0; i < nbestFiles.size(); i++) { data.loadnbest(nbestFiles.at(i)); } PrintUserTime("Nbest entries loaded and scored"); if (binmode) cerr << "Binary write mode is selected" << endl; else cerr << 
"Binary write mode is NOT selected" << endl; data.save(featureDataFile, scoreDataFile, binmode); PrintUserTime("Stopping..."); // timer.stop("Stopping..."); delete scorer; return EXIT_SUCCESS; } catch (const exception& e) { cerr << "Exception: " << e.what() << endl; return EXIT_FAILURE; } }