Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/training/eppex/eppex.cpp')
-rw-r--r--scripts/training/eppex/eppex.cpp217
1 files changed, 0 insertions, 217 deletions
diff --git a/scripts/training/eppex/eppex.cpp b/scripts/training/eppex/eppex.cpp
deleted file mode 100644
index d382890d2..000000000
--- a/scripts/training/eppex/eppex.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/**
- * Epochal Phrase Extraction.
- *
- * (C) Moses: http://www.statmt.org/moses/
- * (C) Ceslav Przywara, UFAL MFF UK, 2011
- *
- * $Id$
- */
-
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <string.h>
-
-#include "config.h"
-#include "phrase-extract.h"
-#include "shared.h"
-
-
-#define REQUIRED_PARAMS_NUM 5
-
-
-//// Output processor declaration.
-
-class FlushingOutputProcessor: public OutputProcessor {
-
-private:
- const bool _compactOutputFlag;
-
-public:
- FlushingOutputProcessor(bool compactOutputFlag): _compactOutputFlag(compactOutputFlag) {}
-
- void operator() (const std::string& srcPhrase, const std::string& tgtPhrase, const std::string& orientationInfo, const alignment_t& alignment, const size_t frequency, int mode);
-};
-
-
-//// Global variables.
-
-// output files
-std::ofstream extractFile; // extract
-std::ofstream extractFileInv; // extract.inv
-std::ofstream extractFileOrientation; // extract.o
-
-////
-bool compactOutputFlag = false; // Generate compact output:
-// Each phrase pair is printed only once with its frequency prepended.
-// Note that compacted output is not compatible with std phrase-extract format.
-
-
-//// Functions.
-
-void program_info(void) {
- std::cerr
- << "Epochal Phrase Extraction (" << PACKAGE_STRING << ") written by Ceslav Przywara (based on PhraseExtract v1.4 by Philipp Koehn).\n"
- << "Compiled with "
-#ifdef USE_UNORDERED_MAP
- << "std::tr1::unordered_map"
-#else
- << "std::map"
-#endif
- << " implementation.\n"
- ;
-}
-
-void read_optional_params(int argc, char* argv[], int optionalParamsStart);
-
-void usage(const char* programName) {
- std::cerr << std::endl << "Syntax: " << std::string(programName) << " tgt src align extract lossy-counter [lossy-counter-2 [...]] [--compact] [--sort] [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ]]" << std::endl;
- std::cerr << get_lossy_counting_params_format();
- exit(1);
-}
-
-
-/*******************************************************************************
- * MAIN *
- ******************************************************************************/
-int main(int argc, char* argv[]) {
-
- // Welcome user with program info!
- program_info();
-
- if (argc <= REQUIRED_PARAMS_NUM) {
- usage(argv[0]);
- }
-
- const char* fileNameE = argv[1];
- const char* fileNameF = argv[2];
- const char* fileNameA = argv[3];
- std::string fileNameExtract = std::string(argv[4]);
-
- // Init lossy counters.
- std::string lossyCountersParams;
- int paramIdx = 5;
-
- while ( (argc > paramIdx) && (*argv[paramIdx] != '-') ) {
- std::string param = std::string(argv[paramIdx]);
- if ( !parse_lossy_counting_params(param) ) {
- usage(argv[0]);
- }
- lossyCountersParams += (" " + param);
- ++paramIdx;
- }
-
- if ( paramIdx == REQUIRED_PARAMS_NUM ) {
- std::cerr << "ERROR: no Lossy Counting parameters specified!" << std::endl;
- usage(argv[0]);
- }
-
- for ( size_t i = 1; i < lossyCounters.size(); ++i ) {
- if ( lossyCounters[i] == NULL ) {
- std::cerr << "ERROR: max phrase length set to " << maxPhraseLength << ", but no Lossy Counting parameters specified for phrase pairs of length " << i << "!" << std::endl;
- usage(argv[0]);
- }
- }
-
- if ( (argc > paramIdx) && (strcmp(argv[paramIdx], "--compact") == 0) ) {
- compactOutputFlag = true;
- ++paramIdx;
- }
-
- if ( (argc > paramIdx) && (strcmp(argv[paramIdx], "--sort") == 0) ) {
- sortedOutput = true;
- ++paramIdx;
- }
-
- //
- read_optional_params(argc, argv, paramIdx);
-
- std::cerr << "Starting epochal phrase table extraction with params:" << lossyCountersParams << std::endl;
- std::cerr << "Output will be " << (sortedOutput ? "sorted" : "unsorted") << "." << std::endl;
-
- // open input files
- std::ifstream eFile(fileNameE);
- std::ifstream fFile(fileNameF);
- std::ifstream aFile(fileNameA);
-
- // open output files
- if (translationFlag) {
- if (sortedOutput) {
- extractFile.open((fileNameExtract + ".sorted").c_str());
- extractFileInv.open((fileNameExtract + ".inv.sorted").c_str());
- }
- else {
- extractFile.open(fileNameExtract.c_str());
- extractFileInv.open((fileNameExtract + ".inv").c_str());
- }
- }
- if (orientationFlag) {
- extractFileOrientation.open((fileNameExtract + ".o").c_str());
- }
-
- //
- readInput(eFile, fFile, aFile);
-
- std::cerr << std::endl; // Leave the progress bar end on previous line.
-
- // close input files
- eFile.close();
- fFile.close();
- aFile.close();
-
- FlushingOutputProcessor processor(compactOutputFlag);
- processOutput(processor);
-
- // close output files
- if (translationFlag) {
- extractFile.close();
- extractFileInv.close();
- }
- if (orientationFlag) {
- extractFileOrientation.close();
- }
-
- printStats();
-
-} // end of main()
-
-
-void FlushingOutputProcessor::operator()(const std::string& srcPhrase, const std::string& tgtPhrase, const std::string& orientationInfo, const alignment_t& alignment, const size_t frequency, int mode) {
-
- size_t m = frequency;
-
- if ( _compactOutputFlag ) {
- // Prepend frequency.
- if (translationFlag && (mode >= 0)) extractFile << frequency << " ||| ";
- if (translationFlag && (mode <= 0)) extractFileInv << frequency << " ||| ";
- if (orientationFlag && (mode >= 0)) extractFileOrientation << frequency << " ||| ";
- m = 1; // Loop only once!
- }
-
- for ( size_t i = 0; i < m; ++i ) {
-
- // alignment
- if (translationFlag) {
-
- if (mode >= 0) extractFile << srcPhrase << " ||| " << tgtPhrase << " |||";
- if (mode <= 0) extractFileInv << tgtPhrase << " ||| " << srcPhrase << " |||";
-
- for ( alignment_t::const_iterator alignIter = alignment.begin(); alignIter != alignment.end(); ++alignIter ) {
- // Note that unsigned char isn't treated as numeric value by stream operators,
- // so casting is necessary.
- if (mode >= 0) extractFile << " " << (int) alignIter->first << "-" << (int) alignIter->second;
- if (mode <= 0) extractFileInv << " " << (int) alignIter->second << "-" << (int) alignIter->first;
- }
-
- if (mode >= 0) extractFile << "\n";
- if (mode <= 0) extractFileInv << "\n";
- }
-
- if (orientationFlag && (mode >= 0)) {
- extractFileOrientation << srcPhrase << " ||| " << tgtPhrase << " ||| " << orientationInfo << "\n";
- }
-
- }
-
-}