Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/training/eppex/shared.cpp')
-rw-r--r--scripts/training/eppex/shared.cpp194
1 files changed, 0 insertions, 194 deletions
diff --git a/scripts/training/eppex/shared.cpp b/scripts/training/eppex/shared.cpp
deleted file mode 100644
index 670df1c0f..000000000
--- a/scripts/training/eppex/shared.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
- * Implementation of functionality shared between counter, eppex and
- * (not yet finished) memscoring eppex.
- *
- * (C) Moses: http://www.statmt.org/moses/
- * (C) Ceslav Przywara, UFAL MFF UK, 2011
- *
- * $Id$
- */
-
-#include <string.h>
-#include <boost/tokenizer.hpp>
-#include <iostream>
-
-#include "typedefs.h"
-#include "phrase-extract.h"
-#include "shared.h"
-
-
-std::string get_lossy_counting_params_format(void) {
- return "\n"
- "You may specify separate Lossy Counter (LC) for each phrase length or\n"
- "use shared LC for all phrase pairs with length from given inclusive interval.\n"
- "Every LC is defined by parameter in form phrase-length:error:support, where:\n"
- " phrase-length ... a single number (eg. 2) or interval (eg. 2-4)\n"
- " error ... error parameter for lossy counting\n"
- " support ... support parameter for lossy counting\n"
- "\n"
- "Example of LC params: 1:0:0 2-4:1e-7:4e-7 5-7:2e-8:8e-8\n"
- " - phrase pairs of length 1 will NOT be pruned\n"
- " - phrase pairs of length from 2 to 4 (inclusive) will be pruned altogether by LC\n"
- " with parameters support=4e-7 and error=1e-7\n"
- " - phrase pairs of length from 5 to 7 (inclusive) will be pruned altogether by LC\n"
- " with parameters support=8e-8 and error=2e-8\n"
- " - max phrase length extracted will be set to 7\n"
- "\n"
- "Note: there has to be Lossy Counter defined for every phrase pair length\n"
- "up to the maximum phrase length! Following will not work: 1:0:0 5-7:2e-8:8e-8\n"
- "\n"
- "To count phrase pairs by their length a separate program (counter) may be used.\n"
- "\n"
- ;
-}
-
-bool parse_lossy_counting_params(const std::string& param) {
-
- // See: http://www.boost.org/doc/libs/1_42_0/libs/tokenizer/char_separator.htm
- boost::char_separator<char> separators(",:");
- boost::tokenizer<boost::char_separator<char> > tokens(param, separators);
- boost::tokenizer<boost::char_separator<char> >::iterator iter = tokens.begin();
-
- std::string interval = *iter;
-
- if ( ++iter == tokens.end() ) {
- std::cerr << "ERROR: Failed to proccess Lossy Counting param \"" << param << "\": invalid format, missing error and support parameters specification!" << std::endl;
- return false;
- }
- PhrasePairsLossyCounter::error_t error = atof((*iter).c_str());
-
- if ( ++iter == tokens.end() ) {
- std::cerr << "ERROR: Failed to proccess Lossy Counting param \"" << param << "\": invalid format, missing support parameter specification!" << std::endl;
- return false;
- }
- PhrasePairsLossyCounter::support_t support = atof((*iter).c_str());
-
- if ( (error > 0) && !(error < support) ) {
- std::cerr << "ERROR: Failed to proccess Lossy Counting param \"" << param << "\": support parameter (" << support << ") is not greater than error (" << error << ")!" << std::endl;
- return false;
- }
-
- // Split interval.
- boost::char_separator<char> separator("-");
- boost::tokenizer<boost::char_separator<char> > intervalTokens(interval, separator);
- iter = intervalTokens.begin();
-
- int from = 0, to = 0;
-
- from = atoi((*iter).c_str());
- if ( ++iter == intervalTokens.end() )
- to = from;
- else
- to = atoi((*iter).c_str());
-
- if ( ! (from <= to) ) {
- std::cerr << "ERROR: Failed to proccess Lossy Counting param \"" << param << "\": invalid interval " << from << "-" << to << " specified!" << std::endl;
- return false;
- }
-
- LossyCounterInstance* lci = new LossyCounterInstance(error, support);
-
- if ( lossyCounters.size() <= to ) {
- lossyCounters.resize(to + 1, NULL);
- }
-
- for ( size_t i = from; i <= to; ++i ) {
- if ( lossyCounters[i] != NULL ) {
- std::cerr << "ERROR: Failed to proccess Lossy Counting param \"" << param << "\": Lossy Counter for phrases of length " << i << " is already defined!" << std::endl;
- return false;
- }
- lossyCounters[i] = lci;
- }
-
- // Set maximum phrase length accordingly:
- if ( maxPhraseLength < to )
- maxPhraseLength = to;
-
- return true;
-}
-
-void read_optional_params(int argc, char* argv[], int optionalParamsStart) {
-
- for ( int i = optionalParamsStart; i < argc; i++ ) {
- if (strcmp(argv[i],"--OnlyOutputSpanInfo") == 0) {
- std::cerr << "Error: option --OnlyOutputSpanInfo is not supported!\n";
- exit(2);
- } else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
- orientationFlag = true;
- } else if (strcmp(argv[i],"--NoTTable") == 0) {
- translationFlag = false;
- } else if(strcmp(argv[i],"--model") == 0) {
- if (i+1 >= argc) {
- std::cerr << "extract: syntax error, no model's information provided to the option --model " << std::endl;
- exit(1);
- }
- char* modelParams = argv[++i];
- const char* modelName = strtok(modelParams, "-");
- const char* modelType = strtok(NULL, "-");
-
- if(strcmp(modelName, "wbe") == 0) {
- wordModel = true;
- if(strcmp(modelType, "msd") == 0) {
- wordType = REO_MSD;
- }
- else if(strcmp(modelType, "mslr") == 0) {
- wordType = REO_MSLR;
- }
- else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0) {
- wordType = REO_MONO;
- }
- else {
- std::cerr << "extract: syntax error, unknown reordering model type: " << modelType << std::endl;
- exit(1);
- }
- } else if(strcmp(modelName, "phrase") == 0) {
- phraseModel = true;
- if(strcmp(modelType, "msd") == 0) {
- phraseType = REO_MSD;
- }
- else if(strcmp(modelType, "mslr") == 0) {
- phraseType = REO_MSLR;
- }
- else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0) {
- phraseType = REO_MONO;
- }
- else {
- std::cerr << "extract: syntax error, unknown reordering model type: " << modelType << std::endl;
- exit(1);
- }
- } else if(strcmp(modelName, "hier") == 0) {
- hierModel = true;
- if(strcmp(modelType, "msd") == 0) {
- hierType = REO_MSD;
- }
- else if(strcmp(modelType, "mslr") == 0) {
- hierType = REO_MSLR;
- }
- else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0) {
- hierType = REO_MONO;
- }
- else {
- std::cerr << "extract: syntax error, unknown reordering model type: " << modelType << std::endl;
- exit(1);
- }
- } else {
- std::cerr << "extract: syntax error, unknown reordering model: " << modelName << std::endl;
- exit(1);
- }
-
- allModelsOutputFlag = true;
-
- } else {
- std::cerr << "extract: syntax error, unknown option '" << std::string(argv[i]) << "'\n";
- exit(1);
- }
- }
-
- // default reordering model if no model selected
- // allows for the old syntax to be used
- if(orientationFlag && !allModelsOutputFlag) {
- wordModel = true;
- wordType = REO_MSD;
- }
-
-} // end of read_optional_params()