Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicola Bertoldi <bertoldi@fbk.eu>2014-12-13 14:52:47 +0300
committerNicola Bertoldi <bertoldi@fbk.eu>2014-12-13 14:52:47 +0300
commite4eb201c52be74fee74399a6f35fcbe8eb85d834 (patch)
tree7792ef96d63262f6e28f1857741e1162c7dccbc4 /moses/Parameter.cpp
parentcea2d9d8bb34a81660974cae20d66aefec4e0468 (diff)
parenta0b6b6a341e74b47bbef4652ad7fd928cf91e17c (diff)
merged master into dynamic-models and solved conflicts
Diffstat (limited to 'moses/Parameter.cpp')
-rw-r--r--moses/Parameter.cpp210
1 files changed, 129 insertions, 81 deletions
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index f9a17fd74..36b9a955d 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -36,6 +36,7 @@ using namespace std;
namespace Moses
{
+
/** define allowed parameters */
Parameter::Parameter()
{
@@ -49,13 +50,14 @@ Parameter::Parameter()
AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
AddParam("input-factors", "list of factors in the input");
AddParam("input-file", "i", "location of the input file to be translated");
- AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
+ AddParam("inputtype", "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
AddParam("mark-unknown", "mu", "mark unknown words in output");
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
AddParam("max-phrase-length", "maximum phrase length (default 20)");
AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
+ AddParam("n-best-trees", "Write n-best target-side trees to n-best-list");
AddParam("lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
AddParam("print-all-derivations", "to print all derivations in search graph");
@@ -102,7 +104,7 @@ Parameter::Parameter()
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
- AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed byy a directory name, which must exist");
+ AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed by a directory name, which must exist");
AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
#ifdef HAVE_PROTOBUF
@@ -207,15 +209,31 @@ Parameter::Parameter()
AddParam("placeholder-factor", "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model");
AddParam("no-cache", "Disable all phrase-table caching. Default = false (ie. enable caching)");
+ AddParam("default-non-term-for-empty-range-only", "Don't add [X] to all ranges, just ranges where there isn't a source non-term. Default = false (ie. add [X] everywhere)");
+ AddParam("s2t", "Use specialized string-to-tree decoder.");
+ AddParam("s2t-parsing-algorithm", "Which S2T parsing algorithm to use. 0=recursive CYK+, 1=scope-3 (default = 0)");
- AddParam("adjacent-only", "Only allow hypotheses which are adjacent to current derivation. ITG without block moves");
-
+ AddParam("spe-src", "Simulated post-editing. Source filename");
+ AddParam("spe-trg", "Simulated post-editing. Target filename");
+ AddParam("spe-aln", "Simulated post-editing. Alignment filename");
}
Parameter::~Parameter()
{
}
+const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
+{
+ PARAM_MAP::const_iterator iter = m_setting.find( paramName );
+ if (iter == m_setting.end()) {
+ return NULL;
+ }
+ else {
+ return &iter->second;
+ }
+
+}
+
/** initialize a parameter, sub of constructor */
void Parameter::AddParam(const string &paramName, const string &description)
{
@@ -324,28 +342,29 @@ bool Parameter::LoadParam(int argc, char* argv[])
}
// don't mix old and new format
- if ((isParamSpecified("feature") || isParamSpecified("weight"))
- && (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
- isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
- isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
- isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
- isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
- isParamSpecified("weight-p") ||
- isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
- isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
- isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
- isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
- isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
- isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
- isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
+ if ((GetParam("feature") || GetParam("weight"))
+ && (GetParam("weight-slm") || GetParam("weight-bl") || GetParam("weight-d") ||
+ GetParam("weight-dlm") || GetParam("weight-lrl") || GetParam("weight-generation") ||
+ GetParam("weight-i") || GetParam("weight-l") || GetParam("weight-lex") ||
+ GetParam("weight-glm") || GetParam("weight-wt") || GetParam("weight-pp") ||
+ GetParam("weight-pb") || GetParam("weight-t") || GetParam("weight-w") ||
+ GetParam("weight-p") ||
+ GetParam("weight-u") || GetParam("weight-e") ||
+ GetParam("dlm-mode") || GetParam("generation-file") || GetParam("global-lexical-file") ||
+ GetParam("glm-feature") || GetParam("lmodel-file") || GetParam("lmodel-dub") ||
+ GetParam("slmodel-file") || GetParam("slmodel-factor") ||
+ GetParam("slmodel-beam") || GetParam("ttable-file") || GetParam("phrase-pair-feature") ||
+ GetParam("phrase-boundary-source-feature") || GetParam("phrase-boundary-target-feature") || GetParam("phrase-length-feature") ||
+ GetParam("target-word-insertion-feature") || GetParam("source-word-deletion-feature") || GetParam("word-translation-feature")
)
) {
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
}
// convert old weights args to new format
- if (!isParamSpecified("feature"))
+ if (GetParam("feature") == NULL) {
ConvertWeightArgs();
+ }
CreateWeightsMap();
WeightOverwrite();
@@ -370,19 +389,16 @@ bool Parameter::LoadParam(int argc, char* argv[])
void Parameter::AddFeaturesCmd()
{
- if (!isParamSpecified("feature-add")) {
- return;
+ const PARAM_VEC *params = GetParam("feature-add");
+ if (params) {
+ PARAM_VEC::const_iterator iter;
+ for (iter = params->begin(); iter != params->end(); ++iter) {
+ const string &line = *iter;
+ AddFeature(line);
+ }
+
+ m_setting.erase("feature-add");
}
-
- const PARAM_VEC &params = GetParam("feature-add");
-
- PARAM_VEC::const_iterator iter;
- for (iter = params.begin(); iter != params.end(); ++iter) {
- const string &line = *iter;
- AddFeature(line);
- }
-
- m_setting.erase("feature-add");
}
std::vector<float> Parameter::GetWeights(const std::string &name)
@@ -456,9 +472,12 @@ void Parameter::ConvertWeightArgsSingleWeight(const string &oldWeightName, const
void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
{
+ const PARAM_VEC *params;
+
// process input weights 1st
- if (isParamSpecified("weight-i")) {
- vector<float> inputWeights = Scan<float>(m_setting["weight-i"]);
+ params = GetParam("weight-i");
+ if (params) {
+ vector<float> inputWeights = Scan<float>(*params);
PARAM_VEC &numInputScores = m_setting["input-scores"];
if (inputWeights.size() == 1) {
UTIL_THROW_IF2(numInputScores.size() != 0, "No [input-scores] section allowed");
@@ -480,19 +499,26 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
size_t numRealWordsInInput = 0;
map<string, size_t> ptIndices;
- if (GetParam("input-scores").size()) {
- numInputScores = Scan<size_t>(GetParam("input-scores")[0]);
- }
+ params = GetParam("input-scores");
+ if (params) {
+ numInputScores = Scan<size_t>(params->at(0));
- if (GetParam("input-scores").size() > 1) {
- numRealWordsInInput = Scan<size_t>(GetParam("input-scores")[1]);
+ if (params->size() > 1) {
+ numRealWordsInInput = Scan<size_t>(params->at(1));
+ }
}
// load phrase translation tables
- if (GetParam("ttable-file").size() > 0) {
+ params = GetParam("ttable-file");
+ if (params) {
// weights
- const vector<string> &translationVector = GetParam("ttable-file");
- vector<size_t> maxTargetPhrase = Scan<size_t>(GetParam("ttable-limit"));
+ const vector<string> translationVector = *params;
+
+ vector<size_t> maxTargetPhrase;
+ params = GetParam("ttable-limit");
+ if (params) {
+ maxTargetPhrase = Scan<size_t>(*params);
+ }
if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
VERBOSE(1, "Using uniform ttable-limit of " << maxTargetPhrase[0] << " for all translation tables." << endl);
@@ -520,32 +546,32 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
}
UTIL_THROW_IF2(token.size() < 5, "Phrase table must have at least 5 scores");
- PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(token[0]);
+ int implementation = Scan<int>(token[0]);
string ptType;
switch (implementation) {
- case Memory:
+ case 0: // Memory
ptType = "PhraseDictionaryMemory";
break;
- case Binary:
+ case 1: // Binary
ptType = "PhraseDictionaryBinary";
break;
- case OnDisk:
+ case 2: // OnDisk
ptType = "PhraseDictionaryOnDisk";
break;
- case SCFG:
+ case 6: // SCFG
ptType = "PhraseDictionaryMemory";
break;
- case Compact:
+ case 12: // Compact
ptType = "PhraseDictionaryCompact";
break;
- case SuffixArray:
+ case 8: // SuffixArray
ptType = "PhraseDictionarySuffixArray";
break;
- case DSuffixArray:
+ case 14: // DSuffixArray
ptType = "PhraseDictionaryDynSuffixArray";
break;
- case DCacheBased:
+ case 15: // DCacheBased:
ptType = "PhraseDictionaryDynamicCacheBased";
break;
default:
@@ -601,7 +627,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
ptLine << "num-features=" << numScoreComponent << " ";
ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";
- if (implementation == SuffixArray || implementation == DSuffixArray) {
+ if (implementation == 8 || implementation == 14) {
ptLine << "target-path=" << token[5] << " ";
ptLine << "alignment-path=" << token[6] << " ";
}
@@ -629,37 +655,36 @@ void Parameter::ConvertWeightArgsDistortion()
const string oldLexReordingName = "distortion-file";
// distortion / lex distortion
- const PARAM_VEC &oldWeights = GetParam(oldWeightName);
+ const PARAM_VEC *oldWeights = GetParam(oldWeightName);
- if (oldWeights.size() > 0) {
- if (!isParamSpecified("search-algorithm") ||
- (GetParam("search-algorithm").size() > 0
- && (Trim(GetParam("search-algorithm")[0]) == "0"
- ||Trim(GetParam("search-algorithm")[0]) == "1"
- )
+ if (oldWeights) {
+ const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
+ if (searchAlgo == NULL ||
+ (searchAlgo->size() > 0
+ && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
)
) {
// phrase-based. Add distance distortion to list of features
AddFeature("Distortion");
- SetWeight("Distortion", 0, Scan<float>(oldWeights[0]));
+ SetWeight("Distortion", 0, Scan<float>(oldWeights->at(0)));
}
// everything but the last is lex reordering model
size_t currOldInd = 1;
- const PARAM_VEC &lextable = GetParam(oldLexReordingName);
+ const PARAM_VEC *lextable = GetParam(oldLexReordingName);
- for (size_t indTable = 0; indTable < lextable.size(); ++indTable) {
- const string &line = lextable[indTable];
+ for (size_t indTable = 0; lextable && indTable < lextable->size(); ++indTable) {
+ const string &line = lextable->at(indTable);
vector<string> toks = Tokenize(line);
size_t numFF = Scan<size_t>(toks[2]);
vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
- UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
- "Errors converting old distortion weights to new weights");
- float weight = Scan<float>(oldWeights[currOldInd]);
+ UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
+ "Errors converting old distortion weights to new weights");
+ float weight = Scan<float>(oldWeights->at(currOldInd));
weights[currFF] = weight;
++currOldInd;
@@ -693,21 +718,23 @@ void Parameter::ConvertWeightArgsLM()
{
const string oldWeightName = "weight-l";
const string oldFeatureName = "lmodel-file";
+ const PARAM_VEC *params;
bool isChartDecoding = true;
- if (!isParamSpecified("search-algorithm") ||
- (GetParam("search-algorithm").size() > 0
- && (Trim(GetParam("search-algorithm")[0]) == "0"
- ||Trim(GetParam("search-algorithm")[0]) == "1"
- )
+
+ params = GetParam("search-algorithm");
+ if (params == NULL ||
+ (params->size() > 0
+ && (Trim(params->at(0)) == "0" || Trim(params->at(0)) == "1")
)
) {
isChartDecoding = false;
}
vector<int> oovWeights;
- if (isParamSpecified("lmodel-oov-feature")) {
- oovWeights = Scan<int>(m_setting["lmodel-oov-feature"]);
+ params = GetParam("lmodel-oov-feature");
+ if (params) {
+ oovWeights = Scan<int>(*params);
}
PARAM_MAP::iterator iterMap;
@@ -827,11 +854,10 @@ void Parameter::ConvertWeightArgsWordPenalty()
const std::string newWeightName = "WordPenalty";
bool isChartDecoding = true;
- if (!isParamSpecified("search-algorithm") ||
- (GetParam("search-algorithm").size() > 0
- && (Trim(GetParam("search-algorithm")[0]) == "0"
- ||Trim(GetParam("search-algorithm")[0]) == "1"
- )
+ const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
+ if (searchAlgo == NULL ||
+ (searchAlgo->size() > 0
+ && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
)
) {
isChartDecoding = false;
@@ -858,10 +884,11 @@ void Parameter::ConvertWeightArgsWordPenalty()
void Parameter::ConvertPhrasePenalty()
{
string oldWeightName = "weight-p";
- if (isParamSpecified(oldWeightName)) {
- UTIL_THROW_IF2(m_setting[oldWeightName].size() != 1,
- "There should be only 1 phrase-penalty weight");
- float weight = Scan<float>(m_setting[oldWeightName][0]);
+ const PARAM_VEC *params = GetParam(oldWeightName);
+ if (params) {
+ UTIL_THROW_IF2(params->size() != 1,
+ "There should be only 1 phrase-penalty weight");
+ float weight = Scan<float>(params->at(0));
AddFeature("PhrasePenalty");
SetWeight("PhrasePenalty", 0, weight);
@@ -1371,6 +1398,27 @@ void Parameter::Save(const std::string path)
file.close();
}
+template<>
+void Parameter::SetParameter<bool>(bool &parameter, const std::string &parameterName, const bool &defaultValue) const
+{
+ const PARAM_VEC *params = GetParam(parameterName);
+
+ // default value if nothing is specified
+ parameter = defaultValue;
+ if (params == NULL) {
+ return;
+ }
+
+ // if parameter is just specified as, e.g. "-parameter" set it true
+ if (params->size() == 0) {
+ parameter = true;
+ }
+ // if paramter is specified "-parameter true" or "-parameter false"
+ else if (params->size() == 1) {
+ parameter = Scan<bool>( params->at(0));
+ }
}
+} // namespace
+