Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- moses/FF/FeatureFunction.cpp 31
-rw-r--r-- moses/FF/FeatureFunction.h 14
-rw-r--r-- moses/Parameter.cpp 22
-rw-r--r-- moses/ScoreComponentCollection.cpp 6
-rw-r--r-- moses/Util.cpp 6
-rwxr-xr-x scripts/training/mert-moses.pl 151
6 files changed, 179 insertions, 51 deletions
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index 71f4ff568..fa898857d 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -50,6 +50,7 @@ FeatureFunction(const std::string& line)
, m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(1)
{
+ m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
}
@@ -61,6 +62,7 @@ FeatureFunction(size_t numScoreComponents,
, m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(numScoreComponents)
{
+ m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
}
@@ -95,6 +97,7 @@ void FeatureFunction::ParseLine(const std::string &line)
if (args[0] == "num-features") {
m_numScoreComponents = Scan<size_t>(args[1]);
+ m_numTuneableComponents = m_numScoreComponents;
} else if (args[0] == "name") {
m_description = args[1];
} else {
@@ -120,13 +123,17 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
{
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
+ } else if (key == "tuneable-components") {
+ UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
+ << ": tuneable-components cannot be set if tuneable=false");
+ SetTuneableComponents(value);
} else if (key == "require-sorting-after-source-context") {
m_requireSortingAfterSourceContext = Scan<bool>(value);
} else if (key == "verbosity") {
m_verbosity = Scan<size_t>(value);
} else if (key == "filterable") { //ignore
} else {
- UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
+ UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value);
}
}
@@ -142,7 +149,27 @@ void FeatureFunction::ReadParameters()
std::vector<float> FeatureFunction::DefaultWeights() const
{
- UTIL_THROW(util::Exception, "No default weights");
+ UTIL_THROW2(GetScoreProducerDescription() << ": No default weights");
+}
+
+void FeatureFunction::SetTuneableComponents(const std::string& value) // parses the value of the "tuneable-components" parameter into per-component flags
+{
+ std::vector<std::string> toks = Tokenize(value,","); // value is split on commas, one token per score component
+ UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription()
+ << ": Empty tuneable-components"); // throws when the list contains no tokens
+ UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription()
+ << ": tuneable-components value has to be a comma-separated list of "
+ << m_numScoreComponents << " boolean values"); // throws unless there is exactly one token per score component
+
+ m_tuneableComponents.resize(m_numScoreComponents);
+ m_numTuneableComponents = m_numScoreComponents; // start from "all tuneable"; the loop below subtracts the disabled ones
+
+ for (size_t i = 0; i < toks.size(); ++i) {
+ m_tuneableComponents[i] = Scan<bool>(toks[i]); // each token is converted with Scan<bool>
+ if (!m_tuneableComponents[i]) {
+ --m_numTuneableComponents; // every false flag reduces the tuneable count by one
+ }
+ }
}
}
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index 767270140..b59998d9d 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -39,6 +39,8 @@ protected:
bool m_requireSortingAfterSourceContext;
size_t m_verbosity;
size_t m_numScoreComponents;
+ std::vector<bool> m_tuneableComponents;
+ size_t m_numTuneableComponents;
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
@@ -90,6 +92,17 @@ public:
return m_tuneable;
}
+ virtual bool HasTuneableComponents() const { // reports whether at least one score component is counted as tuneable
+ return m_numTuneableComponents; // size_t count implicitly converted to bool: nonzero yields true
+ }
+
+ virtual bool IsTuneableComponent(size_t i) const { // reports whether score component i is tuneable
+ if (m_numTuneableComponents == m_numScoreComponents) {
+ return true; // no component is disabled, so the per-component vector is not consulted
+ }
+ return m_tuneableComponents[i]; // unchecked index; NOTE(review): assumes i < m_tuneableComponents.size() - confirm against callers
+ }
+
virtual bool RequireSortingAfterSourceContext() const {
return m_requireSortingAfterSourceContext;
}
@@ -151,6 +164,7 @@ public:
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();
+ virtual void SetTuneableComponents(const std::string& value);
};
}
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index c5677b73b..0289eebff 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -970,11 +970,13 @@ void Parameter::WeightOverwrite()
// should only be on 1 line
UTIL_THROW_IF2(vec.size() != 1,
- "Weight override should only be on 1 line");
+ "weight-overwrite should only be on 1 line");
string name("");
vector<float> weights;
vector<string> toks = Tokenize(vec[0]);
+ size_t cnt = 0;
+ const std::vector<float>* oldWeights = NULL;
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
@@ -988,10 +990,24 @@ void Parameter::WeightOverwrite()
}
name = tok.substr(0, tok.size() - 1);
+ std::map<std::string, std::vector<float> >::const_iterator found = m_weights.find(name);
+ if (found!=m_weights.end()) {
+ oldWeights = &(found->second);
+ } else {
+ oldWeights = NULL;
+ }
+ cnt = 0;
} else {
// a weight for curr ff
- float weight = Scan<float>(toks[i]);
- weights.push_back(weight);
+ if (toks[i] == "x") {
+ UTIL_THROW_IF2(!oldWeights || cnt>=oldWeights->size(),
+ "Keeping previous weight failed in weight-overwrite");
+ weights.push_back(oldWeights->at(cnt));
+ } else {
+ float weight = Scan<float>(toks[i]);
+ weights.push_back(weight);
+ }
+ ++cnt;
}
}
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index a1c864692..e656743ec 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -332,14 +332,16 @@ void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
bool labeledOutput = staticData.IsLabeledNBestList();
// regular features (not sparse)
- if (ff->GetNumScoreComponents() != 0) {
+ if (ff->HasTuneableComponents()) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}
vector<float> scores = GetScoresForProducer( ff );
for (size_t j = 0; j<scores.size(); ++j) {
- out << " " << scores[j];
+ if (ff->IsTuneableComponent(j)) {
+ out << " " << scores[j];
+ }
}
}
diff --git a/moses/Util.cpp b/moses/Util.cpp
index 5b6f16e2b..1d1df7d58 100644
--- a/moses/Util.cpp
+++ b/moses/Util.cpp
@@ -348,7 +348,11 @@ void PrintFeatureWeight(const FeatureFunction* ff)
size_t numScoreComps = ff->GetNumScoreComponents();
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
for (size_t i = 0; i < numScoreComps; ++i) {
- cout << " " << values[i];
+ if (ff->IsTuneableComponent(i)) {
+ cout << " " << values[i];
+ } else {
+ cout << " UNTUNEABLECOMPONENT";
+ }
}
cout << endl;
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 25d12a8ab..5a2160a67 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -576,8 +576,9 @@ if ($___FILTER_PHRASE_TABLE) {
my $featlist = get_featlist_from_moses($___CONFIG);
$featlist = insert_ranges_to_featlist($featlist, $___RANGES);
-# Mark which features are disabled:
+# Mark which features are disabled
if (defined $___ACTIVATE_FEATURES) {
+ $featlist->{"enabled"} = undef;
my %enabled = map { ($_, 1) } split /[, ]+/, $___ACTIVATE_FEATURES;
my %cnt;
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
@@ -1145,6 +1146,8 @@ if($___RETURN_BEST_DEV) {
}
my $best_featlist = get_featlist_from_file("run$bestit.dense");
$best_featlist->{"untuneables"} = $featlist->{"untuneables"};
+ $best_featlist->{"allcomponentsuntuneable"} = $featlist->{"allcomponentsuntuneable"};
+ $best_featlist->{"skippeduntuneablecomponents"} = $featlist->{"skippeduntuneablecomponents"};
create_config($___CONFIG_ORIG, "./moses.ini", $best_featlist,
$bestit, $bestbleu, $best_sparse_file);
}
@@ -1235,10 +1238,26 @@ sub run_decoder {
}
# moses now does not seem accept "-tm X -tm Y" but needs "-tm X Y"
my %model_weights;
+ my $valcnt = 0;
+ my $offset = 0;
for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
my $name = $featlist->{"names"}->[$i];
- $model_weights{$name} = "$name=" if !defined $model_weights{$name};
+ if (!defined $model_weights{$name}) {
+ $model_weights{$name} = "$name=";
+ $valcnt = 0;
+ while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
+ #$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset};
+ $model_weights{$name} .= sprintf " x";
+ $offset++;
+ }
+ }
$model_weights{$name} .= sprintf " %.6f", $vals[$i];
+ $valcnt++;
+ while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
+ #$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset};
+ $model_weights{$name} .= sprintf " x";
+ $offset++;
+ }
}
my $decoder_config = "";
$decoder_config = "-weight-overwrite '" . join(" ", values %model_weights) ."'" unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1;
@@ -1362,8 +1381,11 @@ sub get_featlist_from_file {
my @names = ();
my @startvalues = ();
my @untuneables = ();
+ my @allcomponentsuntuneable = ();
+ my %skippeduntuneablecomponents = ();
open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!";
my $nr = 0;
+ my $i = 0;
my @errs = ();
while (<$fh>) {
$nr++;
@@ -1373,11 +1395,25 @@ sub get_featlist_from_file {
next if (!defined($valuesStr));
my @values = split(/ /, $valuesStr);
- foreach my $value (@values) {
- push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
- if $value !~ /^[+-]?[0-9.\-e]+$/;
- push @names, $longname;
- push @startvalues, $value;
+ my $valcnt = 0;
+ my $hastuneablecomponent = 0;
+ foreach my $value (@values) {
+ if ($value =~ /^UNTUNEABLECOMPONENT$/) {
+ $skippeduntuneablecomponents{$longname}{$valcnt} = 1;
+ $i++;
+ $valcnt++;
+ } elsif ($value =~ /^[+-]?[0-9.\-e]+$/) {
+ push @names, $longname;
+ push @startvalues, $value;
+ $i++;
+ $valcnt++;
+ $hastuneablecomponent = 1;
+ } else {
+ push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
+ }
+ }
+ if (!$hastuneablecomponent) {
+ push @allcomponentsuntuneable, $longname;
}
}
elsif (/^(\S+) UNTUNEABLE$/) {
@@ -1391,7 +1427,7 @@ sub get_featlist_from_file {
warn join("", @errs);
exit 1;
}
- return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables};
+ return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables, "allcomponentsuntuneable"=>\@allcomponentsuntuneable, "skippeduntuneablecomponents"=>\%skippeduntuneablecomponents};
}
@@ -1487,6 +1523,8 @@ sub create_config {
print $out "# We were before running iteration $iteration\n";
print $out "# finished ".`date`;
+ my %oldvalues = ();
+
my $line = <$ini_fh>;
while(1) {
last unless $line;
@@ -1501,34 +1539,51 @@ sub create_config {
# parameter name
my $parameter = $1;
- if ($parameter eq "weight") {
- # leave weights 'til last. We're changing it
- while ($line = <$ini_fh>) {
- last if $line =~ /^\[/;
- if ($line =~ /^([^_=\s]+)/) {
- for( @{$featlist->{"untuneables"}} ){
- if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
- push @keep_weights, $line;
- }
- }
- }
- }
- }
- elsif (defined($P{$parameter})) {
- # found a param (thread, verbose etc) that we're overriding. Leave to the end
- while ($line = <$ini_fh>) {
- last if $line =~ /^\[/;
- }
- }
- else {
- # unchanged parameter, write old
- print $out "[$parameter]\n";
- while ($line = <$ini_fh>) {
- last if $line =~ /^\[/;
- print $out $line;
- }
- }
- }
+ if ($parameter eq "weight") {
+ # leave weights 'til last. We're changing it
+ while ($line = <$ini_fh>) {
+ last if $line =~ /^\[/;
+ if ($line =~ /^(\S+)= (.+)$/) {
+ for( @{$featlist->{"untuneables"}} ){
+ if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
+ push @keep_weights, $line;
+ }
+ }
+ for( @{$featlist->{"allcomponentsuntuneable"}} ){
+ if ($1 eq $_ ) {# if all dense weights are untuneable, copy it into new config
+ push @keep_weights, $line;
+ }
+ }
+
+ my ($longname, $valuesStr) = ($1, $2);
+ next if (!defined($valuesStr));
+ print $valuesStr;
+ my @values = split(/ /, $valuesStr);
+ my $valcnt = 0;
+ foreach my $value (@values) {
+ if ($value =~ /^[+-]?[0-9.\-e]+$/) {
+ $oldvalues{$longname}{$valcnt} = $value;
+ }
+ $valcnt++;
+ }
+ }
+ }
+ }
+ elsif (defined($P{$parameter})) {
+ # found a param (thread, verbose etc) that we're overriding. Leave to the end
+ while ($line = <$ini_fh>) {
+ last if $line =~ /^\[/;
+ }
+ }
+ else {
+ # unchanged parameter, write old
+ print $out "[$parameter]\n";
+ while ($line = <$ini_fh>) {
+ last if $line =~ /^\[/;
+ print $out $line;
+ }
+ }
+ }
# write all additional parameters
foreach my $parameter (keys %P) {
@@ -1543,20 +1598,30 @@ sub create_config {
my $prevName = "";
my $outStr = "";
+ my $valcnt = 0;
+ my $offset = 0;
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
my $name = $featlist->{"names"}->[$i];
my $val = $featlist->{"values"}->[$i];
- if ($prevName eq $name) {
- $outStr .= " $val";
+ if ($prevName ne $name) {
+ print $out "$outStr\n";
+ $valcnt = 0;
+ $outStr = "$name=";
+ $prevName = $name;
+ while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
+ $outStr .= " $oldvalues{$name}{$valcnt+$offset}";
+ $offset++;
+ }
}
- else {
- print $out "$outStr\n";
- $outStr = "$name= $val";
- $prevName = $name;
+ $outStr .= " $val";
+ $valcnt++;
+ while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
+ $outStr .= " $oldvalues{$name}{$valcnt+$offset}";
+ $offset++;
}
}
- print $out "$outStr\n";
+ print $out "$outStr\n";
for (@keep_weights) {
print $out $_;