Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <fishandfrolick@gmail.com> 2012-06-07 20:54:11 +0400
committer Hieu Hoang <fishandfrolick@gmail.com> 2012-06-07 20:54:11 +0400
commit 842d1cb6fdc39145fa2ea44e9345c12b92982dc5 (patch)
tree 5c8e0880d81dae42c4c7b2aeefab5c68c94bab30 /scripts
parent 21c80f4fbe4f1fa90a13adb0965b889d6a05d673 (diff)
parent 156b5db0fe9caaaded7bd8839133db3be9bbc200 (diff)
Merge github.com:moses-smt/mosesdecoder
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/Jamfile 2
-rwxr-xr-x scripts/generic/extract-parallel.perl 13
-rwxr-xr-x scripts/training/mert-moses.pl 27
3 files changed, 34 insertions, 8 deletions
diff --git a/scripts/Jamfile b/scripts/Jamfile
index df9a4dfcf..8df468737 100644
--- a/scripts/Jamfile
+++ b/scripts/Jamfile
@@ -26,7 +26,7 @@ if $(with-giza) {
check-for-bin mkcls ;
} else {
if $(CLEANING) = no {
- echo "If you want scripts/training/train-model.perl, pass --with-giza=/path/to/giza-pp" ;
+ #echo "If you want scripts/training/train-model.perl, pass --with-giza=/path/to/giza-pp" ;
}
constant WITH-GIZA : "no" ;
}
diff --git a/scripts/generic/extract-parallel.perl b/scripts/generic/extract-parallel.perl
index b810d9672..8b61a33e8 100755
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@@ -24,9 +24,11 @@ my $source = $ARGV[5]; # 2nd arg of extract argument
my $align = $ARGV[6]; # 3rd arg of extract argument
my $extract = $ARGV[7]; # 4th arg of extract argument
+my $makeTTable = 1; # whether to build the ttable extract files
my $otherExtractArgs= "";
for (my $i = 8; $i < $#ARGV + 1; ++$i)
{
+ $makeTTable = 0 if $ARGV[$i] eq "--NoTTable";
$otherExtractArgs .= $ARGV[$i] ." ";
}
@@ -123,11 +125,14 @@ $catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.o.sorted.gz \n
@children = ();
-$pid = RunFork($catCmd);
-push(@children, $pid);
+if ($makeTTable)
+{
+ $pid = RunFork($catCmd);
+ push(@children, $pid);
-$pid = RunFork($catInvCmd);
-push(@children, $pid);
+ $pid = RunFork($catInvCmd);
+ push(@children, $pid);
+}
my $numStr = NumStr(0);
if (-e "$TMPDIR/extract.$numStr.o.gz")
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index a430aa520..194910750 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -147,6 +147,12 @@ my $mertdir = undef; # path to new mert directory
my $mertargs = undef; # args to pass through to mert & extractor
my $mertmertargs = undef; # args to pass through to mert only
my $extractorargs = undef; # args to pass through to extractor only
+
+# Args to pass through to batch mira only. This flag is useful to
+# change MIRA's hyperparameters such as regularization parameter C,
+# BLEU decay factor, and the number of iterations of MIRA.
+my $batch_mira_args = undef;
+
my $filtercmd = undef; # path to filter-model-given-input.pl
my $filterfile = undef;
my $qsubwrapper = undef;
@@ -210,6 +216,7 @@ GetOptions(
"pro-starting-point" => \$___PRO_STARTING_POINT,
"historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
"batch-mira" => \$___BATCH_MIRA,
+ "batch-mira-args=s" => \$batch_mira_args,
"threads=i" => \$__THREADS
) or exit(1);
@@ -293,6 +300,10 @@ Options:
(also works with regular optimizer, default: 0)
--pairwise-ranked ... Use PRO for optimisation (Hopkins and May, emnlp 2011)
--pro-starting-point ... Use PRO to get a starting point for MERT
+ --batch-mira ... Use Batch MIRA for optimisation (Cherry and Foster, NAACL 2012)
+ --batch-mira-args=STRING ... args to pass through to batch MIRA. This flag is useful to
+ change MIRA's hyperparameters such as regularization parameter C,
+ BLEU decay factor, and the number of iterations of MIRA.
--threads=NUMBER ... Use multi-threaded mert (must be compiled in).
--historic-interpolation ... Interpolate optimized weights with prior iterations' weight
(parameter sets factor [0;1] given to current weights)
@@ -734,6 +745,10 @@ while (1) {
}
my $mira_settings = "";
+ if ($___BATCH_MIRA && $batch_mira_args) {
+ $mira_settings .= "$batch_mira_args ";
+ }
+
$mira_settings .= " --dense-init run$run.$weights_in_file";
if (-e "run$run.sparse-weights") {
$mira_settings .= " --sparse-init run$run.sparse-weights";
@@ -921,10 +936,12 @@ chdir($cwd);
sub get_weights_from_mert {
my ($outfile, $logfile, $weight_count, $sparse_weights) = @_;
my ($bestpoint, $devbleu);
- if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/) || $___BATCH_MIRA) {
+ if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)
+ || $___BATCH_MIRA) {
open my $fh, '<', $outfile or die "Can't open $outfile: $!";
- my (@WEIGHT, $sum);
+ my @WEIGHT;
for (my $i = 0; $i < $weight_count; $i++) { push @WEIGHT, 0; }
+ my $sum = 0.0;
while (<$fh>) {
if (/^F(\d+) ([\-\.\de]+)/) { # regular features
$WEIGHT[$1] = $2;
@@ -933,11 +950,14 @@ sub get_weights_from_mert {
$$sparse_weights{$1} = $2;
}
}
+ close $fh;
+ die "It seems feature values are invalid or unable to read $outfile." if $sum < 1e-09;
+
$devbleu = "unknown";
foreach (@WEIGHT) { $_ /= $sum; }
foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
$bestpoint = join(" ", @WEIGHT);
- close $fh;
+
if($___BATCH_MIRA) {
open my $fh2, '<', $logfile or die "Can't open $logfile: $!";
while(<$fh2>) {
@@ -945,6 +965,7 @@ sub get_weights_from_mert {
$devbleu = $1;
}
}
+ close $fh2;
}
} else {
open my $fh, '<', $logfile or die "Can't open $logfile: $!";