Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Koehn <pkoehn@inf.ed.ac.uk> 2011-12-21 09:50:59 +0400
committer: Philipp Koehn <pkoehn@inf.ed.ac.uk> 2011-12-21 09:50:59 +0400
commit: cdf735b01b540ad5c4128fe7d23b149643e70f3d (patch)
tree: 7bcb437d1ca64d1f2cfdf7a6cd107bd783b3c220 /scripts/generic
parent: b95c372e3a254e3eee36112efa0c8d4a97cf5ada (diff)
better error message when no corpus defined, better integration of IRSTLM training
Diffstat (limited to 'scripts/generic')
-rwxr-xr-x scripts/generic/trainlm-irst.perl 24
1 files changed, 16 insertions, 8 deletions
diff --git a/scripts/generic/trainlm-irst.perl b/scripts/generic/trainlm-irst.perl
index 15e8d7ee9..d6ac3aaf6 100755
--- a/scripts/generic/trainlm-irst.perl
+++ b/scripts/generic/trainlm-irst.perl
@@ -17,34 +17,42 @@ use Getopt::Long;
my $order;
my $corpusPath;
my $lmPath;
-my $cores;
+my $cores = 2;
my $irstPath;
+my $tempPath = "tmp";
GetOptions("order=s" => \$order,
"text=s" => \$corpusPath,
"lm=s" => \$lmPath,
"cores=s" => \$cores,
"irst-dir=s" => \$irstPath,
+ "temp-dir=s" => \$tempPath
) or exit 1;
+die("ERROR: please set order") unless defined($order);
+die("ERROR: please set text") unless defined($corpusPath);
+die("ERROR: please set lm") unless defined($lmPath);
+die("ERROR: please set irst-dir") unless defined($irstPath);
+
my $ext = ($corpusPath =~ m/([^.]+)$/)[0];
print "extension is $ext\n";
-mkdir 'temp';
+$tempPath .= "/irstlm-build-tmp.$$";
+`mkdir -p $tempPath`;
my $cmd;
if ($ext eq "gz")
{
- $cmd = "zcat $corpusPath | $irstPath/bin/add-start-end.sh | gzip -c > temp/monolingual.setagged.gz";
+ $cmd = "zcat $corpusPath | $irstPath/add-start-end.sh | gzip -c > $tempPath/monolingual.setagged.gz";
}
else
{
- $cmd = "cat $corpusPath | $irstPath/bin/add-start-end.sh | gzip -c > temp/monolingual.setagged.gz";
+ $cmd = "cat $corpusPath | $irstPath/add-start-end.sh | gzip -c > $tempPath/monolingual.setagged.gz";
}
print STDERR "EXECUTING $cmd\n";
`$cmd`;
-$cmd = "IRSTLM=$irstPath $irstPath/bin/build-lm.sh -t stat4 -i \"gunzip -c temp/monolingual.setagged.gz\" -n $order -p -o temp/iarpa.gz -k $cores";
+$cmd = "IRSTLM=$irstPath/.. $irstPath/build-lm.sh -t $tempPath/stat4 -i \"gunzip -c $tempPath/monolingual.setagged.gz\" -n $order -p -o $tempPath/iarpa.gz -k $cores";
print STDERR "EXECUTING $cmd\n";
`$cmd`;
@@ -53,17 +61,17 @@ print "extension is $ext\n";
if ($ext eq "gz")
{
- $cmd = "$irstPath/bin/compile-lm temp/iarpa.gz --text yes /dev/stdout | gzip -c > $lmPath";
+ $cmd = "$irstPath/compile-lm $tempPath/iarpa.gz --text yes /dev/stdout | gzip -c > $lmPath";
}
else
{
- $cmd = "$irstPath/bin/compile-lm temp/iarpa.gz --text yes $lmPath";
+ $cmd = "$irstPath/compile-lm $tempPath/iarpa.gz --text yes $lmPath";
}
print STDERR "EXECUTING $cmd\n";
`$cmd`;
-$cmd = "rm -rf temp stat4";
+$cmd = "rm -rf $tempPath";
print STDERR "EXECUTING $cmd\n";
`$cmd`;