Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2015-05-03 10:50:10 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2015-05-03 10:50:10 +0300
commit: 73ae7d7e209b78c40a4106d94c62d35ff98bf83b (patch)
tree: d8d0e2d89cae874f14df19773e6c8ee706b4cdac /scripts
parent: 1278b8f5a78f945cbc913cb7b5bfe7c88b332b09 (diff)
option not to use parallel
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/training/wrappers/madamira-tok.perl 43
1 files changed, 28 insertions, 15 deletions
diff --git a/scripts/training/wrappers/madamira-tok.perl b/scripts/training/wrappers/madamira-tok.perl
index bc7e55d43..00639b7a7 100755
--- a/scripts/training/wrappers/madamira-tok.perl
+++ b/scripts/training/wrappers/madamira-tok.perl
@@ -16,6 +16,7 @@ my $KEEP_TMP = 0;
my $MADA_DIR;
my $CONFIG;
my $SCHEME;
+my $USE_PARALLEL = 1;
my $FACTORS_STR;
my @FACTORS;
@@ -26,7 +27,8 @@ GetOptions(
"mada-dir=s" => \$MADA_DIR,
"factors=s" => \$FACTORS_STR,
"config=s" => \$CONFIG,
- "scheme=s" => \$SCHEME
+ "scheme=s" => \$SCHEME,
+ "use-parallel=i" => \$USE_PARALLEL
) or die("ERROR: unknown options");
die("must have -scheme arg") unless defined($SCHEME);
@@ -61,25 +63,36 @@ close(TMP);
my $cmd;
-# split input file
-my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
-if($SPLIT_EXEC) {
+if ($USE_PARALLEL) {
+ # split input file
+ my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
+ if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
-}
-else {
+ }
+ else {
$SPLIT_EXEC = 'split';
-}
+ }
-$cmd = "$SPLIT_EXEC -l 10000 -a 7 -d $TMPDIR/input $TMPDIR/split/x";
-`$cmd`;
+ $cmd = "$SPLIT_EXEC -l 10000 -a 7 -d $TMPDIR/input $TMPDIR/split/x";
+ `$cmd`;
-$cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $CONFIG ::: $TMPDIR/split/x*";
-print STDERR "Executing: $cmd\n";
-`$cmd`;
+ $cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $CONFIG ::: $TMPDIR/split/x*";
+ print STDERR "Executing: $cmd\n";
+ `$cmd`;
-$cmd = "cat $TMPDIR/out/x*.$SCHEME.tok > $infile.mada";
-print STDERR "Executing: $cmd\n";
-`$cmd`;
+ $cmd = "cat $TMPDIR/out/x*.$SCHEME.tok > $infile.mada";
+ print STDERR "Executing: $cmd\n";
+ `$cmd`;
+}
+else {
+ $cmd = "cd $MADA_DIR && java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput $infile -rawoutdir $TMPDIR/out -rawconfig $CONFIG";
+ print STDERR "Executing: $cmd\n";
+ `$cmd`;
+
+ $cmd = "cat $TMPDIR/out/input.$SCHEME.tok > $infile.mada";
+ print STDERR "Executing: $cmd\n";
+ `$cmd`;
+}
# get stuff out of mada output
open(MADA_OUT,"<$infile.mada");