diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2015-05-03 10:50:10 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2015-05-03 10:50:10 +0300 |
commit | 73ae7d7e209b78c40a4106d94c62d35ff98bf83b (patch) | |
tree | d8d0e2d89cae874f14df19773e6c8ee706b4cdac /scripts | |
parent | 1278b8f5a78f945cbc913cb7b5bfe7c88b332b09 (diff) |
option not to use parallel
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/training/wrappers/madamira-tok.perl | 43 |
1 file changed, 28 insertions, 15 deletions
```diff
diff --git a/scripts/training/wrappers/madamira-tok.perl b/scripts/training/wrappers/madamira-tok.perl
index bc7e55d43..00639b7a7 100755
--- a/scripts/training/wrappers/madamira-tok.perl
+++ b/scripts/training/wrappers/madamira-tok.perl
@@ -16,6 +16,7 @@ my $KEEP_TMP = 0;
 my $MADA_DIR;
 my $CONFIG;
 my $SCHEME;
+my $USE_PARALLEL = 1;
 
 my $FACTORS_STR;
 my @FACTORS;
@@ -26,7 +27,8 @@ GetOptions(
   "mada-dir=s" => \$MADA_DIR,
   "factors=s" => \$FACTORS_STR,
   "config=s" => \$CONFIG,
-  "scheme=s" => \$SCHEME
+  "scheme=s" => \$SCHEME,
+  "use-parallel=i" => \$USE_PARALLEL
   ) or die("ERROR: unknown options");
 
 die("must have -scheme arg") unless defined($SCHEME);
@@ -61,25 +63,36 @@ close(TMP);
 
 my $cmd;
 
-# split input file
-my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
-if($SPLIT_EXEC) {
+if ($USE_PARALLEL) {
+  # split input file
+  my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
+  if($SPLIT_EXEC) {
     $SPLIT_EXEC = 'gsplit';
-}
-else {
+  }
+  else {
     $SPLIT_EXEC = 'split';
-}
+  }
 
-$cmd = "$SPLIT_EXEC -l 10000 -a 7 -d $TMPDIR/input $TMPDIR/split/x";
-`$cmd`;
+  $cmd = "$SPLIT_EXEC -l 10000 -a 7 -d $TMPDIR/input $TMPDIR/split/x";
+  `$cmd`;
 
-$cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $CONFIG ::: $TMPDIR/split/x*";
-print STDERR "Executing: $cmd\n";
-`$cmd`;
+  $cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $CONFIG ::: $TMPDIR/split/x*";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
 
-$cmd = "cat $TMPDIR/out/x*.$SCHEME.tok > $infile.mada";
-print STDERR "Executing: $cmd\n";
-`$cmd`;
+  $cmd = "cat $TMPDIR/out/x*.$SCHEME.tok > $infile.mada";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+}
+else {
+  $cmd = "cd $MADA_DIR && java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput $infile -rawoutdir $TMPDIR/out -rawconfig $CONFIG";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+  $cmd = "cat $TMPDIR/out/input.$SCHEME.tok > $infile.mada";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+}
 
 # get stuff out of mada output
 open(MADA_OUT,"<$infile.mada");
```