Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieu@hoang.co.uk>2014-03-31 17:22:27 +0400
committerHieu Hoang <hieu@hoang.co.uk>2014-03-31 17:22:27 +0400
commitf344cbe61b75169da3479d780cf50c0c4afe638b (patch)
tree12be2827935b332dd6d9b5ac7f9bb2d20cd7621c /contrib/other-builds/extract-mixed-syntax
parent4c7ea7fe08eac11d9a6158caef759ebe30a02874 (diff)
parenta36d7c292db7d9b882119d790ef5d3c8c8e0dd83 (diff)
Merge branch 'hieu' of github.com:hieuhoang/mosesdecoder into hieu
Diffstat (limited to 'contrib/other-builds/extract-mixed-syntax')
-rwxr-xr-xcontrib/other-builds/extract-mixed-syntax/learnable/learnable.perl108
-rwxr-xr-xcontrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl108
-rwxr-xr-xcontrib/other-builds/extract-mixed-syntax/learnable/reachable.perl147
-rwxr-xr-xcontrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl16
4 files changed, 379 insertions, 0 deletions
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl b/contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl
new file mode 100755
index 000000000..6edcff3f9
--- /dev/null
+++ b/contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl
@@ -0,0 +1,108 @@
+#! /usr/bin/perl -w
+# For each sentence pair: train a table ($tmpDir/pt) on that pair alone, then decode its source with a ConstrainedDecoding feature pointing at its target.
+use strict;
+# Usage: learnable.perl ini-path is-hiero(0|1) decoder-exec extract-exec tmp-name   (reads ./source, ./target, ./alignment)
+my $iniPath = $ARGV[0];
+my $isHiero = $ARGV[1];
+my $decoderExec = $ARGV[2];
+my $extractExec = $ARGV[3];
+my $tmpName = $ARGV[4];
+@ARGV >= 5 or die "Usage: $0 ini-path is-hiero(0|1) decoder-exec extract-exec tmp-name\n";
+my $WORK_DIR = `pwd`;
+chomp($WORK_DIR);
+# Hard-coded checkout location; the leading ~ is left for the shell to expand when the commands below are run.
+my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu";
+
+$decoderExec = "$MOSES_DIR/bin/$decoderExec";
+$extractExec = "$MOSES_DIR/bin/$extractExec";
+# Use gsplit/gsort when "--help" produces output for them, otherwise plain split/sort.
+my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
+if($SPLIT_EXEC) {
+  $SPLIT_EXEC = 'gsplit';
+}
+else {
+  $SPLIT_EXEC = 'split';
+}
+
+my $SORT_EXEC = `gsort --help 2>/dev/null`;
+if($SORT_EXEC) {
+  $SORT_EXEC = 'gsort';
+}
+else {
+  $SORT_EXEC = 'sort';
+}
+
+# Extra option handed to score and consolidate in hierarchical mode.
+my $hieroFlag = "";
+if ($isHiero == 1) {
+  $hieroFlag = "--Hierarchical";
+}
+
+print STDERR "WORK_DIR=$WORK_DIR \n";
+
+my $cmd;
+# Checked, lexical, 3-argument opens: a missing input file is reported instead of silently yielding an empty loop.
+open(my $sourceFh, '<', "source") or die "Cannot open source: $!\n";
+open(my $targetFh, '<', "target") or die "Cannot open target: $!\n";
+open(my $alignmentFh, '<', "alignment") or die "Cannot open alignment: $!\n";
+
+my $lineNum = 0;
+my ($source, $target, $alignment);
+while ($source = <$sourceFh>) {
+  chomp($source);
+  $target = <$targetFh>;
+  $alignment = <$alignmentFh>;
+  # The three files are consumed in lock-step, so a shorter target/alignment file is a fatal input error.
+  defined($target) && defined($alignment) or die "target or alignment has fewer lines than source (line $lineNum)\n";
+  chomp($target); chomp($alignment);
+  # write out 1 line
+  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
+  system('mkdir', '-p', $tmpDir) == 0 or die "Cannot create directory $tmpDir\n";
+
+  open(my $source1Fh, '>', "$tmpDir/source") or die "Cannot write $tmpDir/source: $!\n";
+  open(my $target1Fh, '>', "$tmpDir/target") or die "Cannot write $tmpDir/target: $!\n";
+  open(my $alignment1Fh, '>', "$tmpDir/alignment") or die "Cannot write $tmpDir/alignment: $!\n";
+
+  print $source1Fh "$source\n";
+  print $target1Fh "$target\n";
+  print $alignment1Fh "$alignment\n";
+
+  close($source1Fh) or die "Cannot close $tmpDir/source: $!\n";
+  close($target1Fh) or die "Cannot close $tmpDir/target: $!\n";
+  close($alignment1Fh) or die "Cannot close $tmpDir/alignment: $!\n";
+
+  # train
+  if ($isHiero == 1) {
+    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
+  }
+  else {
+    # pb
+    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
+  }
+  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
+  `$cmd`;
+
+  # decode
+  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -feature-add \"ConstrainedDecoding path=$tmpDir/target\"";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+#  `rm -rf $tmpDir`;
+
+  ++$lineNum;
+}
+
+close($sourceFh);
+close($targetFh);
+close($alignmentFh);
+
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl b/contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl
new file mode 100755
index 000000000..6c383ca80
--- /dev/null
+++ b/contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl
@@ -0,0 +1,108 @@
+#! /usr/bin/perl -w
+# For each sentence pair: train a table ($tmpDir/pt) on that pair alone, then decode its source and request a 10000-entry n-best list in $tmpDir/nbest.
+use strict;
+# Usage: num-deriv.perl ini-path is-hiero(0|1) decoder-exec extract-exec tmp-name   (reads ./source, ./target, ./alignment)
+my $iniPath = $ARGV[0];
+my $isHiero = $ARGV[1];
+my $decoderExec = $ARGV[2];
+my $extractExec = $ARGV[3];
+my $tmpName = $ARGV[4];
+@ARGV >= 5 or die "Usage: $0 ini-path is-hiero(0|1) decoder-exec extract-exec tmp-name\n";
+my $WORK_DIR = `pwd`;
+chomp($WORK_DIR);
+# Hard-coded checkout location; the leading ~ is left for the shell to expand when the commands below are run.
+my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu";
+
+$decoderExec = "$MOSES_DIR/bin/$decoderExec";
+$extractExec = "$MOSES_DIR/bin/$extractExec";
+# Use gsplit/gsort when "--help" produces output for them, otherwise plain split/sort.
+my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
+if($SPLIT_EXEC) {
+  $SPLIT_EXEC = 'gsplit';
+}
+else {
+  $SPLIT_EXEC = 'split';
+}
+
+my $SORT_EXEC = `gsort --help 2>/dev/null`;
+if($SORT_EXEC) {
+  $SORT_EXEC = 'gsort';
+}
+else {
+  $SORT_EXEC = 'sort';
+}
+
+# Extra option handed to score and consolidate in hierarchical mode.
+my $hieroFlag = "";
+if ($isHiero == 1) {
+  $hieroFlag = "--Hierarchical";
+}
+
+print STDERR "WORK_DIR=$WORK_DIR \n";
+
+my $cmd;
+# Checked, lexical, 3-argument opens: a missing input file is reported instead of silently yielding an empty loop.
+open(my $sourceFh, '<', "source") or die "Cannot open source: $!\n";
+open(my $targetFh, '<', "target") or die "Cannot open target: $!\n";
+open(my $alignmentFh, '<', "alignment") or die "Cannot open alignment: $!\n";
+
+my $lineNum = 0;
+my ($source, $target, $alignment);
+while ($source = <$sourceFh>) {
+  chomp($source);
+  $target = <$targetFh>;
+  $alignment = <$alignmentFh>;
+  # The three files are consumed in lock-step, so a shorter target/alignment file is a fatal input error.
+  defined($target) && defined($alignment) or die "target or alignment has fewer lines than source (line $lineNum)\n";
+  chomp($target); chomp($alignment);
+  # write out 1 line
+  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
+  system('mkdir', '-p', $tmpDir) == 0 or die "Cannot create directory $tmpDir\n";
+
+  open(my $source1Fh, '>', "$tmpDir/source") or die "Cannot write $tmpDir/source: $!\n";
+  open(my $target1Fh, '>', "$tmpDir/target") or die "Cannot write $tmpDir/target: $!\n";
+  open(my $alignment1Fh, '>', "$tmpDir/alignment") or die "Cannot write $tmpDir/alignment: $!\n";
+
+  print $source1Fh "$source\n";
+  print $target1Fh "$target\n";
+  print $alignment1Fh "$alignment\n";
+
+  close($source1Fh) or die "Cannot close $tmpDir/source: $!\n";
+  close($target1Fh) or die "Cannot close $tmpDir/target: $!\n";
+  close($alignment1Fh) or die "Cannot close $tmpDir/alignment: $!\n";
+
+  # train
+  if ($isHiero == 1) {
+    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
+  }
+  else {
+    # pb
+    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
+  }
+  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
+  `$cmd`;
+
+  # decode
+  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -n-best-list $tmpDir/nbest 10000";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+#  `rm -rf $tmpDir`;
+
+  ++$lineNum;
+}
+
+close($sourceFh);
+close($targetFh);
+close($alignmentFh);
+
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl b/contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl
new file mode 100755
index 000000000..7405340e5
--- /dev/null
+++ b/contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl
@@ -0,0 +1,147 @@
+#! /usr/bin/perl -w
+# Leave-one-out run over lines [startLine, endLine): train a table on the corpus minus line N, then decode line N's source with a ConstrainedDecoding feature pointing at its target.
+use strict;
+# Usage: reachable.perl ini-path is-hiero(0|1) decoder-exec extract-exec tmp-name start-line end-line   (0-based, end exclusive; reads ./source, ./target, ./alignment)
+sub Write1Line;
+sub WriteCorpus1Holdout;
+
+my $iniPath = $ARGV[0];
+my $isHiero = $ARGV[1];
+my $decoderExec = $ARGV[2];
+my $extractExec = $ARGV[3];
+my $tmpName = $ARGV[4];
+my $startLine = $ARGV[5];
+my $endLine = $ARGV[6];
+@ARGV >= 7 or die "Usage: $0 ini-path is-hiero(0|1) decoder-exec extract-exec tmp-name start-line end-line\n";
+print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
+
+my $WORK_DIR = `pwd`;
+chomp($WORK_DIR);
+# Hard-coded checkout location; the leading ~ is left for the shell to expand when the commands below are run.
+my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
+
+$decoderExec = "$MOSES_DIR/bin/$decoderExec";
+$extractExec = "$MOSES_DIR/bin/$extractExec";
+# Use gsplit/gsort when "--help" produces output for them, otherwise plain split/sort.
+my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
+if($SPLIT_EXEC) {
+  $SPLIT_EXEC = 'gsplit';
+}
+else {
+  $SPLIT_EXEC = 'split';
+}
+
+my $SORT_EXEC = `gsort --help 2>/dev/null`;
+if($SORT_EXEC) {
+  $SORT_EXEC = 'gsort';
+}
+else {
+  $SORT_EXEC = 'sort';
+}
+
+# Extra option handed to score and consolidate in hierarchical mode.
+my $hieroFlag = "";
+if ($isHiero == 1) {
+  $hieroFlag = "--Hierarchical";
+}
+
+print STDERR "WORK_DIR=$WORK_DIR \n";
+
+my $cmd;
+# Checked, lexical, 3-argument opens: a missing input file is reported instead of silently reading nothing.
+open(my $sourceFh, '<', "source") or die "Cannot open source: $!\n";
+open(my $targetFh, '<', "target") or die "Cannot open target: $!\n";
+open(my $alignmentFh, '<', "alignment") or die "Cannot open alignment: $!\n";
+
+my $lineNum = -1;  # bumped at the top of every iteration so that "next" cannot skip the increment
+# Iterate by reading instead of by a "wc -l" count, which misses a final line that lacks a trailing newline.
+while (defined(my $source = <$sourceFh>)) {
+  ++$lineNum;
+  last if $lineNum >= $endLine;  # nothing at or beyond end-line is ever processed
+  my $target = <$targetFh>;
+  my $alignment = <$alignmentFh>;
+  defined($target) && defined($alignment) or die "target or alignment has fewer lines than source (line $lineNum)\n";
+  chomp($source); chomp($target); chomp($alignment);
+  next if $lineNum < $startLine;
+
+  #print STDERR "$source ||| $target ||| $alignment \n";
+  # write out 1 line
+  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
+  system('mkdir', '-p', $tmpDir) == 0 or die "Cannot create directory $tmpDir\n";
+
+  Write1Line($source, $tmpDir, "source.1");
+  Write1Line($target, $tmpDir, "target.1");
+  Write1Line($alignment, $tmpDir, "alignment.1");
+
+  WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
+  WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
+  WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
+
+  # train
+  if ($isHiero == 1) {
+    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
+  }
+  else {
+    # pb
+    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
+  }
+  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
+  `$cmd`;
+
+  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
+  `$cmd`;
+
+  # decode
+  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -feature-add \"ConstrainedDecoding path=$tmpDir/target.1\"";
+  print STDERR "Executing: $cmd\n";
+  `$cmd`;
+
+  system('rm', '-rf', $tmpDir);
+}
+
+close($sourceFh);
+close($targetFh);
+close($alignmentFh);
+
+
+######################
+sub Write1Line
+{
+  # Write $line followed by a newline to $tmpDir/$fileName, truncating any existing file; dies if the file cannot be written.
+  my ($line, $tmpDir, $fileName) = @_;
+  open (my $handle, '>', "$tmpDir/$fileName") or die "Cannot write $tmpDir/$fileName: $!\n";
+  print $handle "$line\n";
+  close ($handle) or die "Cannot close $tmpDir/$fileName: $!\n";
+}
+
+sub WriteCorpus1Holdout
+{
+  # Copy $inFilePath to $tmpDir/$outFileName, leaving out the one line whose 0-based
+  # index equals $holdoutLineNum. Every copied line is written newline-terminated. Dies on I/O failure.
+  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
+
+  open (my $inFile, '<', $inFilePath) or die "Cannot open $inFilePath: $!\n";
+  open (my $outFile, '>', "$tmpDir/$outFileName") or die "Cannot write $tmpDir/$outFileName: $!\n";
+
+  my $lineNum = 0;
+  while (my $line = <$inFile>) {
+    chomp($line);
+    if ($lineNum != $holdoutLineNum) {
+      print $outFile "$line\n";
+    }
+    ++$lineNum;
+  }
+
+  # Buffered write errors only surface at close, so the output handle's close is checked.
+  close ($outFile) or die "Cannot close $tmpDir/$outFileName: $!\n";
+  close ($inFile);
+}
+
+
diff --git a/contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl b/contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl
new file mode 100755
index 000000000..acf44a66b
--- /dev/null
+++ b/contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl
@@ -0,0 +1,16 @@
+#! /usr/bin/perl -w
+# Start reachable.perl as background jobs over consecutive line ranges, each job logging to out.reachable.<startLine>.
+use strict;
+# Usage: run-parallel.perl ini-path
+my $iniPath = $ARGV[0];
+defined($iniPath) or die "Usage: $0 ini-path\n";
+my $SPLIT_LINES = 100;  # number of lines handed to each background job
+my $lineCount = 5000;   # jobs are started for start lines 0, 100, ... below this bound
+
+for (my $startLine = 0; $startLine < $lineCount; $startLine += $SPLIT_LINES) {
+  my $endLine = $startLine + $SPLIT_LINES;
+  # system() passes this string to /bin/sh; bash-only "&>" is read by POSIX shells as "&" then ">", so use the portable ">file 2>&1".
+  my $cmd = "../../scripts/reachable.perl $iniPath 1 moses_chart extract-rules tmp-reachable $startLine $endLine >out.reachable.$startLine 2>&1 &";
+  print STDERR "Executing: $cmd \n";
+  system($cmd);
+}