Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Koehn <phi@jhu.edu> 2016-06-04 12:15:34 +0300
committer: Philipp Koehn <phi@jhu.edu> 2016-06-04 12:15:34 +0300
commit: defbf8d7c3f9ed5b656488fe61b46f4e759182d8 (patch)
tree: 88ee49774bb6fe35cf7ae20cb54d16a4f27af3b5 /scripts/ems/support
parent: ea306f62b7ff6abaf7a91da6a5ac66b8972d1763 (diff)
barebone support for quality estimation in experiment.perl
Diffstat (limited to 'scripts/ems/support')
-rwxr-xr-x scripts/ems/support/create-xml.perl 42
-rwxr-xr-x scripts/ems/support/remove-segmentation-markup.perl 15
-rw-r--r-- scripts/ems/support/ter.perl 15
3 files changed, 69 insertions, 3 deletions
diff --git a/scripts/ems/support/create-xml.perl b/scripts/ems/support/create-xml.perl
new file mode 100755
index 000000000..610c2ccf8
--- /dev/null
+++ b/scripts/ems/support/create-xml.perl
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses. Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+#
+# Usage: create-xml.perl source|target|ref < plain-text > xml
+# Wraps the lines read from STDIN into <seg> elements (numbered from 1) inside
+# a single <doc> of a <srcset>, <tstset> or <refset>, and prints that to STDOUT.
+# Only the first letter of the argument (s, t or r, in any case) is checked.
+
+use warnings;
+use strict;
+
+# For each type: the name of the set element, followed by the attributes of
+# the set element and of the doc element. Keeping them in one table makes sure
+# that the closing tag always matches the opening tag.
+my %MARKUP = (
+  "s" => ["srcset", "setid=\"test\" srclang=\"any\"", "docid=\"doc\""],
+  "t" => ["tstset", "setid=\"test\" tgtlang=\"any\" srclang=\"any\"", "sysid=\"moses\" docid=\"doc\""],
+  "r" => ["refset", "setid=\"test\" tgtlang=\"any\" srclang=\"any\"", "sysid=\"ref\" docid=\"doc\""],
+);
+
+my ($type) = @ARGV;
+# A missing argument is handled like an unknown one: without the defined()
+# check, "use warnings" reports an uninitialized value before the error below.
+my $key = defined($type) ? lc(substr($type, 0, 1)) : "";
+die("ERROR: specify source / target / ref") unless exists($MARKUP{$key});
+my ($set, $set_attributes, $doc_attributes) = @{$MARKUP{$key}};
+
+print "<$set $set_attributes>\n";
+print "<doc $doc_attributes>\n";
+
+# NOTE(review): the text of a segment is copied as it is. Presumably the input
+# is already escaped for XML (no raw & or <) -- confirm with the callers.
+my $i = 0;
+while(<STDIN>) {
+  chomp;
+  print "<seg id=\"".(++$i)."\">$_</seg>\n";
+}
+
+print "</doc>\n";
+print "</$set>\n";
diff --git a/scripts/ems/support/remove-segmentation-markup.perl b/scripts/ems/support/remove-segmentation-markup.perl
index 3b02bceaf..1e5820dd5 100755
--- a/scripts/ems/support/remove-segmentation-markup.perl
+++ b/scripts/ems/support/remove-segmentation-markup.perl
@@ -9,7 +9,16 @@ use strict;
$|++;
while(<STDIN>) {
- s/ \|\d+\-\d+\| / /g;
- s/ \|\d+\-\d+\|$//;
- print $_;
+ chomp;              # not chop: the last line may come without a newline
+ s/\|[^\|]+\|//g;    # drop everything between two | characters, e.g. |0-1|
+ s/\s+/ /g;          # collapse the whitespace left behind by the removal
+ s/^ //;             # no blank at the start of the line ...
+ s/ $//;             # ... or at its end
+ print $_."\n";
}
+
+#while(<STDIN>) {
+# s/ \|\d+\-\d+\| / /g;
+# s/ \|\d+\-\d+\|$//;
+# print $_;
+#}
diff --git a/scripts/ems/support/ter.perl b/scripts/ems/support/ter.perl
new file mode 100644
index 000000000..1bae6f146
--- /dev/null
+++ b/scripts/ems/support/ter.perl
@@ -0,0 +1,45 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses. Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+#
+# Usage: ter.perl JAR HYP REF TMPDIR
+#
+# Wraps the plain text files HYP and REF into XML with create-xml.perl (taken
+# from the directory of this script), runs "java -jar JAR" on them with the
+# options "-o ter -n TMPDIR/out", and prints the file TMPDIR/out.ter to STDOUT.
+# Dies as soon as one of these steps fails.
+
+use strict;
+use warnings;
+use FindBin qw($RealBin);
+
+die("ERROR: syntax: ter.perl jar hyp ref tmp-dir\n") unless scalar(@ARGV) == 4;
+my ($jar, $hyp,$ref,$tmp) = @ARGV;
+
+# Quotes a string for the shell, so that a path with spaces or shell
+# metacharacters is passed on as one unchanged word.
+sub shell_quote {
+  my ($text) = @_;
+  $text =~ s/'/'\\''/g;
+  return "'$text'";
+}
+
+# Runs a shell command and returns what it wrote to STDOUT. Dies if the
+# command fails, instead of going on with missing or outdated files.
+sub run {
+  my ($cmd) = @_;
+  my $out = `$cmd`;
+  die("ERROR: command failed (status $?): $cmd\n") if $? != 0;
+  return $out;
+}
+
+my $create_xml = shell_quote("$RealBin/create-xml.perl");
+my $hyp_xml = shell_quote("$tmp/hyp");
+my $ref_xml = shell_quote("$tmp/ref");
+
+run("mkdir -p ".shell_quote($tmp));
+run("$create_xml test < ".shell_quote($hyp)." > $hyp_xml");
+run("$create_xml ref < ".shell_quote($ref)." > $ref_xml");
+run("java -jar ".shell_quote($jar)." -h $hyp_xml -r $ref_xml -o ter -n ".shell_quote("$tmp/out"));
+print run("cat ".shell_quote("$tmp/out.ter"));