Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormphi <mphi@1f5c12ca-751b-0410-a591-d2e778427230>2010-02-07 12:11:09 +0300
committermphi <mphi@1f5c12ca-751b-0410-a591-d2e778427230>2010-02-07 12:11:09 +0300
commit9e8352a04135acfe54d3fca535c06908500f1f0c (patch)
tree066f57bbe0c8ea3da69dbf8ef14b9d3cad7cb1e7 /scripts/analysis
parent05e21dc5e2ba88143fa2c3eb5af4392aa5f2cdd9 (diff)
modified the implementation, removing unnecessary repetition, thus making the whole process approximately fifty times faster
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2866 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts/analysis')
-rwxr-xr-xscripts/analysis/bootstrap-hypothesis-difference-significance.pl97
1 file changed, 73 insertions, 24 deletions
diff --git a/scripts/analysis/bootstrap-hypothesis-difference-significance.pl b/scripts/analysis/bootstrap-hypothesis-difference-significance.pl
index b34e427a2..9592941e9 100755
--- a/scripts/analysis/bootstrap-hypothesis-difference-significance.pl
+++ b/scripts/analysis/bootstrap-hypothesis-difference-significance.pl
@@ -16,7 +16,6 @@ use strict;
#constants
my $TIMES_TO_REPEAT_SUBSAMPLING = 1000;
my $SUBSAMPLE_SIZE = 0; # if 0 then subsample size is equal to the whole set
-my $TMP_PREFIX = "/tmp/signigicance_test_file_";
my $MAX_NGRAMS_FOR_BLEU = 4;
#checking cmdline argument consistency
@@ -35,6 +34,14 @@ print "reading data; " . `date`;
#read all data
my $data = readAllData(@ARGV);
+
+#calculate each sentence's contribution to BP and ngram precision
+print "performing preliminary calculations (hypothesis 1); " . `date`;
+preEvalHypo($data, "hyp1");
+
+print "performing preliminary calculations (hypothesis 2); " . `date`;
+preEvalHypo($data, "hyp2");
+
#start comparing
print "comparing hypotheses; " . `date`;
@@ -72,23 +79,29 @@ print "average subsample bleu: $averageSubSampleBleuDiff " . `date`;
#calculating p-value
my $count = 0;
-my $realBleuDiff = abs(getBleu($data->{refs}, $data->{hyp2}) - getBleu($data->{refs}, $data->{hyp1}));
+my $realBleu1 = getBleu($data->{refs}, $data->{hyp1});
+my $realBleu2 = getBleu($data->{refs}, $data->{hyp2});
+
+print "actual BLEU of hypothesis 1: $realBleu1\n";
+print "actual BLEU of hypothesis 2: $realBleu2\n";
+
+my $realBleuDiff = abs($realBleu2 - $realBleu1);
for my $subSampleDiff (@subSampleBleuDiffArr) {
-# my $op;
+ my $op;
if ($subSampleDiff - $averageSubSampleBleuDiff >= $realBleuDiff) {
$count++;
-# $op = ">=";
+ $op = ">=";
}
else {
-# $op = "< ";
+ $op = "< ";
}
-# print "$subSampleDiff - $averageSubSampleBleuDiff $op $realBleuDiff\n";
+ #print "$subSampleDiff - $averageSubSampleBleuDiff $op $realBleuDiff\n";
}
-my $result = ($count + 1) / $TIMES_TO_REPEAT_SUBSAMPLING;
+my $result = $count / $TIMES_TO_REPEAT_SUBSAMPLING;
print "Assuming that essentially the same system generated the two hypothesis translations (null-hypothesis),\n";
print "the probability of actually getting them (p-value) is: $result.\n";
@@ -144,7 +157,7 @@ sub readData {
open (FILE, $file) or die ("Failed to open `$file' for reading");
while (<FILE>) {
- push @result, [split(/\s+/, $_)];
+ push @result, { words => [split(/\s+/, $_)] };
}
close (FILE);
@@ -153,6 +166,51 @@ sub readData {
}
#####
+# calculate each sentence's contribution to the ngram precision and brevity penalty
+#####
+sub preEvalHypo {
+ my $data = shift;
+ my $hypId = shift;
+
+ my ($correctNgramCounts, $totalNgramCounts);
+ my ($refNgramCounts, $hypNgramCounts);
+
+ for my $lineIdx (0..($data->{size} - 1)) {
+ my $hypSnt = $data->{$hypId}->[$lineIdx];
+
+ #update total hyp len
+ $hypSnt->{hyplen} = scalar @{$hypSnt->{words}};
+
+ #update total ref len with closest current ref len
+ $hypSnt->{reflen} = getClosestLength($data->{refs}, $lineIdx, $hypSnt->{hyplen});
+
+ $hypSnt->{correctNgrams} = [];
+ $hypSnt->{totalNgrams} = [];
+
+ #update ngram precision for each n-gram order
+ for my $order (1..$MAX_NGRAMS_FOR_BLEU) {
+ #hyp ngrams
+ $hypNgramCounts = groupNgrams($hypSnt, $order);
+
+ #ref ngrams
+ $refNgramCounts = groupNgramsMultiSrc($data->{refs}, $lineIdx, $order);
+
+ $correctNgramCounts = 0;
+ $totalNgramCounts = 0;
+
+ #correct, total
+ for my $ngram (keys %$hypNgramCounts) {
+ $correctNgramCounts += min($hypNgramCounts->{$ngram}, $refNgramCounts->{$ngram});
+ $totalNgramCounts += $hypNgramCounts->{$ngram};
+ }
+
+ $hypSnt->{correctNgrams}->[$order] = $correctNgramCounts;
+ $hypSnt->{totalNgrams}->[$order] = $totalNgramCounts;
+ }
+ }
+}
+
+#####
# draw a subsample of size $subSize from set (0..$setSize) with replacement
#####
sub drawWithReplacement {
@@ -190,24 +248,15 @@ sub getBleu {
my $hypSnt = $hyp->[$lineIdx];
#update total hyp len
- $hypothesisLength += scalar @$hypSnt;
+ $hypothesisLength += $hypSnt->{hyplen};
#update total ref len with closest current ref len
- $referenceLength += getClosestLength($refs, $lineIdx, $hypothesisLength);
+ $referenceLength += $hypSnt->{reflen};
#update ngram precision for each n-gram order
for my $order (1..$MAX_NGRAMS_FOR_BLEU) {
- #hyp ngrams
- $hypNgramCounts = groupNgrams($hypSnt, $order);
-
- #ref ngrams
- $refNgramCounts = groupNgramsMultiSrc($refs, $lineIdx, $order);
-
- #correct, total
- for my $ngram (keys %$hypNgramCounts) {
- $correctNgramCounts[$order] += min($hypNgramCounts->{$ngram}, $refNgramCounts->{$ngram});
- $totalNgramCounts[$order] += $hypNgramCounts->{$ngram};
- }
+ $correctNgramCounts[$order] += $hypSnt->{correctNgrams}->[$order];
+ $totalNgramCounts[$order] += $hypSnt->{totalNgrams}->[$order];
}
}
@@ -235,7 +284,7 @@ sub getClosestLength {
my ($currLen, $currDiff);
for my $ref (@$refs) {
- $currLen = scalar @{$ref->[$lineIdx]};
+ $currLen = scalar @{$ref->[$lineIdx]->{words}};
$currDiff = abs($currLen - $hypothesisLength);
if ($currDiff < $bestDiff or ($currDiff == $bestDiff and $currLen < $bestLen)) {
@@ -254,11 +303,11 @@ sub groupNgrams {
my ($snt, $order) = @_;
my %result;
- my $size = scalar @$snt;
+ my $size = scalar @{$snt->{words}};
my $ngram;
for my $i (0..($size-$order)) {
- $ngram = join(" ", @$snt[$i..($i + $order - 1)]);
+ $ngram = join(" ", @{$snt->{words}}[$i..($i + $order - 1)]);
$result{$ngram}++;
}