- fixed caching behavior of Corpus to remove gibberish and cache everything

- fixed JavaScript sorting in sentence-by-sentence

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@735 1f5c12ca-751b-0410-a591-d2e778427230
author: eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-15 02:18:54 +0400
committer: eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-15 02:18:54 +0400
commit: 1374aefc6db277f062f2eddae74bd6079b206cc9 (patch)
tree: 6744b9377a8d14d1550b0dce6118a7d12bfac0de /scripts/analysis
parent: 91521bd911174ae71eaa9136f4c333e5daa729e3 (diff)
2 files changed, 96 insertions, 31 deletions
diff --git a/scripts/analysis/sentence-by-sentence.pl b/scripts/analysis/sentence-by-sentence.pl
index b6e179dd3..f265a07b8 100755
--- a/scripts/analysis/sentence-by-sentence.pl
+++ b/scripts/analysis/sentence-by-sentence.pl
@@ -64,7 +64,7 @@ while(my $sLine = <SYSOUT>)
 	}
 		  
 	my $bleuData = getBLEUSentenceDetails(\@sFactors, \@eFactors, 0);
-	push @bleuScores, [$i, $bleuData->[0]->[0], 0]; #the last number will be the rank
+	push @bleuScores, [$i, $bleuData->[0], 0]; #the last number will be the rank
 	my $pwerData = getPWERSentenceDetails(\@sFactors, \@eFactors, 0);
 	my $html = "<div class=\"sentence\" style=\"background-color: %%%%\" id=\"sentence$i\">"; #the %%%% and other tokens like it are flags to be replaced
 	$html .= "<div class=\"bleu_report\"><b>Sentence $i)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;BLEU:</b> " . sprintf("%.4lg", $bleuData->[0]->[0]) . " (" . join('/', map {sprintf("%.4lg", $_)} @{$bleuData->[0]}[1 .. 4]) . ")</div><table>\n";
@@ -90,17 +90,17 @@ foreach my $truthfh (@TRUTHS) {close($truthfh);}
 rankSentencesByBLEU(\@bleuScores);
 my $stylesheet = <<EOHTML;
 <style type="text/css">
-.legend {background-color: #fff; border: 1px solid #000; padding: 2px; margin-bottom: 10px; margin-right: 15px}
+.legend {background: #fff; border: 1px solid #000; padding: 2px; margin-bottom: 10px; margin-right: 15px}
 .legend_title {font-weight: bold; font-size: medium; text-decoration: underline}
-div.sentence {background-color: #ffffee; border: 1px solid #000088; padding: 0px 8px 0px 8px} //entire composition for a given sentence
+div.sentence {background: #ffffee; border: 1px solid #000088; padding: 0px 8px 0px 8px} /* entire composition for a given sentence */
 div.sentence td {margin: 8px 0px 8px 0px}
 div.bleu_report {margin-bottom: 5px}
 td.sent_title {font-weight: bold; font-size: medium; margin-bottom: 12px}
-.source_sentence {background-color: #ffcccc; border: 1px solid #bbb}
-.truth_sentence {background-color: #ccffcc; border: 1px solid #bbb}
-.sysout_sentence {background-color: #ccccff; border: 1px solid #bbb}
+.source_sentence {background: #ffcccc; border: 1px solid #bbb}
+.truth_sentence {background: #ccffcc; border: 1px solid #bbb}
+.sysout_sentence {background: #ccccff; border: 1px solid #bbb}
 table.sentence_table {border: none}
-.sysout_ngrams {background-color: #fff; border: 1px solid #bbb}
+.sysout_ngrams {background: #fff; border: 1px solid #bbb}
 table.ngram_table {}
 td.ngram_cell {padding: 1px}
 </style>
@@ -117,7 +117,7 @@ function sortByBLEU()
 	var body = document.getElementById('all_sentences'); var row;\n";
 foreach my $rank (sort {$a <=> $b} keys %rank2index)
 {
-	print "\trow = body.getElementById('everything" . $rank2index{$rank} . "');\n";
+	print "\trow = document.getElementById('everything" . $rank2index{$rank} . "');\n";
 	print "\tbody.removeChild(row); body.appendChild(row);\n";
 }
 print "}
@@ -126,7 +126,7 @@ function sortByCorpusOrder()
 	var body = document.getElementById('all_sentences'); var row;\n";
 for(my $j = 0; $j < scalar(@htmlSentences); $j++)
 {
-	print "\trow = body.getElementById('everything$j');\n";
+	print "\trow = document.getElementById('everything$j');\n";
 	print "\tbody.removeChild(row); body.appendChild(row);\n";
 }
 print "}
@@ -138,20 +138,20 @@ my @maxBLEU = (-1e9) x scalar(@htmlColors);
 for(my $k = 0; $k < scalar(@htmlSentences); $k++)
 {
 	my $tier = int($bleuScores[$k]->[2] / (scalar(@htmlSentences) / scalar(@htmlColors)));
-	if($bleuScores[$k]->[1] < $minBLEU[$tier]) {$minBLEU[$tier] = $bleuScores[$k]->[1];}
-	elsif($bleuScores[$k]->[1] > $maxBLEU[$tier]) {$maxBLEU[$tier] = $bleuScores[$k]->[1];}
+	if($bleuScores[$k]->[1]->[0] < $minBLEU[$tier]) {$minBLEU[$tier] = $bleuScores[$k]->[1]->[0];}
+	if($bleuScores[$k]->[1]->[0] > $maxBLEU[$tier]) {$maxBLEU[$tier] = $bleuScores[$k]->[1]->[0];} #tested independently of the min: a score that lowers the min may also be the tier's max
 }
-print "<table border=0><tr><td><div class=\"legend\"><span class=\"legend_title\">BLEU Ranges</span> (sentence backgrounds)<table border=0>";
+print "<table border=0><tr><td><div class=\"legend\"><span class=\"legend_title\">Sentence Background Colors => BLEU Ranges</span><table border=0>";
 for(my $k = 0; $k < scalar(@htmlColors); $k++)
 {
-	print "<tr><td style=\"width: 15px; height: 15px; background-color: " . $htmlColors[$k] . "\"></td><td align=left style=\"padding-left: 12px\">" 
+	print "<tr><td style=\"width: 15px; height: 15px; background: " . $htmlColors[$k] . "\"></td><td align=left style=\"padding-left: 12px\">" 
 							. sprintf("%.4lg", $minBLEU[$k]) . " - " . sprintf("%.4lg", $maxBLEU[$k]) . "</td>";
 }
 print "</table></div></td>\n";
 print "<td><div class=\"legend\"><span class=\"legend_title\">N-gram Colors => Number of Matching Reference Translations</span><table border=0>";
 for(my $k = 1; $k <= scalar(@truthfiles); $k++)
 {
-	print "<tr><td style=\"width: 15px; height: 15px; background-color: " . getNgramColorHTML($k, scalar(@truthfiles)) . "\"></td><td align=left style=\"padding-left: 12px\">$k</td>";
+	print "<tr><td style=\"width: 15px; height: 15px; background: " . getNgramColorHTML($k, scalar(@truthfiles)) . "\"></td><td align=left style=\"padding-left: 12px\">$k</td>";
 }
 print "</table></div></td></tr></table><div style=\"font-weight: bold; margin-bottom: 15px\">
 PWER errors are marked in red on output sentence displays.</div>
@@ -335,14 +335,14 @@ sub getPWERSentenceDetails
 }
 
 #assign ranks to sentences by BLEU score
-#arguments: arrayref of arrayrefs of [sentence index, bleu score, rank to be assigned]
+#arguments: arrayref of arrayrefs of [sentence index, arrayref of [bleu score, n-gram precisions], rank to be assigned]
 #return: none
 sub rankSentencesByBLEU
 {
 	my $bleuData = shift;
 	my $i = 0;
-	#sort first on score, secondarily on sentence index
-	foreach my $sentenceData (reverse sort {my $c = $a->[1] <=> $b->[1]; if($c == 0) {$a->[0] cmp $b->[0];} else {$c;}} @$bleuData) {$sentenceData->[2] = $i++;}
+	#sort first on score, then on 1-gram accuracy, then on sentence index (compared numerically so that 10 does not order before 2)
+	foreach my $sentenceData (reverse sort {$a->[1]->[0] <=> $b->[1]->[0] || $a->[1]->[1] <=> $b->[1]->[1] || $a->[0] <=> $b->[0]} @$bleuData) {$sentenceData->[2] = $i++;}
 }
 
 ###############################################################################################################################################################
@@ -364,7 +364,7 @@ sub getFactoredSentenceHTML
 	return $html . "</tr></table>";
 }
 
-#arguments: arrayref of [sentence index, bleu score, rank], number of sentences
+#arguments: arrayref of [sentence index, arrayref of [bleu score, n-gram precisions], rank], number of sentences
 #return: HTML color string
 sub getSentenceBGColorHTML
 {
@@ -419,7 +419,7 @@ sub getAllNgramsHTML
 	foreach my $ngram (sort {my $c = $a->[3] <=> $b->[3]; if($c == 0) {$a->[0] <=> $b->[0]} else {$c}} @$ngrams) #sort by row, then word num
 	{
 		while($ngram->[0] > $curCol || $ngram->[3] > $curRow) {$html .= "<td></td>"; $curCol = ($curCol + 1) % $numWords; if($curCol == 0) {$html .= "</tr><tr>"; $curRow++;}}
-		$html .= "<td colspan=" . $ngram->[1] . " align=center class=\"ngram_cell\" style=\"background-color: " . getNgramColorHTML(scalar(@{$ngram->[2]}), $numTruths) . "\">" . join(' ', map {$_->[$factorIndex]} @{$sentence}[$ngram->[0] .. $ngram->[0] + $ngram->[1] - 1]) . "</td>";
+		$html .= "<td colspan=" . $ngram->[1] . " align=center class=\"ngram_cell\" style=\"background: " . getNgramColorHTML(scalar(@{$ngram->[2]}), $numTruths) . "\">" . join(' ', map {$_->[$factorIndex]} @{$sentence}[$ngram->[0] .. $ngram->[0] + $ngram->[1] - 1]) . "</td>";
 		$curCol = ($curCol + $ngram->[1]) % $numWords; if($curCol == 0) {$html .= "</tr><tr>"; $curRow++;}
 	}
 	$html .= "</tr>";
diff --git a/scripts/analysis/smtgui/Corpus.pm b/scripts/analysis/smtgui/Corpus.pm
index 0804cac7f..5a2753fdf 100644
--- a/scripts/analysis/smtgui/Corpus.pm
+++ b/scripts/analysis/smtgui/Corpus.pm
@@ -56,6 +56,8 @@ sub new
 	$self->{'unknownCount'} = {}; #factor name => count of unknown tokens in input
 	$self->{'sysoutWER'} = {}; #system name => (factor name => arrayref with system output total WER and arrayref of WER scores for individual sysout sentences wrt truth)
 	$self->{'sysoutPWER'} = {}; #similarly
+	$self->{'nnAdjWERPWER'} = {}; #system name => arrayref of [normalized WER, normalized PWER]
+	$self->{'perplexity'} = {}; #system name => (factor name => perplexity raw score)
 	$self->{'fileDescriptions'} = {}; #filename associated with us => string description of file
 	$self->{'bleuScores'} = {}; #system name => (factor name => arrayref of (overall score, arrayref of per-sentence scores) )
 	$self->{'bleuConfidence'} = {}; #system name => (factor name => arrayrefs holding statistical test data on BLEU scores)
@@ -101,6 +103,7 @@ sub calcUnknownTokens
 	{
 		return ($self->{'unknownCount'}->{$factorName}, $self->{'tokenCount'}->{'input'});
 	}
+	warn "calcing unknown tokens\n";
 	
 	$self->ensureFilenameDefined('input');
 	$self->ensurePhraseTableDefined($factorName);
@@ -134,6 +137,13 @@ sub calcUnknownTokens
 sub calcNounAdjWER_PWERDiff
 {
 	my ($self, $sysname) = @_;
+	#check in-memory cache first
+	if(exists $self->{'nnAdjWERPWER'}->{$sysname})
+	{
+		return @{$self->{'nnAdjWERPWER'}->{$sysname}};
+	}
+	warn "calcing NN/JJ PWER/WER\n";
+	
 	$self->ensureFilenameDefined('truth');
 	$self->ensureFilenameDefined($sysname);
 	$self->ensureFactorPosDefined('surf');
@@ -156,7 +166,8 @@ sub calcNounAdjWER_PWERDiff
 	#unhog memory
 	$self->releaseSentences('truth');
 	$self->releaseSentences($sysname);
-	return ($werScore / $self->{'tokenCount'}->{'truth'}, $pwerScore / $self->{'tokenCount'}->{'truth'});
+	$self->{'nnAdjWERPWER'}->{$sysname} = [$werScore / $self->{'tokenCount'}->{'truth'}, $pwerScore / $self->{'tokenCount'}->{'truth'}];
+	return @{$self->{'nnAdjWERPWER'}->{$sysname}};
 }
 
 #calculate detailed WER statistics and put them into $self
@@ -172,6 +183,7 @@ sub calcOverallWER
 	{
 		return $self->{'sysoutWER'}->{$sysname}->{$factorName}->[0];
 	}
+	warn "calcing WER\n";
 	
 	$self->ensureFilenameDefined('truth');
 	$self->ensureFilenameDefined($sysname);
@@ -201,6 +213,7 @@ sub calcOverallPWER
 	{
 		return $self->{'sysoutPWER'}->{$sysname}->{$factorName}->[0];
 	}
+	warn "calcing PWER\n";
 	
 	$self->ensureFilenameDefined('truth');
 	$self->ensureFilenameDefined($sysname);
@@ -228,6 +241,7 @@ sub calcBLEU
 	{
 		return $self->{'bleuScores'}->{$sysname}->{$factorName};
 	}
+	warn "calcing BLEU\n";
 	
 	$self->ensureFilenameDefined('truth');
 	$self->ensureFilenameDefined($sysname);
@@ -280,6 +294,13 @@ sub statisticallyTestBLEUResults
 {
 	my ($self, $sysname, $factorName) = (shift, shift, 'surf');
 	if(scalar(@_) > 0) {$factorName = shift;}
+	#check in-memory cache first
+	if(exists $self->{'bleuConfidence'}->{$sysname} && exists $self->{'bleuConfidence'}->{$sysname}->{$factorName})
+	{
+		return $self->{'bleuConfidence'}->{$sysname}->{$factorName};
+	}
+	warn "performing consistency tests\n";
+	
 	my $k = 30; #HARDCODED NUMBER OF SUBSETS (WE DO k-FOLD CROSS-VALIDATION); IF YOU CHANGE THIS YOU MUST ALSO CHANGE getApproxPValue() and $criticalTStat
 	my $criticalTStat = 2.045; #hardcoded value given alpha (.025 here) and degrees of freedom (= $k - 1) ########################################
 	$self->ensureFilenameDefined('truth');
@@ -345,6 +366,13 @@ sub statisticallyTestBLEUResults
 sub calcPerplexity
 {
 	my ($self, $sysname, $factorName) = @_;
+	#check in-memory cache first
+	if(exists $self->{'perplexity'}->{$sysname} && exists $self->{'perplexity'}->{$sysname}->{$factorName})
+	{
+		return $self->{'perplexity'}->{$sysname}->{$factorName};
+	}
+	warn "calcing perplexity\n";
+	
 	$self->ensureFilenameDefined($sysname);
 	my $sysoutFilename;
 	if($sysname eq 'truth' || $sysname eq 'input') {$sysoutFilename = $self->{"${sysname}Filename"};}
@@ -358,7 +386,8 @@ sub calcPerplexity
 	my @output = `./ngram -lm $lmFilename -ppl $tmpfile`; #run the SRI n-gram tool
 	`rm $tmpfile`;
 	$output[1] =~ /ppl1=\s*([0-9\.]+)/;
-	return $1;
+	$self->{'perplexity'}->{$sysname}->{$factorName} = $1; #cache per (system, factor); the cache check above and writeCacheFile both expect a hash keyed by factor name under each system
+	return $self->{'perplexity'}->{$sysname}->{$factorName};
 }
 
 #run a paired t test and a sign test on BLEU statistics for subsets of both systems' outputs
@@ -369,6 +398,14 @@ sub calcPerplexity
 sub statisticallyCompareSystemResults
 {
 	my ($self, $sysname1, $sysname2, $factorName) = @_;
+	#check in-memory cache first
+	if(exists $self->{'comparisonStats'}->{$sysname1} && exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2} 
+		&& exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName})
+	{
+		return $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName};
+	}
+	warn "comparing sysoutputs\n";
+	
 	$self->ensureFilenameDefined($sysname1);
 	$self->ensureFilenameDefined($sysname2);
 	$self->ensureFactorPosDefined($factorName);
@@ -470,15 +507,17 @@ sub writeCacheFile
 
 	#store file changetimes to disk
 	print CACHEFILE "File changetimes\n";
-	my $writeCtime = sub
+	my $ensureCtimeIsOutput = sub
 	{
 		my $ext = shift;
-		print CACHEFILE $self->{'corpusName'} . ".$ext " . time . "\n";
+		#check for a previously read value
+		if(exists $self->{'fileCtimes'}->{$ext}) {print CACHEFILE $self->{'corpusName'} . ".$ext " . $self->{'fileCtimes'}->{$ext} . "\n";}
+		else {print CACHEFILE $self->{'corpusName'} . ".$ext " . time . "\n";}
 	};
-	if(exists $self->{'truthFilename'}) {&$writeCtime('e');}
-	if(exists $self->{'inputFilename'}) {&$writeCtime('f');}
-	foreach my $factorName (keys %{$self->{'phraseTableFilenames'}}) {&$writeCtime("pt_$factorName");}
-	foreach my $sysname (keys %{$self->{'sysoutFilenames'}}) {&$writeCtime($sysname);}
+	if(exists $self->{'truthFilename'}) {&$ensureCtimeIsOutput('e');}
+	if(exists $self->{'inputFilename'}) {&$ensureCtimeIsOutput('f');}
+	foreach my $factorName (keys %{$self->{'phraseTableFilenames'}}) {&$ensureCtimeIsOutput("pt_$factorName");}
+	foreach my $sysname (keys %{$self->{'sysoutFilenames'}}) {&$ensureCtimeIsOutput($sysname);}
 	#store bleu scores to disk
 	print CACHEFILE "\nBLEU scores\n";
 	foreach my $sysname (keys %{$self->{'bleuScores'}})
@@ -515,7 +554,7 @@ sub writeCacheFile
 		{
 			foreach my $factorName (keys %{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}})
 			{
-				print CACHEFILE "$sysname1 $sysname2 $factorName " . join('; ', @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n";
+				print CACHEFILE "$sysname1 $sysname2 $factorName " . join('; ', map {join(' ', @$_)} @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n";
 			}
 		}
 	}
@@ -547,8 +586,21 @@ sub writeCacheFile
 	};
 	&$printWERFunc('sysoutWER');
 	&$printWERFunc('sysoutPWER');
-	#store misc scores to disk
-	print CACHEFILE "\n";
+	#store corpus perplexities to disk
+	print CACHEFILE "\nPerplexity\n";
+	foreach my $sysname (keys %{$self->{'perplexity'}})
+	{
+		foreach my $factorName (keys %{$self->{'perplexity'}->{$sysname}})
+		{
+			print CACHEFILE "$sysname $factorName " . $self->{'perplexity'}->{$sysname}->{$factorName} . "\n";
+		}
+	}
+	print CACHEFILE "\nNN/ADJ WER/PWER\n"; #section header belongs in the cache file (not STDOUT) so loadCacheFile can find the section
+	foreach my $sysname (keys %{$self->{'nnAdjWERPWER'}})
+	{
+		print CACHEFILE "$sysname " . join(' ', @{$self->{'nnAdjWERPWER'}->{$sysname}}) . "\n";
+	}
+	print CACHEFILE "\n";
 	close(CACHEFILE);
 }
 
@@ -575,6 +627,8 @@ sub loadCacheFile
 		elsif($line eq "Statistical comparisons\n") {$mode = 'cmp';}
 		elsif($line eq "Unknown-token counts\n") {$mode = 'unk';}
 		elsif($line eq "WER scores") {$mode = 'wer';}
+		elsif($line =~ /^Perplexity\s*$/) {$mode = 'ppl';} #matches with or without the trailing newline (whether $line is chomped is not visible in this hunk)
+		elsif($line =~ m{^NN/ADJ WER/PWER\s*$}) {$mode = 'nawp';}
 		#get data when in a mode already
 		elsif($mode eq 'ctime')
 		{
@@ -608,7 +662,7 @@ sub loadCacheFile
 			if(!exists $self->{'comparisonStats'}->{$sysname1}) {$self->{'comparisonStats'}->{$sysname1} = {};}
 			if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2} = {};}
 			if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = [];}
-			my @stats = split(/;/, $rest);
+			my @stats = map {my @x = split(' ', $_); \@x} split(/;/, $rest);
 			$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = \@stats;
 		}
 		elsif($mode eq 'unk')
@@ -636,6 +690,17 @@ sub loadCacheFile
 				$self->{$werType}->{$sysname}->{$factorName}->[2] = \@indices;
 			}
 		}
+		elsif($mode eq 'ppl')
+		{
+			my ($sysname, $factorName, $perplexity) = split(/\s+/, $line); #lexicals: nothing outside this branch needs these values
+			if(!exists $self->{'perplexity'}->{$sysname}) {$self->{'perplexity'}->{$sysname} = {};}
+			$self->{'perplexity'}->{$sysname}->{$factorName} = $perplexity;
+		}
+		elsif($mode eq 'nawp')
+		{
+			my ($sysname, @scores) = split(/\s+/, $line); #a fresh @scores per line, so each stored reference owns its own array
+			$self->{'nnAdjWERPWER'}->{$sysname} = \@scores;
+		}
 	}
 	close(CACHEFILE);
 }
author	eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>	2006-08-15 02:18:54 +0400
committer	eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>	2006-08-15 02:18:54 +0400
commit	1374aefc6db277f062f2eddae74bd6079b206cc9 (patch)
tree	6744b9377a8d14d1550b0dce6118a7d12bfac0de /scripts/analysis
parent	91521bd911174ae71eaa9136f4c333e5daa729e3 (diff)