diff options
author | eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-08-15 02:18:54 +0400 |
---|---|---|
committer | eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-08-15 02:18:54 +0400 |
commit | 1374aefc6db277f062f2eddae74bd6079b206cc9 (patch) | |
tree | 6744b9377a8d14d1550b0dce6118a7d12bfac0de /scripts/analysis | |
parent | 91521bd911174ae71eaa9136f4c333e5daa729e3 (diff) |
- fixed caching behavior of Corpus to remove gibberish and cache everything
- fixed javascript sorting in sentence-by-sentence
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@735 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts/analysis')
-rwxr-xr-x | scripts/analysis/sentence-by-sentence.pl | 38 | ||||
-rw-r--r-- | scripts/analysis/smtgui/Corpus.pm | 89 |
2 files changed, 96 insertions, 31 deletions
diff --git a/scripts/analysis/sentence-by-sentence.pl b/scripts/analysis/sentence-by-sentence.pl index b6e179dd3..f265a07b8 100755 --- a/scripts/analysis/sentence-by-sentence.pl +++ b/scripts/analysis/sentence-by-sentence.pl @@ -64,7 +64,7 @@ while(my $sLine = <SYSOUT>) } my $bleuData = getBLEUSentenceDetails(\@sFactors, \@eFactors, 0); - push @bleuScores, [$i, $bleuData->[0]->[0], 0]; #the last number will be the rank + push @bleuScores, [$i, $bleuData->[0], 0]; #the last number will be the rank my $pwerData = getPWERSentenceDetails(\@sFactors, \@eFactors, 0); my $html = "<div class=\"sentence\" style=\"background-color: %%%%\" id=\"sentence$i\">"; #the %%%% and other tokens like it are flags to be replaced $html .= "<div class=\"bleu_report\"><b>Sentence $i) BLEU:</b> " . sprintf("%.4lg", $bleuData->[0]->[0]) . " (" . join('/', map {sprintf("%.4lg", $_)} @{$bleuData->[0]}[1 .. 4]) . ")</div><table>\n"; @@ -90,17 +90,17 @@ foreach my $truthfh (@TRUTHS) {close($truthfh);} rankSentencesByBLEU(\@bleuScores); my $stylesheet = <<EOHTML; <style type="text/css"> -.legend {background-color: #fff; border: 1px solid #000; padding: 2px; margin-bottom: 10px; margin-right: 15px} +.legend {background: #fff; border: 1px solid #000; padding: 2px; margin-bottom: 10px; margin-right: 15px} .legend_title {font-weight: bold; font-size: medium; text-decoration: underline} -div.sentence {background-color: #ffffee; border: 1px solid #000088; padding: 0px 8px 0px 8px} //entire composition for a given sentence +div.sentence {background: #ffffee; border: 1px solid #000088; padding: 0px 8px 0px 8px} //entire composition for a given sentence div.sentence td {margin: 8px 0px 8px 0px} div.bleu_report {margin-bottom: 5px} td.sent_title {font-weight: bold; font-size: medium; margin-bottom: 12px} -.source_sentence {background-color: #ffcccc; border: 1px solid #bbb} -.truth_sentence {background-color: #ccffcc; border: 1px solid #bbb} -.sysout_sentence {background-color: #ccccff; border: 1px solid #bbb} +.source_sentence {background: #ffcccc; border: 1px solid #bbb} +.truth_sentence {background: #ccffcc; border: 1px solid #bbb} +.sysout_sentence {background: #ccccff; border: 1px solid #bbb} table.sentence_table {border: none} -.sysout_ngrams {background-color: #fff; border: 1px solid #bbb} +.sysout_ngrams {background: #fff; border: 1px solid #bbb} table.ngram_table {} td.ngram_cell {padding: 1px} </style> @@ -117,7 +117,7 @@ function sortByBLEU() var body = document.getElementById('all_sentences'); var row;\n"; foreach my $rank (sort {$a <=> $b} keys %rank2index) { - print "\trow = body.getElementById('everything" . $rank2index{$rank} . "');\n"; + print "\trow = document.getElementById('everything" . $rank2index{$rank} . "');\n"; print "\tbody.removeChild(row); body.appendChild(row);\n"; } print "} @@ -126,7 +126,7 @@ function sortByCorpusOrder() var body = document.getElementById('all_sentences'); var row;\n"; for(my $j = 0; $j < scalar(@htmlSentences); $j++) { - print "\trow = body.getElementById('everything$j');\n"; + print "\trow = document.getElementById('everything$j');\n"; print "\tbody.removeChild(row); body.appendChild(row);\n"; } print "} @@ -138,20 +138,20 @@ my @maxBLEU = (-1e9) x scalar(@htmlColors); for(my $k = 0; $k < scalar(@htmlSentences); $k++) { my $tier = int($bleuScores[$k]->[2] / (scalar(@htmlSentences) / scalar(@htmlColors))); - if($bleuScores[$k]->[1] < $minBLEU[$tier]) {$minBLEU[$tier] = $bleuScores[$k]->[1];} - elsif($bleuScores[$k]->[1] > $maxBLEU[$tier]) {$maxBLEU[$tier] = $bleuScores[$k]->[1];} + if($bleuScores[$k]->[1]->[0] < $minBLEU[$tier]) {$minBLEU[$tier] = $bleuScores[$k]->[1]->[0];} + elsif($bleuScores[$k]->[1]->[0] > $maxBLEU[$tier]) {$maxBLEU[$tier] = $bleuScores[$k]->[1]->[0];} } -print "<table border=0><tr><td><div class=\"legend\"><span class=\"legend_title\">BLEU Ranges</span> (sentence backgrounds)<table border=0>"; +print "<table border=0><tr><td><div class=\"legend\"><span class=\"legend_title\">Sentence Background Colors => BLEU Ranges</span><table border=0>"; for(my $k = 0; $k < scalar(@htmlColors); $k++) { - print "<tr><td style=\"width: 15px; height: 15px; background-color: " . $htmlColors[$k] . "\"></td><td align=left style=\"padding-left: 12px\">" + print "<tr><td style=\"width: 15px; height: 15px; background: " . $htmlColors[$k] . "\"></td><td align=left style=\"padding-left: 12px\">" . sprintf("%.4lg", $minBLEU[$k]) . " - " . sprintf("%.4lg", $maxBLEU[$k]) . "</td>"; } print "</table></div></td>\n"; print "<td><div class=\"legend\"><span class=\"legend_title\">N-gram Colors => Number of Matching Reference Translations</span><table border=0>"; for(my $k = 1; $k <= scalar(@truthfiles); $k++) { - print "<tr><td style=\"width: 15px; height: 15px; background-color: " . getNgramColorHTML($k, scalar(@truthfiles)) . "\"></td><td align=left style=\"padding-left: 12px\">$k</td>"; + print "<tr><td style=\"width: 15px; height: 15px; background: " . getNgramColorHTML($k, scalar(@truthfiles)) . "\"></td><td align=left style=\"padding-left: 12px\">$k</td>"; } print "</table></div></td></tr></table><div style=\"font-weight: bold; margin-bottom: 15px\"> PWER errors are marked in red on output sentence displays.</div> @@ -335,14 +335,14 @@ sub getPWERSentenceDetails } #assign ranks to sentences by BLEU score -#arguments: arrayref of arrayrefs of [sentence index, bleu score, rank to be assigned] +#arguments: arrayref of arrayrefs of [sentence index, arrayref of [bleu score, n-gram precisions], rank to be assigned] #return: none sub rankSentencesByBLEU { my $bleuData = shift; my $i = 0; - #sort first on score, secondarily on sentence index - foreach my $sentenceData (reverse sort {my $c = $a->[1] <=> $b->[1]; if($c == 0) {$a->[0] cmp $b->[0];} else {$c;}} @$bleuData) {$sentenceData->[2] = $i++;} + #sort first on score, then on 1-gram accuracy, then on sentence index + foreach my $sentenceData (reverse sort {my $c = $a->[1]->[0] <=> $b->[1]->[0]; if($c == 0) {my $d = $a->[1]->[1] <=> $b->[1]->[1]; if($d == 0) {$a->[0] cmp $b->[0];} else {$d;}} else {$c;}} @$bleuData) {$sentenceData->[2] = $i++;} } ############################################################################################################################################################### @@ -364,7 +364,7 @@ sub getFactoredSentenceHTML return $html . "</tr></table>"; } -#arguments: arrayref of [sentence index, bleu score, rank], number of sentences +#arguments: arrayref of [sentence index, arrayref of [bleu score, n-gram precisions], rank], number of sentences #return: HTML color string sub getSentenceBGColorHTML { @@ -419,7 +419,7 @@ sub getAllNgramsHTML foreach my $ngram (sort {my $c = $a->[3] <=> $b->[3]; if($c == 0) {$a->[0] <=> $b->[0]} else {$c}} @$ngrams) #sort by row, then word num { while($ngram->[0] > $curCol || $ngram->[3] > $curRow) {$html .= "<td></td>"; $curCol = ($curCol + 1) % $numWords; if($curCol == 0) {$html .= "</tr><tr>"; $curRow++;}} - $html .= "<td colspan=" . $ngram->[1] . " align=center class=\"ngram_cell\" style=\"background-color: " . getNgramColorHTML(scalar(@{$ngram->[2]}), $numTruths) . "\">" . join(' ', map {$_->[$factorIndex]} @{$sentence}[$ngram->[0] .. $ngram->[0] + $ngram->[1] - 1]) . "</td>"; + $html .= "<td colspan=" . $ngram->[1] . " align=center class=\"ngram_cell\" style=\"background: " . getNgramColorHTML(scalar(@{$ngram->[2]}), $numTruths) . "\">" . join(' ', map {$_->[$factorIndex]} @{$sentence}[$ngram->[0] .. $ngram->[0] + $ngram->[1] - 1]) . "</td>"; $curCol = ($curCol + $ngram->[1]) % $numWords; if($curCol == 0) {$html .= "</tr><tr>"; $curRow++;} } $html .= "</tr>"; diff --git a/scripts/analysis/smtgui/Corpus.pm b/scripts/analysis/smtgui/Corpus.pm index 0804cac7f..5a2753fdf 100644 --- a/scripts/analysis/smtgui/Corpus.pm +++ b/scripts/analysis/smtgui/Corpus.pm @@ -56,6 +56,8 @@ sub new $self->{'unknownCount'} = {}; #factor name => count of unknown tokens in input $self->{'sysoutWER'} = {}; #system name => (factor name => arrayref with system output total WER and arrayref of WER scores for individual sysout sentences wrt truth) $self->{'sysoutPWER'} = {}; #similarly + $self->{'nnAdjWERPWER'} = {}; #system name => arrayref of [normalized WER, normalized PWER] + $self->{'perplexity'} = {}; #system name => (factor name => perplexity raw score) $self->{'fileDescriptions'} = {}; #filename associated with us => string description of file $self->{'bleuScores'} = {}; #system name => (factor name => arrayref of (overall score, arrayref of per-sentence scores) ) $self->{'bleuConfidence'} = {}; #system name => (factor name => arrayrefs holding statistical test data on BLEU scores) @@ -101,6 +103,7 @@ sub calcUnknownTokens { return ($self->{'unknownCount'}->{$factorName}, $self->{'tokenCount'}->{'input'}); } + warn "calcing unknown tokens\n"; $self->ensureFilenameDefined('input'); $self->ensurePhraseTableDefined($factorName); @@ -134,6 +137,13 @@ sub calcUnknownTokens sub calcNounAdjWER_PWERDiff { my ($self, $sysname) = @_; + #check in-memory cache first + if(exists $self->{'nnAdjWERPWER'}->{$sysname}) + { + return @{$self->{'nnAdjWERPWER'}->{$sysname}}; + } + warn "calcing NN/JJ PWER/WER\n"; + $self->ensureFilenameDefined('truth'); $self->ensureFilenameDefined($sysname); $self->ensureFactorPosDefined('surf'); @@ -156,7 +166,8 @@ sub calcNounAdjWER_PWERDiff #unhog memory $self->releaseSentences('truth'); $self->releaseSentences($sysname); - return ($werScore / $self->{'tokenCount'}->{'truth'}, $pwerScore / $self->{'tokenCount'}->{'truth'}); + $self->{'nnAdjWERPWER'}->{$sysname} = [$werScore / $self->{'tokenCount'}->{'truth'}, $pwerScore / $self->{'tokenCount'}->{'truth'}]; + return @{$self->{'nnAdjWERPWER'}->{$sysname}}; } #calculate detailed WER statistics and put them into $self @@ -172,6 +183,7 @@ sub calcOverallWER { return $self->{'sysoutWER'}->{$sysname}->{$factorName}->[0]; } + warn "calcing WER\n"; $self->ensureFilenameDefined('truth'); $self->ensureFilenameDefined($sysname); @@ -201,6 +213,7 @@ sub calcOverallPWER { return $self->{'sysoutPWER'}->{$sysname}->{$factorName}->[0]; } + warn "calcing PWER\n"; $self->ensureFilenameDefined('truth'); $self->ensureFilenameDefined($sysname); @@ -228,6 +241,7 @@ sub calcBLEU { return $self->{'bleuScores'}->{$sysname}->{$factorName}; } + warn "calcing BLEU\n"; $self->ensureFilenameDefined('truth'); $self->ensureFilenameDefined($sysname); @@ -280,6 +294,13 @@ sub statisticallyTestBLEUResults { my ($self, $sysname, $factorName) = (shift, shift, 'surf'); if(scalar(@_) > 0) {$factorName = shift;} + #check in-memory cache first + if(exists $self->{'bleuConfidence'}->{$sysname} && exists $self->{'bleuConfidence'}->{$sysname}->{$factorName}) + { + return $self->{'bleuConfidence'}->{$sysname}->{$factorName}; + } + warn "performing consistency tests\n"; + my $k = 30; #HARDCODED NUMBER OF SUBSETS (WE DO k-FOLD CROSS-VALIDATION); IF YOU CHANGE THIS YOU MUST ALSO CHANGE getApproxPValue() and $criticalTStat my $criticalTStat = 2.045; #hardcoded value given alpha (.025 here) and degrees of freedom (= $k - 1) ######################################## $self->ensureFilenameDefined('truth'); @@ -345,6 +366,13 @@ sub statisticallyTestBLEUResults sub calcPerplexity { my ($self, $sysname, $factorName) = @_; + #check in-memory cache first + if(exists $self->{'perplexity'}->{$sysname} && exists $self->{'perplexity'}->{$sysname}->{$factorName}) + { + return $self->{'perplexity'}->{$sysname}->{$factorName}; + } + warn "calcing perplexity\n"; + $self->ensureFilenameDefined($sysname); my $sysoutFilename; if($sysname eq 'truth' || $sysname eq 'input') {$sysoutFilename = $self->{"${sysname}Filename"};} @@ -358,7 +386,8 @@ sub calcPerplexity my @output = `./ngram -lm $lmFilename -ppl $tmpfile`; #run the SRI n-gram tool `rm $tmpfile`; $output[1] =~ /ppl1=\s*([0-9\.]+)/; - return $1; + $self->{'perplexity'}->{$sysname} = $1; + return $self->{'perplexity'}->{$sysname}->{$factorName}; } #run a paired t test and a sign test on BLEU statistics for subsets of both systems' outputs @@ -369,6 +398,14 @@ sub calcPerplexity sub statisticallyCompareSystemResults { my ($self, $sysname1, $sysname2, $factorName) = @_; + #check in-memory cache first + if(exists $self->{'comparisonStats'}->{$sysname1} && exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2} + && exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}) + { + return $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}; + } + warn "comparing sysoutputs\n"; + $self->ensureFilenameDefined($sysname1); $self->ensureFilenameDefined($sysname2); $self->ensureFactorPosDefined($factorName); @@ -470,15 +507,17 @@ sub writeCacheFile #store file changetimes to disk print CACHEFILE "File changetimes\n"; - my $writeCtime = sub + my $ensureCtimeIsOutput = sub { my $ext = shift; - print CACHEFILE $self->{'corpusName'} . ".$ext " . time . "\n"; + #check for a previously read value + if(exists $self->{'fileCtimes'}->{$ext}) {print CACHEFILE $self->{'corpusName'} . ".$ext " . $self->{'fileCtimes'}->{$ext} . "\n";} + else {print CACHEFILE $self->{'corpusName'} . ".$ext " . time . "\n";} }; - if(exists $self->{'truthFilename'}) {&$writeCtime('e');} - if(exists $self->{'inputFilename'}) {&$writeCtime('f');} - foreach my $factorName (keys %{$self->{'phraseTableFilenames'}}) {&$writeCtime("pt_$factorName");} - foreach my $sysname (keys %{$self->{'sysoutFilenames'}}) {&$writeCtime($sysname);} + if(exists $self->{'truthFilename'}) {&$ensureCtimeIsOutput('e');} + if(exists $self->{'inputFilename'}) {&$ensureCtimeIsOutput('f');} + foreach my $factorName (keys %{$self->{'phraseTableFilenames'}}) {&$ensureCtimeIsOutput("pt_$factorName");} + foreach my $sysname (keys %{$self->{'sysoutFilenames'}}) {&$ensureCtimeIsOutput($sysname);} #store bleu scores to disk print CACHEFILE "\nBLEU scores\n"; foreach my $sysname (keys %{$self->{'bleuScores'}}) @@ -515,7 +554,7 @@ sub writeCacheFile { foreach my $factorName (keys %{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}}) { - print CACHEFILE "$sysname1 $sysname2 $factorName " . join('; ', @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n"; + print CACHEFILE "$sysname1 $sysname2 $factorName " . join('; ', map {join(' ', @$_)} @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n"; } } } @@ -547,8 +586,21 @@ sub writeCacheFile }; &$printWERFunc('sysoutWER'); &$printWERFunc('sysoutPWER'); - #store misc scores to disk - print CACHEFILE "\n"; + #store corpus perplexities to disk + print CACHEFILE "\nPerplexity\n"; + foreach my $sysname (keys %{$self->{'perplexity'}}) + { + foreach my $factorName (keys %{$self->{'perplexity'}->{$sysname}}) + { + print CACHEFILE "$sysname $factorName " . $self->{'perplexity'}->{$sysname}->{$factorName} . "\n"; + } + } + print "\nNN/ADJ WER/PWER\n"; + foreach my $sysname (keys %{$self->{'nnAdjWERPWER'}}) + { + print CACHEFILE "$sysname " . join(' ', @{$self->{'nnAdjWERPWER'}->{$sysname}}) . "\n"; + } + print "\n"; close(CACHEFILE); } @@ -575,6 +627,8 @@ sub loadCacheFile elsif($line eq "Statistical comparisons\n") {$mode = 'cmp';} elsif($line eq "Unknown-token counts\n") {$mode = 'unk';} elsif($line eq "WER scores") {$mode = 'wer';} + elsif($line eq "Perplexity") {$mode = 'ppl';} + elsif($line eq "NN/ADJ WER/PWER") {$mode = 'nawp';} #get data when in a mode already elsif($mode eq 'ctime') { @@ -608,7 +662,7 @@ sub loadCacheFile if(!exists $self->{'comparisonStats'}->{$sysname1}) {$self->{'comparisonStats'}->{$sysname1} = {};} if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2} = {};} if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = [];} - my @stats = split(/;/, $rest); + my @stats = map {my @x = split(' ', $_); \@x} split(/;/, $rest); $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = \@stats; } elsif($mode eq 'unk') @@ -636,6 +690,17 @@ sub loadCacheFile $self->{$werType}->{$sysname}->{$factorName}->[2] = \@indices; } } + elsif($mode eq 'ppl') + { + local ($sysname, $factorName, $perplexity) = split(/\s+/, $line); + if(!exists $self->{'perplexity'}->{$sysname}) {$self->{'perplexity'}->{$sysname} = {};} + $self->{'perplexity'}->{$sysname}->{$factorName} = $perplexity; + } + elsif($mode eq 'nawp') + { + local ($sysname, @scores) = split(/\s+/, $line); + $self->{'nnAdjWERPWER'}->{$sysname} = \@scores; + } } close(CACHEFILE); } |