#!/usr/bin/perl -w # $Id$ use strict; use CGI; use Corpus; #Evan's code use Error qw(:try); #files with extensions other than these are interpreted as system translations; see the file 'file-descriptions', if it exists, for the comments that go with them my %FILETYPE = ('e' => 'Reference Translation', 'f' => 'Foreign Original', 'ref.sgm' => 'Reference Translations', 'e.sgm' => 'Reference Translations', 'src.sgm' => 'Foreign Originals', 'f.sgm' => 'Foreign Originals'); my %DONTSCORE = ('f' => 1, 'f.sgm' => 1, 'src.sgm' => 1, 'e' => 1, 'e.sgm' => 1, 'ref.sgm' => 1); my @SHOW = ('f', 'e', 'comm'); my %SHOW_COLOR = ('f' => "BLUE", 'e' => "GREEN"); my $FOREIGN = 'f'; #FILEDESC: textual descriptions associated with specific filenames; to be displayed on the single-corpus view my %FILEDESC = (); &load_descriptions(); my %factorData = loadFactorData('file-factors'); my %MEMORY; &load_memory(); my (@mBLEU,@NIST); @mBLEU=`cat mbleu-memory.dat` if -e "mbleu-memory.dat"; chop(@mBLEU); @NIST = `cat nist-memory.dat` if -e "nist-memory.dat"; chop(@NIST); my %in; &ReadParse(); #parse arguments if (scalar(@ARGV) > 0 && $ARGV[0] eq 'bleu') { $in{CORPUS} = $ARGV[1]; $in{ACTION} = "VIEW_CORPUS"; } my %MULTI_REF; if ($in{CORPUS} && -e "$in{CORPUS}.ref.sgm") { my $sysid; open(REF,"$in{CORPUS}.ref.sgm"); while() { $sysid = $1 if /]*> *(\S.+\S) *<\/seg>/) { push @{$MULTI_REF{$sysid}}, $1; } } close(REF); } if ($in{ACTION} eq '') { &show_corpora(); } elsif ($in{ACTION} eq 'VIEW_CORPUS') { &view_corpus(); } elsif ($in{ACTION} eq 'SCORE_FILE') { &score_file(); } elsif ($in{ACTION} eq 'RESCORE_FILE') { &score_file(); } elsif ($in{ACTION} eq 'COMPARE') { &compare(); } else { &htmlhead("Unknown Action $in{ACTION}"); } print "\n"; ###### SHOW CORPORA IN EVALUATION DIRECTORY sub show_corpora { my %CORPUS = (); # find corpora in evaluation directory: see the factor-index file, which was already read in foreach my $corpusName (keys %factorData) { $CORPUS{$corpusName} = 1; } # list corpora &htmlhead("All Corpora"); print "\n"; } ###### SHOW INFORMATION FOR ONE CORPUS sub view_corpus { my @TABLE; &htmlhead("View Corpus $in{CORPUS}"); # find corpora in evaluation directory my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}}); # $corpus->printDetails(); #debugging info my ($sentence_count, $lineInfo); if(-e "$in{CORPUS}.f") { $lineInfo = `wc -l $in{CORPUS}.f`; $lineInfo =~ /^\s*(\d+)\s+/; $sentence_count = 0 + $1; } else { $lineInfo = `wc -l $in{CORPUS}.e`; $lineInfo =~ /^\s*(\d+)\s+/; $sentence_count = 0 + $1; } print "Corpus '$in{CORPUS}' consists of $sentence_count sentences\n"; print "(with mBLEU)" if ((!defined($in{mBLEU})) && (scalar keys %MEMORY) && -e "$in{CORPUS}.e" && -e "$in{CORPUS}.f"); print "

\n"; print "

\n"; print "\n"; print "\n"; print ""; if (-e "$in{CORPUS}.e") { print ""; } if (-e "$in{CORPUS}.ref.sgm" && -e "$in{CORPUS}.src.sgm") { print ""; if (! -e "$in{CORPUS}.e") { print ""; } } if ($in{mBLEU} && (scalar keys %MEMORY) && -e "$in{CORPUS}.e" && -e "$in{CORPUS}.f") { print ""; } print ""; #can't sort on; only applies to the input print ""; #applies to truth and system outputs print ""; print ""; #can't sort on; only applies to sysoutputs print ""; #can't sort on; only applies to sysoutputs print ""; opendir(DIR, ".") or die "couldn't open '.' for read"; my @filenames = readdir(DIR); #includes . and .. closedir(DIR); foreach $_ (@filenames) { next if -d $_; #if is a directory my $sgm = 0; if (/.sgm$/) { `grep '"; # README if (-e "$in{CORPUS}.$file.README") { my $readme = `cat $in{CORPUS}.$file.README`; $readme =~ s/([\"\'])/\\\"/g; $readme =~ s/[\n\r]/\\n/g; $readme =~ s/\t/\\t/g; $row .= ""; } # filename $row .= "$file"; # description (hard-coded) my @TRANSLATION_SENTENCE = `cat $in{CORPUS}.$file`; chop(@TRANSLATION_SENTENCE); #count sentences that contain null words my $null_count = 0; foreach (@TRANSLATION_SENTENCE) { $null_count++ if /^NULL$/ || /^NONE$/; } if ($null_count > 0) { $row .= "$null_count NULL "; } $row .= " (".$FILETYPE{$file}.")" if defined($FILETYPE{$file}); $row .= " (".$FILEDESC{$in{CORPUS}.".".$file}.")" if defined($FILEDESC{$in{CORPUS}.".".$file}); $row .= " (".$FILEDESC{$file}.")" if defined($FILEDESC{$file}); # filedate my @STAT = stat("$in{CORPUS}.$file"); my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($STAT[8]); #STAT[8] should be last modify time my $time = sprintf("%04d-%02d-%02d %02d:%02d:%02d",$year+1900,$mon+1,$mday,$hour,$min,$sec); $row .= "\n\n"; if (defined($in{SORT}) && $in{SORT} eq 'TIME') { $sort = $time; } # IBM BLEU score my $no_bleu =0; if (!$sgm && -e "$in{CORPUS}.e") { $row .= "\n"; } else { $no_bleu=1; } # NIST score if (-e "$in{CORPUS}.ref.sgm" && -e "$in{CORPUS}.src.sgm" && !$DONTSCORE{$file}) { $row .= "\n"; if ($no_bleu) { $row .= "\n"; } } # multi-bleu if ($in{mBLEU} && (scalar keys %MEMORY) && -e "$in{CORPUS}.e") { $row .= "\n"; } my $isSystemOutput = ($file ne 'e' && $file ne 'f' && $file !~ /^pt/); # misc stats (note the unknown words should come first so the total word count is available for WER) $row .= "\n\n\n\n\n\n"; # correct sentence score my($correct,$wrong,$unknown); $row .= "\n"; if ($in{SORT} eq 'SCORE') { $sort = sprintf("%03d %04d",$correct,$just_syn+$just_sem); } } else { $row .= "\n"; } $row .= "\n"; push @TABLE, "\n$row"; } close(DIR); foreach (reverse sort @TABLE) { print $_; } print "
File (sort) Date (sort)IBM BLEU (sort)NIST (sort)BLEU (sort)mBLEU (sort)Unknown WordsPerplexityWER (sort)Noun & adj WER-PWERSurface vs. lemma PWERStatistical Measures".$time.""; if (!defined($DONTSCORE{$file}) && $file !~ /^f$/ && $file ne "e" && $file !~ /^pt/) { my ($score,$p1,$p2,$p3,$p4,$bp) = $corpus->calcBLEU($file, 'surf'); print STDERR "193: `$score `$p1 `$p2 `$p3 `$p4 `$bp\n"; $row .= sprintf("%.04f %.01f/%.01f/%.01f/%.01f *%.03f", $score, $p1, $p2, $p3, $p4, $bp); if (defined($in{SORT}) && $in{SORT} eq 'IBM') { $sort = $score; } } $row .= ""; print "$DONTSCORE{$file}+"; my ($nist,$nist_bleu); if ($file =~ /sgm$/) { ($nist,$nist_bleu) = get_nist_score("$in{CORPUS}.ref.sgm","$in{CORPUS}.src.sgm","$in{CORPUS}.$file"); $row .= sprintf("%.04f",$nist); if ($in{SORT} eq 'NIST') { $sort = $nist; } } $row .= ""; if ($file =~ /sgm$/) { $row .= sprintf("%.04f",$nist_bleu); if ($in{SORT} eq 'BLEU') { $sort = $nist_bleu; } } $row .= ""; if (!defined($DONTSCORE{$file}) && $file !~ /^f$/ && $file ne "e") { my ($score,$p1,$p2,$p3,$p4,$bp) = get_multi_bleu_score("$in{CORPUS}.f","$in{CORPUS}.e","$in{CORPUS}.$file"); $row .= sprintf("%.04f %.01f/%.01f/%.01f/%.01f *%.03f",$score,$p1,$p2,$p3,$p4,$bp); if ($in{SORT} eq 'mBLEU') { $sort = $score; } } $row .= ""; if($file eq 'f') #input { try { my ($unknownCount, $totalCount) = calc_unknown_words($corpus, 'surf'); $row .= sprintf("%.4lf (%d / %d)", $unknownCount / $totalCount, $unknownCount, $totalCount); } catch Error::Simple with {$row .= "[system error]";}; } $row .= ""; if($file eq 'e' || $file eq 'f' || $isSystemOutput) { try { my $perplexity = $corpus->calcPerplexity(($file eq 'e') ? 'truth' : (($file eq 'f') ? 'input' : $file), 'surf'); $row .= sprintf("%.2lf", $perplexity); } catch Error::Simple with {$row .= "[system error]";} } $row .= ""; if($isSystemOutput) { try { my $surfaceWER = $corpus->calcOverallWER($file); $row .= sprintf("%.4lf", $surfaceWER); } catch Error::Simple with {$row .= "[system error]";}; } $row .= ""; my ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER); if($isSystemOutput) { try { ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER) = calc_misc_stats($corpus, $file); $row .= sprintf("WER = %.4lg
PWER = %.4lg
ratio = %.3lf", $nnAdjWER, $nnAdjPWER, $nnAdjPWER / $nnAdjWER); } catch Error::Simple with {$row .= "[system error]";}; } $row .= "
"; if($isSystemOutput) { if($surfPWER == -1) { $row .= "[system error]"; } else { my ($lemmaBLEU, $p1, $p2, $p3, $p4, $brevity) = $corpus->calcBLEU($file, 'lemma'); $row .= sprintf("surface = %.3lf
lemma = %.3lf
lemma BLEU = %.04f %.01f/%.01f/%.01f/%.01f *%.03f", $surfPWER, $lemmaPWER, $lemmaBLEU, $p1, $p2, $p3, $p4, $brevity); } } $row .= "
"; if($isSystemOutput) { try { my $testInfo = $corpus->statisticallyTestBLEUResults($file, 'surf'); my @tTestPValues = @{$testInfo->[0]}; my @confidenceIntervals = @{$testInfo->[1]}; $row .= "n-gram precision p-values (high p <=> consistent score):
t test " . join("/", map {sprintf("%.4lf", $_)} @tTestPValues); $row .= "

n-gram precision 95% intervals:
" . join(",
", map {sprintf("[%.4lf - %.4lf]", $_->[0], $_->[1])} @confidenceIntervals); my @bleuInterval = (approxBLEUFromNgramScores(map {$_->[0]} @confidenceIntervals), approxBLEUFromNgramScores(map {$_->[1]} @confidenceIntervals)); $row .= sprintf("
(BLEU: ~[%.4lf - %.4lf])", $bleuInterval[0], $bleuInterval[1]); } catch Error::Simple with {$row .= "[system error]";} } $row .= "

"; if (!defined($DONTSCORE{$file}) && (scalar keys %MEMORY)) { my ($correct,$just_syn,$just_sem,$wrong,$unknown) = get_score_from_memory("$in{CORPUS}.$FOREIGN", "$in{CORPUS}.$file"); $row .= "$correct"; $row .= "/$just_syn"; $row .= "/$just_sem"; $row .= "/$wrong ($unknown)
\n"; print "\n"; print " Compare all different sentences (instead of just differently evaluated sentences) with evaluation

\n"; print "

The score is to be read as: correct/just-syn-correct/just-sem-correct/wrong (unscored)\n"; print "
IBM BLEU is to be read as: metric unigram/bigram/trigram/quadgram *brevity-penalty

"; print "

"; print "

Comparison of System Translations (p-values)

"; my @sysnames = $corpus->getSystemNames(); for(my $i = 0; $i < scalar(@sysnames); $i++) { for(my $j = $i + 1; $j < scalar(@sysnames); $j++) { my $comparison = $corpus->statisticallyCompareSystemResults($sysnames[$i], $sysnames[$j], 'surf'); print "

" . $sysnames[$i] . " vs. " . $sysnames[$j] . ": [t test] "; for(my $k = 0; $k < scalar(@{$comparison->[0]}); $k++) { print sprintf(($k == 0) ? "%.4lg" : "; %.4lg ", $comparison->[0]->[$k]); if($comparison->[1]->[$k] == 0) {print "(←)";} else {print "(→)";} } print "  ---  [sign test] "; for(my $k = 0; $k < scalar(@{$comparison->[2]}); $k++) { print sprintf(($k == 0) ? "%.4lg " : "; %.4lg ", $comparison->[2]->[$k]); if($comparison->[3]->[$k] == 0) {print "(←)";} else {print "(→)";} } print "\n"; } } print "All corpora\n"; } ###### SCORE TRANSLATIONS sub score_file { if ($in{VIEW}) { &htmlhead("View Translations"); } else { &htmlhead("Score Translations"); } print "View Corpus $in{CORPUS}

\n"; print "

\n"; print "\n"; print "\n"; print "\n"; # get sentences my @SENTENCES; if ($in{FILE} =~ /.sgm$/) { @SENTENCES = `grep ']+> *(\S.+\S) *<\/seg> *$/$1/; } } else { @SENTENCES = `cat $in{CORPUS}.$in{FILE}`; chop(@SENTENCES); } my %REFERENCE; foreach (@SHOW) { if (-e "$in{CORPUS}.$_") { @{$REFERENCE{$_}} = `cat $in{CORPUS}.$_`; chop(@{$REFERENCE{$_}}); } } # update memory foreach (keys %in) { next unless /^SYN_SCORE_(\d+)$/; next unless $in{"SEM_SCORE_$1"}; &store_in_memory($REFERENCE{$FOREIGN}[$1], $SENTENCES[$1], "syn_".$in{"SYN_SCORE_$1"}." sem_".$in{"SEM_SCORE_$1"}); } # display sentences for(my $i=0;$i<=$#SENTENCES;$i++) { my $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES[$i]); next if ($in{ACTION} eq 'SCORE_FILE' && ! $in{VIEW} && $evaluation ne '' && $evaluation ne 'wrong'); print "

Sentence ".($i+1).":
\n"; # color coding &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES[$i]),$REFERENCE{"e"}[$i]); if (%MULTI_REF) { foreach my $sysid (keys %MULTI_REF) { print "".$MULTI_REF{$sysid}[$i]." (Reference $sysid)
\n"; } } # all sentences print "$SENTENCES[$i] (System output)
\n"; foreach my $ref (@SHOW) { if (-e "$in{CORPUS}.$ref") { print "".$REFERENCE{$ref}[$i]." (".$FILETYPE{$ref}.")
\n" if $REFERENCE{$ref}[$i]; } } if (! $in{VIEW}) { print " perfect English\n"; print " imperfect English
\n"; print " correct meaning\n"; print " incorrect meaning\n"; } } if (! $in{VIEW}) { print "

\n"; print "

\n"; } } sub color_highlight_ngrams { my($i,$sentence,$single_reference) = @_; my @REF = (); my %NGRAM = (); if (%MULTI_REF) { foreach my $sysid (keys %MULTI_REF) { push @REF,&nist_normalize_text($MULTI_REF{$sysid}[$i]); } } elsif ($single_reference) { @REF = ($single_reference); } if (@REF) { foreach my $ref (@REF) { my @WORD = split(/\s+/,$ref); for(my $n=1;$n<=4;$n++) { for(my $w=0;$w<=$#WORD-($n-1);$w++) { my $ngram = "$n: "; for(my $j=0;$j<$n;$j++) { $ngram .= $WORD[$w+$j]." "; } $NGRAM{$ngram}++; } } } $sentence =~ s/^\s+//; $sentence =~ s/\s+/ /; $sentence =~ s/\s+$//; my @WORD = split(/\s+/,$sentence); my @CORRECT; for(my $w=0;$w<=$#WORD;$w++) { $CORRECT[$w] = 0; } for(my $n=1;$n<=4;$n++) { for(my $w=0;$w<=$#WORD-($n-1);$w++) { my $ngram = "$n: "; for(my $j=0;$j<$n;$j++) { $ngram .= $WORD[$w+$j]." "; } next unless defined($NGRAM{$ngram}) && $NGRAM{$ngram}>0; $NGRAM{$ngram}--; for(my $j=0;$j<$n;$j++) { $CORRECT[$w+$j] = $n; } } } my @COLOR; $COLOR[0] = "#FF0000"; $COLOR[1] = "#C000C0"; $COLOR[2] = "#0000FF"; $COLOR[3] = "#00C0C0"; $COLOR[4] = "#00C000"; for(my $w=0;$w<=$#WORD;$w++) { print "$WORD[$w]".$CORRECT[$w]." "; } print "\n
"; } } ###### OTHER STATS #print (in some unspecified way) the offending exception of type Error::Simple #arguments: the error object, a context string #return: none sub printError { my ($err, $context) = @_; warn "$context: " . $err->{'-text'} . " @ " . $err->{'-file'} . " (" .$err->{'-line'} . ")\n"; } #compute number and percentage of unknown tokens for a given factor in foreign corpus #arguments: corpus object ref, factor name #return (unkwordCount, totalWordCount), or (-1, -1) if an error occurs sub calc_unknown_words { my ($corpus, $factorName) = @_; try { my ($unknownCount, $totalCount) = $corpus->calcUnknownTokens($factorName); return ($unknownCount, $totalCount); } catch Error::Simple with { my $err = shift; printError($err, 'calc_unknown_words()'); return (-1, -1); }; } #compute (if we have the necessary factors) info for: #- diff btwn wer and pwer for NNs & ADJs -- if large, many reordering errors #- diff btwn pwer for surface forms and pwer for lemmas -- if large, morphology errors #arguments: corpus object, system name #return (NN/ADJ (wer, pwer), surf pwer, lemma pwer), or (-1, -1, -1, -1) if an error occurs sub calc_misc_stats { my ($corpus, $sysname) = @_; try { my ($nnAdjWER, $nnAdjPWER) = $corpus->calcNounAdjWER_PWERDiff($sysname); my ($surfPWER, $lemmaPWER) = ($corpus->calcOverallPWER($sysname, 'surf'), $corpus->calcOverallPWER($sysname, 'lemma')); return ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER); } catch Error::Simple with { my $err = shift; printError($err, 'calc_misc_stats()'); return (-1, -1, -1, -1); }; } #approximate BLEU score from n-gram precisions (currently assume no length penalty) #arguments: n-gram precisions as an array #return: BLEU score sub approxBLEUFromNgramScores { my $logsum = 0; foreach my $p (@_) {$logsum += log($p);} return exp($logsum / scalar(@_)); } ###### NIST SCORE sub get_nist_score { my($reference_file,$source_file,$translation_file) = @_; my @STAT = stat($translation_file); my $current_timestamp = $STAT[9]; foreach (@NIST) { my ($file,$time,$nist,$bleu) = split; return ($nist,$bleu) if ($file eq $translation_file && $current_timestamp == $time); } my $nist_eval = `/home/pkoehn/statmt/bin/mteval-v10.pl -c -r $reference_file -s $source_file -t $translation_file`; return (0,0) unless ($nist_eval =~ /NIST score = (\d+\.\d+) BLEU score = (\d+\.\d+)/i); open(NIST,">>nist-memory.dat"); printf NIST "$translation_file $current_timestamp %f %f\n",$1,$2; close(NIST); return ($1,$2); } sub nist_normalize_text { my ($norm_text) = @_; # language-independent part: $norm_text =~ s///g; # strip "skipped" tags $norm_text =~ s/-\n//g; # strip end-of-line hyphenation and join lines $norm_text =~ s/\n/ /g; # join lines $norm_text =~ s/(\d)\s+(\d)/$1$2/g; #join digits $norm_text =~ s/"/"/g; # convert SGML tag for quote to " $norm_text =~ s/&/&/g; # convert SGML tag for ampersand to & $norm_text =~ s/</ $norm_text =~ s/>/>/g; # convert SGML tag for greater-than to < # language-dependent part (assuming Western languages): $norm_text = " $norm_text "; # $norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case; $norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g; # tokenize punctuation $norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit $norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit $norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit $norm_text =~ s/\s+/ /g; # one space only between words $norm_text =~ s/^\s+//; # no leading space $norm_text =~ s/\s+$//; # no trailing space return $norm_text; } ###### BLEU SCORE sub get_multi_bleu_score { my($foreign_file,$reference_file,$translation_file) = @_; my @STAT = stat($translation_file); my $current_timestamp = $STAT[9]; foreach (@mBLEU) { my ($file,$time,$score,$g1,$g2,$g3,$g4,$bp) = split; if ($file eq $translation_file && $current_timestamp == $time) { return ($score,$g1*100,$g2*100,$g3*100,$g4*100,$bp); } } # load reference translation from reference file my @REFERENCE_SENTENCE = `cat $reference_file`; chop(@REFERENCE_SENTENCE); my @TRANSLATION_SENTENCE = `cat $translation_file`; chop(@TRANSLATION_SENTENCE); my %REF; my @FOREIGN_SENTENCE = `cat $foreign_file`; chop(@FOREIGN_SENTENCE); for(my $i=0;$i<=$#TRANSLATION_SENTENCE;$i++) { push @{$REF{$FOREIGN_SENTENCE[$i]}},$REFERENCE_SENTENCE[$i]; } # load reference translation from translation memory foreach my $memory (keys %MEMORY) { next if $MEMORY{$memory} ne 'syn_correct sem_correct'; my ($foreign,$english) = split(/ .o0O0o. /,$memory); next unless defined($REF{$foreign}); push @{$REF{$foreign}},$english; } my(@CORRECT,@TOTAL,$length_translation,$length_reference); # compute bleu for(my $i=0;$i<=$#TRANSLATION_SENTENCE;$i++) { my %REF_NGRAM = (); my @WORD = split(/ /,$TRANSLATION_SENTENCE[$i]); my $length_translation_this_sentence = scalar(@WORD); my ($closest_diff,$closest_length) = (9999,9999); foreach my $reference (@{$REF{$FOREIGN_SENTENCE[$i]}}) { my @WORD = split(/ /,$reference); my $length = scalar(@WORD); if (abs($length_translation_this_sentence-$length) < $closest_diff) { $closest_diff = abs($length_translation_this_sentence-$length); $closest_length = $length; } for(my $n=1;$n<=4;$n++) { my %REF_NGRAM_N = (); for(my $start=0;$start<=$#WORD-($n-1);$start++) { my $ngram = "$n"; for(my $w=0;$w<$n;$w++) { $ngram .= " ".$WORD[$start+$w]; } $REF_NGRAM_N{$ngram}++; } foreach my $ngram (keys %REF_NGRAM_N) { if (!defined($REF_NGRAM{$ngram}) || $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) { $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram}; } } } } $length_translation += $length_translation_this_sentence; $length_reference += $closest_length; for(my $n=1;$n<=4;$n++) { my %T_NGRAM = (); for(my $start=0;$start<=$#WORD-($n-1);$start++) { my $ngram = "$n"; for(my $w=0;$w<$n;$w++) { $ngram .= " ".$WORD[$start+$w]; } $T_NGRAM{$ngram}++; } foreach my $ngram (keys %T_NGRAM) { my $n = 0+$ngram; # print "$i e $ngram $T_NGRAM{$ngram}
\n"; $TOTAL[$n] += $T_NGRAM{$ngram}; if (defined($REF_NGRAM{$ngram})) { if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) { $CORRECT[$n] += $T_NGRAM{$ngram}; # print "$i e correct1 $T_NGRAM{$ngram}
\n"; } else { $CORRECT[$n] += $REF_NGRAM{$ngram}; # print "$i e correct2 $REF_NGRAM{$ngram}
\n"; } } } } } my $brevity_penalty = 1; if ($length_translation<$length_reference) { $brevity_penalty = exp(1-$length_reference/$length_translation); } my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) + my_log( $CORRECT[2]/$TOTAL[2] ) + my_log( $CORRECT[3]/$TOTAL[3] ) + my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4); open(BLEU,">>mbleu-memory.dat"); @STAT = stat($translation_file); printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty; close(BLEU); return ($bleu, 100*$CORRECT[1]/$TOTAL[1], 100*$CORRECT[2]/$TOTAL[2], 100*$CORRECT[3]/$TOTAL[3], 100*$CORRECT[4]/$TOTAL[4], $brevity_penalty); } sub my_log { return -9999999999 unless $_[0]; return log($_[0]); } ###### SCORE TRANSLATIONS ################################ IN PROGRESS ############################### sub compare2 { &htmlhead("Compare Translations"); print "View Corpus $in{CORPUS}

\n"; print "

\n"; print "\n"; print "\n"; my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}}); $corpus->writeComparisonPage(\*STDOUT, /^.*$/); print "
\n"; } sub compare { &htmlhead("Compare Translations"); print "View Corpus $in{CORPUS}

\n"; print "

\n"; print "\n"; print "\n"; # get sentences my %SENTENCES; my $sentence_count; foreach (keys %in) { if (/^FILE_(.+)$/) { my $file = $1; print "\n"; my @SENTENCES; if ($file =~ /.sgm$/) { @{$SENTENCES{$file}} = `grep ']+> *(\S.+\S) *<\/seg> *$/$1/; } } else { @{$SENTENCES{$file}} = `cat $in{CORPUS}.$1`; chop(@{$SENTENCES{$file}}); } $sentence_count = scalar @{$SENTENCES{$file}}; } } my %REFERENCE; foreach (@SHOW) { if (-e "$in{CORPUS}.$_") { @{$REFERENCE{$_}} = `cat $in{CORPUS}.$_`; chop(@{$REFERENCE{$_}}); } } # update memory foreach (keys %in) { next unless /^SYN_SCORE_(.+)_(\d+)$/; next unless $in{"SEM_SCORE_$1_$2"}; &store_in_memory($REFERENCE{$FOREIGN}[$2], $SENTENCES{$1}[$2], "syn_".$in{"SYN_SCORE_$1_$2"}." sem_".$in{"SEM_SCORE_$1_$2"}); } # display sentences for(my $i=0;$i<$sentence_count;$i++) { my $evaluation = ""; my $show = 0; my $surface = ""; foreach my $file (keys %SENTENCES) { if ($in{SURFACE}) { $SENTENCES{$file}[$i] =~ s/ *$//; $surface = $SENTENCES{$file}[$i] if ($surface eq ''); $show = 1 if ($SENTENCES{$file}[$i] ne $surface); } else { my $this_ev = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]); $this_ev = "syn_wrong sem_wrong" unless $this_ev; $evaluation = $this_ev if ($evaluation eq ''); $show = 1 if ($evaluation ne $this_ev); } } next unless $show; print "
Sentence ".($i+1).":
\n"; foreach my $ref (@SHOW) { if (-e "$in{CORPUS}.$ref") { print "".$REFERENCE{$ref}[$i]." (".$FILETYPE{$ref}.")
\n"; } } foreach my $file (keys %SENTENCES) { print "$SENTENCES{$file}[$i] ($file)
\n"; &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]); if (0 && $in{WITH_EVAL}) { $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]); print " perfect English\n"; print " imperfect English
\n"; print " correct meaning\n"; print " incorrect meaning
\n"; } } } print "

\n"; print "

\n"; } ###### MEMORY SUBS sub load_memory { open(MEMORY,"evaluation-memory.dat") or return; while() { chop; my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /); $evaluation = 'syn_correct sem_correct' if ($evaluation eq 'correct'); $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation; } close(MEMORY); } sub get_score_from_memory { my($foreign_file,$translation_file) = @_; my $unknown=0; my $correct=0; my $just_syn=0; my $just_sem=0; my $wrong=0; my @FOREIGN = `cat $foreign_file`; chop(@FOREIGN); my @TRANSLATION = `cat $translation_file`; chop(@TRANSLATION); for(my $i=0;$i<=$#FOREIGN;$i++) { if (my $evaluation = &get_from_memory($FOREIGN[$i],$TRANSLATION[$i])) { if ($evaluation eq 'syn_correct sem_correct') { $correct++ } elsif ($evaluation eq 'syn_correct sem_wrong') { $just_syn++ } elsif ($evaluation eq 'syn_wrong sem_correct') { $just_sem++ } elsif ($evaluation eq 'syn_wrong sem_wrong') { $wrong++ } else { $unknown++; } } else { $unknown++; } } return($correct,$just_syn,$just_sem,$wrong,$unknown); } sub store_in_memory { my($foreign,$translation,$evaluation) = @_; &trim(\$translation); return if $MEMORY{"$foreign .o0O0o. $translation"} eq $evaluation; $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation; open(MEMORY,">>evaluation-memory.dat") or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n"; print MEMORY "$foreign .o0O0o. $translation .o0O0o. $evaluation\n"; close(MEMORY); } sub get_from_memory { my($foreign,$translation) = @_; &trim(\$translation); return $MEMORY{"$foreign .o0O0o. $translation"}; } sub trim { my($translation) = @_; $$translation =~ s/ +/ /g; $$translation =~ s/^ +//; $$translation =~ s/ +$//; } sub load_descriptions { open(FD,"file-descriptions") or die "load_descriptions(): couldn't open 'file-descriptions' for read\n"; while() { chomp; my($file,$description) = split(/\s+/,$_,2); $FILEDESC{$file} = $description; } close(FD); } #read config file giving various corpus config info #arguments: filename to read #return: hash of corpus names to strings containing formatted info sub loadFactorData { my $filename = shift; my %data = (); open(INFILE, "<$filename") or die "loadFactorData(): couldn't open '$filename' for read\n"; while(my $line = ) { if($line =~ /^\#/) {next;} #skip comment lines $line =~ /^\s*(\S+)\s*:\s*(\S.*\S)\s*$/; my $corpusName = $1; $data{$corpusName} = $2; } close(INFILE); return %data; } ###### SUBS sub htmlhead { print <<"___ENDHTML"; Content-type: text/html MTEval: $_[0]

Evaluation Tool for Machine Translation
$_[0]

___ENDHTML } ############################# parts of cgi-lib.pl sub ReadParse { my ($i, $key, $val); # Read in text my $in; if (&MethGet) { $in = $ENV{'QUERY_STRING'}; } elsif (&MethPost) { read(STDIN,$in,$ENV{'CONTENT_LENGTH'}); } my @in = split(/[&;]/,$in); foreach $i (0 .. $#in) { # Convert plus's to spaces $in[$i] =~ s/\+/ /g; # Split into key and value. ($key, $val) = split(/=/,$in[$i],2); # splits on the first =. # Convert %XX from hex numbers to alphanumeric $key =~ s/%(..)/pack("c",hex($1))/ge; $val =~ s/%(..)/pack("c",hex($1))/ge; # Associate key and value $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator $in{$key} .= $val; } return scalar(@in); } sub MethGet { return ($ENV{'REQUEST_METHOD'} eq "GET"); } sub MethPost { return ($ENV{'REQUEST_METHOD'} eq "POST"); }