1 files changed, 0 insertions, 935 deletions
diff --git a/scripts/analysis/smtgui/newsmtgui.cgi b/scripts/analysis/smtgui/newsmtgui.cgi
deleted file mode 100755
index 0e969a788..000000000
--- a/scripts/analysis/smtgui/newsmtgui.cgi
+++ /dev/null
@@ -1,935 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-use CGI;
-use Corpus; #Evan's code
-use Error qw(:try);
-
-#files with extensions other than these are interpreted as system translations; see the file 'file-descriptions', if it exists, for the comments that go with them
-my %FILETYPE = ('e' => 'Reference Translation',
-		'f' => 'Foreign Original',
-		'ref.sgm' => 'Reference Translations',
-		'e.sgm' => 'Reference Translations',
-		'src.sgm' => 'Foreign Originals',
-		'f.sgm' => 'Foreign Originals');
-my %DONTSCORE = ('f' => 1, 'f.sgm' => 1, 'src.sgm' => 1,
-		 'e' => 1, 'e.sgm' => 1, 'ref.sgm' => 1);
-my @SHOW = ('f', 'e', 'comm');
-my %SHOW_COLOR = ('f' => "BLUE",
-		  'e' => "GREEN");
-my $FOREIGN = 'f';
-
-#FILEDESC: textual descriptions associated with specific filenames; to be displayed on the single-corpus view
-my %FILEDESC = (); &load_descriptions();
-my %factorIndices = loadFactorIndices('file-factors');
-my %MEMORY;        &load_memory();
-my (@mBLEU,@NIST);
-@mBLEU=`cat mbleu-memory.dat` if -e "mbleu-memory.dat"; chop(@mBLEU);
-@NIST = `cat nist-memory.dat` if -e "nist-memory.dat"; chop(@NIST);
-my %in;            &ReadParse(); #parse arguments
-
-if (scalar(@ARGV) > 0 && $ARGV[0] eq 'bleu') {
-  $in{CORPUS} = $ARGV[1];
-  $in{ACTION} = "VIEW_CORPUS";
-}
-
-my %MULTI_REF;
-if ($in{CORPUS} && -e "$in{CORPUS}.ref.sgm") {
-  my $sysid;
-  open(REF,"$in{CORPUS}.ref.sgm");
-  while(<REF>) {
-    $sysid = $1 if /<DOC.+sysid=\"([^\"]+)\"/;
-    if (/<seg[^>]*> *(\S.+\S) *<\/seg>/) {
-      push @{$MULTI_REF{$sysid}}, $1;
-    }
-  }
-  close(REF);
-}
-
-if ($in{ACTION} eq '') { &show_corpora(); }
-elsif ($in{ACTION} eq 'VIEW_CORPUS') { &view_corpus(); }
-elsif ($in{ACTION} eq 'SCORE_FILE') { &score_file(); }
-elsif ($in{ACTION} eq 'RESCORE_FILE') { &score_file(); }
-elsif ($in{ACTION} eq 'COMPARE') { &compare(); }
-else { &htmlhead("Unknown Action $in{ACTION}"); }
-print "</BODY></HTML>\n";
-
-###### SHOW CORPORA IN EVALUATION DIRECTORY
-
-sub show_corpora {
-  my %CORPUS = ();
-  
-  # find corpora in evaluation directory: see the factor-index file, which was already read in
-  foreach my $corpusName (keys %factorIndices)
-  {
-  	$CORPUS{$corpusName} = 1;
-  }
-  
-  # list corpora
-  &htmlhead("All Corpora");
-  print "<UL>\n";
-  foreach (sort (keys %CORPUS)) {
-    print "<LI><A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($_)."\">Corpus $_</A>\n";
-  }
-  print "</UL>\n";
-}
-
-###### SHOW INFORMATION FOR ONE CORPUS
-
-sub view_corpus {
-  my @TABLE;
-  &htmlhead("View Corpus $in{CORPUS}");
-  
-  # find corpora in evaluation directory
-  my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-indices' => $factorIndices{$in{CORPUS}});
-  
-  my ($sentence_count, $lineInfo);
-  if(-e "$in{CORPUS}.f")
-  {
-  	$lineInfo = `wc -l $in{CORPUS}.f`;
-  	$lineInfo =~ /^\s*(\d+)\s+/;
-  	$sentence_count = 0 + $1;
-	}
-	else
-	{
-	  $lineInfo = `wc -l $in{CORPUS}.e`;
-	  $lineInfo =~ /^\s*(\d+)\s+/;
-	  $sentence_count = 0 + $1;
-	}
-  
-  print "Corpus '$in{CORPUS}' consists of $sentence_count sentences\n";
-  print "(<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&mBLEU=1>with mBLEU</A>)" if ((!defined($in{mBLEU})) && (scalar keys %MEMORY) && -e "$in{CORPUS}.e" && -e "$in{CORPUS}.f");
-  print "<P>\n";
-  print "<FORM ACTION=''>\n";
-  print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=COMPARE>\n";
-  print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
-  print "<TABLE BORDER=1 CELLSPACING=0><TR>
-<TD>Filename (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS}).">sort</A>)</TD>
-<TD>Date     (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=TIME>sort</A>)</TD>";
-  if (-e "$in{CORPUS}.e") {
-    print "<TD>IBM BLEU (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=IBM>sort</A>)</TD>";
-  }
-  if (-e "$in{CORPUS}.ref.sgm" && -e "$in{CORPUS}.src.sgm") {
-    print "<TD>NIST (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=NIST>sort</A>)</TD>";
-    if (! -e "$in{CORPUS}.e") {
-      print "<TD>BLEU (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=BLEU>sort</A>)</TD>";
-    }
-  }
-  if ($in{mBLEU} && (scalar keys %MEMORY) && -e "$in{CORPUS}.e" && -e "$in{CORPUS}.f") {
-    print "<TD>mBLEU (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=mBLEU>sort</A>)</TD>";
-  }
-  print "<TD>Unknown Words</TD>"; #can't sort on; only applies to the input
-  print "<TD>Noun & adj WER-PWER</TD>"; #can't sort on; only applies to sysoutputs
-  print "<TD>Surface vs. lemma PWER</TD>"; #can't sort on; only applies to sysoutputs
-  print "<TD>Score (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=SCORE>sort</A>)</TD><TD>Actions</TD></TR>";
-
-  open(DIR,"ls $in{CORPUS}.*|");
-  while(<DIR>) {
-    my $sort = "";
-    chop;
-    my $sgm = 0;
-    if (/.sgm$/)
-	 {
-	 	`grep '<seg' $_ | wc -l` =~ /^\s*(\d+)\s+/;
-		next unless $1 == $sentence_count;
-		$sgm = 1;
-    }
-    else
-	 {
-	 	`wc -l $_` =~ /^\s*(\d+)\s+/;
-		next unless $1 == $sentence_count;
-    }
-    /^$in{CORPUS}.([^\/]+)$/;
-    my $file = $1;
-    # checkbox for compare
-    my $row = "<TR><TD><INPUT TYPE=CHECKBOX NAME=FILE_$file VALUE=1>";
-    # README
-    if (-e "$in{CORPUS}.$file.README") {
-      my $readme = `cat $in{CORPUS}.$file.README`;
-      $readme =~ s/([\"\'])/\\\"/g;
-      $readme =~ s/[\n\r]/\\n/g;
-      $readme =~ s/\t/\\t/g;
-      $row .= "<A HREF='javascript:FieldInfo(\"$in{CORPUS}.$file\",\"$readme\")'>";
-    }
-    # filename
-    $row .= "$in{CORPUS}.$file</A>";
-    # description (hard-coded)
-    my @TRANSLATION_SENTENCE = `cat $in{CORPUS}.$file`; 
-    chop(@TRANSLATION_SENTENCE);
-    
-	 #count sentences that contain null words
-	 my $null_count = 0;
-    foreach (@TRANSLATION_SENTENCE)
-	 {
-      $null_count++ if /^NULL$/ || /^NONE$/;
-    }
-    if ($null_count > 0) { 
-      $row .= "$null_count NULL ";
-    }
-
-    $row .= " (".$FILETYPE{$file}.")" if defined($FILETYPE{$file});
-    $row .= " (".$FILEDESC{$in{CORPUS}.".".$file}.")" if defined($FILEDESC{$in{CORPUS}.".".$file});
-    $row .= " (".$FILEDESC{$file}.")" if defined($FILEDESC{$file});
-    # filedate
-    my @STAT = stat("$in{CORPUS}.$file");
-    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($STAT[8]); #STAT[8] should be last modify time
-    my $time = sprintf("%04d-%02d-%02d %02d:%02d:%02d",$year+1900,$mon+1,$mday,$hour,$min,$sec);
-    $row .= "</TD>\n<TD>".$time."</TD>\n";
-    if (defined($in{SORT}) && $in{SORT} eq 'TIME') { $sort = $time; }
-    # IBM BLEU score
-    my $no_bleu =0;
-    if (!$sgm && -e "$in{CORPUS}.e") {
-      $row .= "<TD>";
-      if (!defined($DONTSCORE{$file}) && $file !~ /^f$/ && $file ne "e" && $file !~ /^pt/) {
-	my ($score,$p1,$p2,$p3,$p4,$bp) = $corpus->calcBLEU($file, 'surf');
-	$row .= sprintf("<B>%.04f</B> %.01f/%.01f/%.01f/%.01f *%.03f", $score, $p1, $p2, $p3, $p4, $bp);
-	if (defined($in{SORT}) && $in{SORT} eq 'IBM') { $sort = $score; }
-      }
-      $row .= "</TD>\n";
-    }
-    else {
-      $no_bleu=1;
-    }
-    # NIST score
-    if (-e "$in{CORPUS}.ref.sgm" && -e "$in{CORPUS}.src.sgm" 
-	&& !$DONTSCORE{$file}) {    
-      $row .= "<TD>";
-      print "$DONTSCORE{$file}+";
-      my ($nist,$nist_bleu);
-      if ($file =~ /sgm$/) {
-	($nist,$nist_bleu) = &get_nist_score("$in{CORPUS}.ref.sgm","$in{CORPUS}.src.sgm","$in{CORPUS}.$file");
-	$row .= sprintf("<B>%.04f</B>",$nist);
-	if ($in{SORT} eq 'NIST') { $sort = $nist; }
-      }
-      $row .= "</TD>\n";
-      if ($no_bleu) {
-	$row .= "<TD>";
-	if ($file =~ /sgm$/) {
-	  $row .= sprintf("<B>%.04f</B>",$nist_bleu);
-	  if ($in{SORT} eq 'BLEU') { $sort = $nist_bleu; }
-	}
-	$row .= "</TD>\n";
-      }
-    }
-    # multi-bleu
-    if ($in{mBLEU} && (scalar keys %MEMORY) && -e "$in{CORPUS}.e") {
-      $row .= "<TD>";
-      if (!defined($DONTSCORE{$file}) && $file !~ /^f$/ && $file ne "e") {
-	my ($score,$p1,$p2,$p3,$p4,$bp) = &get_multi_bleu_score("$in{CORPUS}.f","$in{CORPUS}.e","$in{CORPUS}.$file");
-	$row .= sprintf("<B>%.04f</B> %.01f/%.01f/%.01f/%.01f *%.03f",$score,$p1,$p2,$p3,$p4,$bp);
-	if ($in{SORT} eq 'mBLEU') { $sort = $score; }
-      }
-      $row .= "</TD>\n";
-    }
-	 
-	 # misc stats
-	 $row .= "<TD align=\"center\">";
-	 if($file eq 'f') #input
-	 {
-	 	try
-		{
-			my ($unknownCount, $totalCount) = calc_unknown_words($corpus, 'surf');
-	 		$row .= sprintf("%.4lf (%d / %d)", $unknownCount / $totalCount, $unknownCount, $totalCount);
-		}
-		catch Error::Simple with {$row .= "[system error]";};
-	 }
-	 $row .= "</TD>\n<TD align=\"center\">";
-	 my ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER);
-	 if($file ne 'e' && $file ne 'f' && $file !~ /^pt/) #system output
-	 {
-		try
-		{
-			($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER) = calc_misc_stats($corpus, $file);
-			$row .= "WER = $nnAdjWER<br>PWER = $nnAdjPWER<br><b>diff = " . ($nnAdjWER - $nnAdjPWER) . "</b>";
-		}
-		catch Error::Simple with {$row .= "[system error]";};
-	}
-	$row .= "</TD>\n<TD align=\"center\">";
-	if($file ne 'e' && $file ne 'f' && $file !~ /^pt/) #system output
-	{
-		if($surfPWER == -1)
-		{
-			$row .= "[system error]";
-		}
-		else
-		{
-			my ($lemmaBLEU, $p1, $p2, $p3, $p4, $brevity) = $corpus->calcBLEU($file, 'lemma');
-			$row .= sprintf("surface = %d<br>lemma = %d<br><b>lemma BLEU = %.04f</b> %.01f/%.01f/%.01f/%.01f *%.03f", 
-									$surfPWER, $lemmaPWER, $lemmaBLEU, $p1, $p2, $p3, $p4, $brevity);
-		}
-	}
-	$row .= "</TD>\n";
-
-    # correct sentence score
-    my($correct,$wrong,$unknown);
-    $row .= "<TD>";
-    if (!defined($DONTSCORE{$file}) && (scalar keys %MEMORY)) {
-      my ($correct,$just_syn,$just_sem,$wrong,$unknown) = 
-	&get_score_from_memory("$in{CORPUS}.$FOREIGN",
-			       "$in{CORPUS}.$file");
-      $row .= "<B><FONT COLOR=GREEN>$correct</FONT></B>";
-      $row .= "/<FONT COLOR=ORANGE>$just_syn</FONT>";
-      $row .= "/<FONT COLOR=ORANGE>$just_sem</FONT>";
-      $row .= "/<FONT COLOR=RED>$wrong</FONT> ($unknown)</TD>\n";
-      if ($in{SORT} eq 'SCORE') { 
-	$sort = sprintf("%03d %04d",$correct,$just_syn+$just_sem);
-      }
-    }
-	 else
-	 {
-	 	$row .= "</TD>\n";
-	}
-
-    # score / review links
-    $row .= "<TD>";
-    $row .= "<A HREF=\"?ACTION=SCORE_FILE&VIEW=1&CORPUS=".CGI::escape($in{CORPUS})."&FILE=".CGI::escape($file)."\">view</A>" if (!defined($DONTSCORE{$file}));
-    $row .= " <A HREF=\"?ACTION=SCORE_FILE&CORPUS=".CGI::escape($in{CORPUS})."&FILE=".CGI::escape($file)."\">score</A>" if (!defined($DONTSCORE{$file}) && ($unknown || scalar keys %MEMORY == 0));
-    $row .= " <A HREF=\"?ACTION=RESCORE_FILE&CORPUS=".CGI::escape($in{CORPUS})."&FILE=".CGI::escape($file)."\">review</A>" if (!defined($DONTSCORE{$file}) && scalar keys %MEMORY);
-    $row .= "</TD></TR>\n"; 
-    push @TABLE, "<!-- $sort -->\n$row";
-  }
-  close(DIR);
-  foreach (reverse sort @TABLE) { print $_; }
-  print "</TABLE>\n";
-  print "<INPUT TYPE=SUBMIT VALUE=\"Compare\">\n";
-  print "<INPUT TYPE=CHECKBOX NAME=SURFACE VALUE=1 CHECKED> Compare all different sentences (instead of just differently <I>evaluated</I> sentences) <INPUT TYPE=CHECKBOX NAME=WITH_EVAL VALUE=1 CHECKED> with evaluation</FORM><P>\n";
-  print "<P>The score is to be read as: <FONT COLOR=GREEN>correct</FONT>/<FONT COLOR=ORANGE>just-syn-correct</FONT>/<FONT COLOR=ORANGE>just-sem-correct</FONT>/<FONT COLOR=RED>wrong</FONT> (unscored)\n";
-  print "<BR>IBM BLEU is to be read as: <B>metric</B> unigram/bigram/trigram/quadgram *brevity-penalty\n"; 
-  print "<P><A HREF=\"newsmtgui.cgi?action=\">All corpora</A>\n";
-}
-
-###### SCORE TRANSLATIONS
-
-sub score_file {
-  if ($in{VIEW}) {
-    &htmlhead("View Translations");
-  }
-  else {
-    &htmlhead("Score Translations");
-  }
-  print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $in{CORPUS}</A><P>\n";
-  print "<FORM ACTION=\"\" METHOD=POST>\n";
-  print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=$in{ACTION}>\n";
-  print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
-  print "<INPUT TYPE=HIDDEN NAME=FILE VALUE=\"$in{FILE}\">\n";
-
-  # get sentences
-  my @SENTENCES;
-  if ($in{FILE} =~ /.sgm$/) {
-      @SENTENCES = `grep '<seg' $in{CORPUS}.$in{FILE}`;
-      for(my $i=0;$i<$#SENTENCES;$i++) {
-	  $SENTENCES[$i] =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
-      }
-  }
-  else {
-      @SENTENCES = `cat $in{CORPUS}.$in{FILE}`; chop(@SENTENCES);
-  }
-
-  my %REFERENCE;
-  foreach (@SHOW) {
-    if (-e "$in{CORPUS}.$_") {
-      @{$REFERENCE{$_}} = `cat $in{CORPUS}.$_`; chop(@{$REFERENCE{$_}});
-    }
-  }
-
-  # update memory
-  foreach (keys %in) {
-    next unless /^SYN_SCORE_(\d+)$/;
-    next unless $in{"SEM_SCORE_$1"};
-    &store_in_memory($REFERENCE{$FOREIGN}[$1],
-		     $SENTENCES[$1],
-		     "syn_".$in{"SYN_SCORE_$1"}." sem_".$in{"SEM_SCORE_$1"});
-  }
-
-  # display sentences
-  for(my $i=0;$i<=$#SENTENCES;$i++) {
-    my $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES[$i]);
-    next if ($in{ACTION} eq 'SCORE_FILE' &&
-	     ! $in{VIEW} && 
-	     $evaluation ne '' && $evaluation ne 'wrong');
-    print "<P>Sentence ".($i+1).":<BR>\n";
-    # color coding
-    &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES[$i]),$REFERENCE{"e"}[$i]);
-    if (%MULTI_REF) {
-	foreach my $sysid (keys %MULTI_REF) {
-	    print "<FONT COLOR=GREEN>".$MULTI_REF{$sysid}[$i]."</FONT> (Reference $sysid)<BR>\n";
-	}
-    }
-
-    # all sentences 
-    print "$SENTENCES[$i] (System output)<BR>\n";
-    foreach my $ref (@SHOW) {
-      if (-e "$in{CORPUS}.$ref") {
-	print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n" if $REFERENCE{$ref}[$i];
-      }
-    }
-    if (! $in{VIEW}) {
-      print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$i VALUE=correct";
-      print " CHECKED" if ($evaluation =~ /syn_correct/);
-      print "> perfect English\n";
-      print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$i VALUE=wrong";
-      print " CHECKED" if ($evaluation =~ /syn_wrong/);
-      print "> imperfect English<BR>\n";
-      print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$i VALUE=correct";
-      print " CHECKED" if ($evaluation =~ /sem_correct/);
-      print "> correct meaning\n";
-      print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$i VALUE=wrong";
-      print " CHECKED" if ($evaluation =~ /sem_wrong/);
-      print "> incorrect meaning\n";
-    }
-  }
-  if (! $in{VIEW}) {
-    print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
-    print "</FORM>\n";
-  }
-}
-
-sub color_highlight_ngrams {
-  my($i,$sentence,$single_reference) = @_;
-  my @REF = ();
-  my %NGRAM = ();
-  if (%MULTI_REF) {
-    foreach my $sysid (keys %MULTI_REF) {
-      push @REF,&nist_normalize_text($MULTI_REF{$sysid}[$i]);
-    }
-  }
-  elsif ($single_reference) {
-    @REF = ($single_reference);
-  }
-  if (@REF) {
-    foreach my $ref (@REF) {
-      my @WORD = split(/\s+/,$ref);
-      for(my $n=1;$n<=4;$n++) {
-	for(my $w=0;$w<=$#WORD-($n-1);$w++) {
-	  my $ngram = "$n: ";
-	  for(my $j=0;$j<$n;$j++) {
-	    $ngram .= $WORD[$w+$j]." ";
-	  }
-	  $NGRAM{$ngram}++;
-	}
-      }
-    }
-    $sentence =~ s/^\s+//;
-    $sentence =~ s/\s+/ /;
-    $sentence =~ s/\s+$//;
-    my @WORD = split(/\s+/,$sentence);
-    my @CORRECT;
-    for(my $w=0;$w<=$#WORD;$w++) {
-      $CORRECT[$w] = 0;
-    }
-    for(my $n=1;$n<=4;$n++) {
-      for(my $w=0;$w<=$#WORD-($n-1);$w++) {
-	my $ngram = "$n: ";
-	for(my $j=0;$j<$n;$j++) {
-	  $ngram .= $WORD[$w+$j]." ";
-	}
-	next unless defined($NGRAM{$ngram}) && $NGRAM{$ngram}>0;
-	$NGRAM{$ngram}--;
-	for(my $j=0;$j<$n;$j++) {
-	  $CORRECT[$w+$j] = $n;
-	}
-      }
-    }
-    my @COLOR;
-    $COLOR[0] = "#FF0000";
-    $COLOR[1] = "#C000C0";
-    $COLOR[2] = "#0000FF";
-    $COLOR[3] = "#00C0C0";
-    $COLOR[4] = "#00C000";
-    for(my $w=0;$w<=$#WORD;$w++) {
-      print "<B><FONT COLOR=".$COLOR[$CORRECT[$w]].">$WORD[$w]<SUB>".$CORRECT[$w]."</SUB></FONT></B> ";
-    }
-    print "\n<BR>";
-  }
-}
-
-###### OTHER STATS
-
-#print (in some unspecified way) the offending exception of type Error::Simple
-#arguments: the error object, a context string
-#return: none
-sub printError
-{
-	my ($err, $context) = @_;
-	warn "$context: " . $err->{'-text'} . " @ " . $err->{'-file'} . " (" .$err->{'-line'} . ")\n";
-}
-
-#compute number and percentage of unknown tokens for a given factor in foreign corpus
-#arguments: corpus object ref, factor name
-#return (unkwordCount, totalWordCount), or (-1, -1) if an error occurs
-sub calc_unknown_words
-{
-	my ($corpus, $factorName) = @_;
-	try
-	{
-		my ($unknownCount, $totalCount) = $corpus->calcUnknownTokens($factorName);
-		return ($unknownCount, $totalCount);
-	}
-	catch Error::Simple with
-	{
-		my $err = shift;
-		printError($err, 'calc_unknown_words()');
-		return (-1, -1);
-	};
-}
-
-#compute (if we have the necessary factors) info for:
-#- diff btwn wer and pwer for NNs & ADJs -- if large, many reordering errors
-#- diff btwn pwer for surface forms and pwer for lemmas -- if large, morphology errors
-#arguments: corpus object, system name
-#return (NN/ADJ (wer, pwer), surf pwer, lemma pwer), or (-1, -1, -1, -1) if an error occurs
-sub calc_misc_stats
-{
-	my ($corpus, $sysname) = @_;
-	try
-	{
-		my ($nnAdjWER, $nnAdjPWER) = $corpus->calcNounAdjWER_PWERDiff($sysname);
-		my ($surfPWER, $lemmaPWER) = ($corpus->calcOverallPWER($sysname, 'surf'), $corpus->calcOverallPWER($sysname, 'lemma'));
-		return ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER);
-	}
-	catch Error::Simple with
-	{
-		my $err = shift;
-		printError($err, 'calc_misc_stats()');
-		return (-1, -1, -1, -1);
-	};
-}
-
-###### NIST SCORE
-
-sub get_nist_score {
-  my($reference_file,$source_file,$translation_file) = @_;
-  my @STAT = stat($translation_file);
-  my $current_timestamp = $STAT[9];
-  foreach (@NIST) {
-    my ($file,$time,$nist,$bleu) = split;
-    return ($nist,$bleu) 
-      if ($file eq $translation_file && $current_timestamp == $time);
-  }
-
-  my $nist_eval = `/home/pkoehn/statmt/bin/mteval-v10.pl -c -r $reference_file -s $source_file -t $translation_file`;
-  return (0,0) unless ($nist_eval =~ /NIST score = (\d+\.\d+)  BLEU score = (\d+\.\d+)/i);
-
-  open(NIST,">>nist-memory.dat");
-  printf NIST "$translation_file $current_timestamp %f %f\n",$1,$2;
-  close(NIST);
-  return ($1,$2);
-}
-
-sub nist_normalize_text {
-    my ($norm_text) = @_;
-
-# language-independent part:
-    $norm_text =~ s/<skipped>//g; # strip "skipped" tags
-    $norm_text =~ s/-\n//g; # strip end-of-line hyphenation and join lines
-    $norm_text =~ s/\n/ /g; # join lines
-    $norm_text =~ s/(\d)\s+(\d)/$1$2/g; #join digits
-    $norm_text =~ s/&quot;/"/g;  # convert SGML tag for quote to "
-    $norm_text =~ s/&amp;/&/g;   # convert SGML tag for ampersand to &
-    $norm_text =~ s/&lt;/</g;    # convert SGML tag for less-than to >
-    $norm_text =~ s/&gt;/>/g;    # convert SGML tag for greater-than to <
-
-# language-dependent part (assuming Western languages):
-    $norm_text = " $norm_text ";
-#    $norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case;
-    $norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g;   # tokenize punctuation
-    $norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit
-    $norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit
-    $norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit
-    $norm_text =~ s/\s+/ /g; # one space only between words
-    $norm_text =~ s/^\s+//;  # no leading space
-    $norm_text =~ s/\s+$//;  # no trailing space
-
-    return $norm_text;
-}
-
-###### BLEU SCORE
-
-sub get_multi_bleu_score {
-  my($foreign_file,$reference_file,$translation_file) = @_;
-  my @STAT = stat($translation_file);
-  my $current_timestamp = $STAT[9];
-  foreach (@mBLEU) {
-    my ($file,$time,$score,$g1,$g2,$g3,$g4,$bp) = split;
-    if ($file eq $translation_file && $current_timestamp == $time) {
-      return ($score,$g1*100,$g2*100,$g3*100,$g4*100,$bp);
-    }
-  }
-
-  # load reference translation from reference file
-  my @REFERENCE_SENTENCE = `cat $reference_file`; chop(@REFERENCE_SENTENCE);
-  my @TRANSLATION_SENTENCE = `cat $translation_file`; chop(@TRANSLATION_SENTENCE);
-  my %REF;
-  my @FOREIGN_SENTENCE = `cat $foreign_file`; chop(@FOREIGN_SENTENCE);
-  for(my $i=0;$i<=$#TRANSLATION_SENTENCE;$i++) {
-    push @{$REF{$FOREIGN_SENTENCE[$i]}},$REFERENCE_SENTENCE[$i];
-  }
-  # load reference translation from translation memory
-  foreach my $memory (keys %MEMORY) {
-    next if $MEMORY{$memory} ne 'syn_correct sem_correct';
-    my ($foreign,$english) = split(/ .o0O0o. /,$memory);
-    next unless defined($REF{$foreign});
-    push @{$REF{$foreign}},$english;
-  }
-  my(@CORRECT,@TOTAL,$length_translation,$length_reference);
-  # compute bleu
-  for(my $i=0;$i<=$#TRANSLATION_SENTENCE;$i++) {
-    my %REF_NGRAM = ();
-    my @WORD = split(/ /,$TRANSLATION_SENTENCE[$i]);
-    my $length_translation_this_sentence = scalar(@WORD);
-    my ($closest_diff,$closest_length) = (9999,9999);
-    foreach my $reference (@{$REF{$FOREIGN_SENTENCE[$i]}}) {
-      my @WORD = split(/ /,$reference);
-      my $length = scalar(@WORD);
-      if (abs($length_translation_this_sentence-$length) < $closest_diff) {
-	$closest_diff = abs($length_translation_this_sentence-$length);
-	$closest_length = $length;
-      }
-      for(my $n=1;$n<=4;$n++) {
-	my %REF_NGRAM_N = ();
-	for(my $start=0;$start<=$#WORD-($n-1);$start++) {
-	  my $ngram = "$n";
-	  for(my $w=0;$w<$n;$w++) {
-	    $ngram .= " ".$WORD[$start+$w];
-	  }
-	  $REF_NGRAM_N{$ngram}++;
-	}
-	foreach my $ngram (keys %REF_NGRAM_N) {
-	  if (!defined($REF_NGRAM{$ngram}) || 
-	      $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
-	    $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
-	  }
-	}
-      }
-    }
-    $length_translation += $length_translation_this_sentence;
-    $length_reference += $closest_length;
-    for(my $n=1;$n<=4;$n++) {
-      my %T_NGRAM = ();
-      for(my $start=0;$start<=$#WORD-($n-1);$start++) {
-	my $ngram = "$n";
-	for(my $w=0;$w<$n;$w++) {
-	  $ngram .= " ".$WORD[$start+$w];
-	}
-	$T_NGRAM{$ngram}++;
-      }
-      foreach my $ngram (keys %T_NGRAM) {
-	my $n = 0+$ngram;
-#	print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
-	$TOTAL[$n] += $T_NGRAM{$ngram};
-	if (defined($REF_NGRAM{$ngram})) {
-	  if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
-	    $CORRECT[$n] += $T_NGRAM{$ngram};
-#	    print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
-	  }
-	  else {
-	    $CORRECT[$n] += $REF_NGRAM{$ngram};
-#	    print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
-	  }
-	}
-      }
-    }
-  }
-  my $brevity_penalty = 1;
-  if ($length_translation<$length_reference) {
-    $brevity_penalty = exp(1-$length_reference/$length_translation);
-  }
-  my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
-				     my_log( $CORRECT[2]/$TOTAL[2] ) +
-				     my_log( $CORRECT[3]/$TOTAL[3] ) +
-				     my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
-
-  open(BLEU,">>mbleu-memory.dat");
-  @STAT = stat($translation_file);
-  printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty;
-  close(BLEU);
-  
-  return ($bleu,
-	  100*$CORRECT[1]/$TOTAL[1],
-	  100*$CORRECT[2]/$TOTAL[2],
-	  100*$CORRECT[3]/$TOTAL[3],
-	  100*$CORRECT[4]/$TOTAL[4],
-	  $brevity_penalty);
-}
-
-sub my_log {
-  return -9999999999 unless $_[0];
-  return log($_[0]);
-}
-
-
-###### SCORE TRANSLATIONS
-
-################################ IN PROGRESS ###############################
-sub compare2
-{
-	&htmlhead("Compare Translations");
-	print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $in{CORPUS}</A><P>\n";
-	print "<FORM ACTION=\"\" METHOD=POST>\n";
-	print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=$in{ACTION}>\n";
-	print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
-	my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-indices' => \%factorIndices);
-	$corpus->writeComparisonPage(\*STDOUT, /^.*$/);
-	print "</FORM>\n";
-}
-
-sub compare {
-  &htmlhead("Compare Translations");
-  print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $in{CORPUS}</A><P>\n";
-  print "<FORM ACTION=\"\" METHOD=POST>\n";
-  print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=$in{ACTION}>\n";
-  print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
-
-  # get sentences
-  my %SENTENCES;
-  my $sentence_count;
-  foreach (keys %in) {
-    if (/^FILE_(.+)$/) {
-      my $file = $1;
-      print "<INPUT TYPE=HIDDEN NAME=\"$file\" VALUE=1>\n";
-      my @SENTENCES;
-      if ($file =~ /.sgm$/) {
-	  @{$SENTENCES{$file}} = `grep '<seg' $in{CORPUS}.$file`;
-	  for(my $i=0;$i<$#{$SENTENCES{$file}};$i++) {
-	      $SENTENCES{$file}[$i] =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
-	  }
-      }
-      else {
-	  @{$SENTENCES{$file}} = `cat $in{CORPUS}.$1`;
-	  chop(@{$SENTENCES{$file}});
-      }
-
-      $sentence_count = scalar @{$SENTENCES{$file}};
-    }
-  }
-  my %REFERENCE;
-  foreach (@SHOW) {
-    if (-e "$in{CORPUS}.$_") {
-      @{$REFERENCE{$_}} = `cat $in{CORPUS}.$_`; chop(@{$REFERENCE{$_}});
-    }
-  }
-
-  # update memory
-  foreach (keys %in) {
-    next unless /^SYN_SCORE_(.+)_(\d+)$/;
-    next unless $in{"SEM_SCORE_$1_$2"};
-    &store_in_memory($REFERENCE{$FOREIGN}[$2],
-		     $SENTENCES{$1}[$2],
-                     "syn_".$in{"SYN_SCORE_$1_$2"}." sem_".$in{"SEM_SCORE_$1_$2"});
-  }
-
-  # display sentences
-  for(my $i=0;$i<$sentence_count;$i++)
-  {
-    my $evaluation = "";
-    my $show = 0;
-    my $surface = "";
-    foreach my $file (keys %SENTENCES)
-	 {
-      if ($in{SURFACE}) {
-	$SENTENCES{$file}[$i] =~ s/ *$//;
-	$surface = $SENTENCES{$file}[$i] if ($surface eq '');
-	$show = 1 if ($SENTENCES{$file}[$i] ne $surface);
-      }
-      else {
-	my $this_ev = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
-	$this_ev = "syn_wrong sem_wrong" unless $this_ev;
-	$evaluation = $this_ev if ($evaluation eq '');
-	$show = 1 if ($evaluation ne $this_ev);
-      }
-    }
-    next unless $show;
-    print "<HR>Sentence ".($i+1).":<BR>\n";
-    foreach my $ref (@SHOW) {
-      if (-e "$in{CORPUS}.$ref") {
-	print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n";
-      }
-    }
-    foreach my $file (keys %SENTENCES) {
-      print "<B>$SENTENCES{$file}[$i]</B> ($file)<BR>\n";
-      &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]);
-      if (0 && $in{WITH_EVAL}) {
-	$evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
-	print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=correct";
-	print " CHECKED" if ($evaluation =~ /syn_correct/);
-	print "> perfect English\n";
-	print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=wrong";
-	print " CHECKED" if ($evaluation =~ /syn_wrong/);
-	print "> imperfect English<BR>\n";
-	print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=correct";
-	print " CHECKED" if ($evaluation =~ /sem_correct/);
-	print "> correct meaning\n";
-	print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=wrong";
-	print " CHECKED" if ($evaluation =~ /sem_wrong/);
-	print "> incorrect meaning<BR>\n";
-      }
-    }
-  }
-  print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
-  print "</FORM>\n";
-}
-
-###### MEMORY SUBS
-
-sub load_memory {
-  open(MEMORY,"evaluation-memory.dat") or return;
-  while(<MEMORY>) {
-    chop;
-    my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /);
-    $evaluation = 'syn_correct sem_correct' if ($evaluation eq 'correct');
-    $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
-  }
-  close(MEMORY);
-}
-
-sub get_score_from_memory {
-  my($foreign_file,$translation_file) = @_;
-  my $unknown=0;
-  my $correct=0;
-  my $just_syn=0;
-  my $just_sem=0;
-  my $wrong=0;
-  my @FOREIGN = `cat $foreign_file`; chop(@FOREIGN);
-  my @TRANSLATION = `cat $translation_file`; chop(@TRANSLATION);
-  for(my $i=0;$i<=$#FOREIGN;$i++) {
-    if (my $evaluation = &get_from_memory($FOREIGN[$i],$TRANSLATION[$i])) {
-      if ($evaluation eq 'syn_correct sem_correct') { $correct++ }
-      elsif ($evaluation eq 'syn_correct sem_wrong') { $just_syn++ }
-      elsif ($evaluation eq 'syn_wrong sem_correct') { $just_sem++ }
-      elsif ($evaluation eq 'syn_wrong sem_wrong') { $wrong++ }
-      else { $unknown++; }
-    }
-    else { $unknown++; }
-  }
-  return($correct,$just_syn,$just_sem,$wrong,$unknown);
-}
-
-sub store_in_memory {
-  my($foreign,$translation,$evaluation) = @_;
-  &trim(\$translation);
-  return if $MEMORY{"$foreign .o0O0o. $translation"} eq $evaluation;
-  $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
-  open(MEMORY,">>evaluation-memory.dat") or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n";
-  print MEMORY "$foreign .o0O0o. $translation .o0O0o. $evaluation\n";
-  close(MEMORY);
-}
-
-sub get_from_memory {
-  my($foreign,$translation) = @_;
-  &trim(\$translation);
-  return $MEMORY{"$foreign .o0O0o. $translation"};
-}
-
-sub trim {
-  my($translation) = @_;
-  $$translation =~ s/ +/ /g;
-  $$translation =~ s/^ +//;
-  $$translation =~ s/ +$//;
-#  $$translation =~ s/ +[\.]$//;
-}
-
-sub load_descriptions {
-  open(FD,"file-descriptions") or die "load_descriptions(): couldn't open 'file-descriptions' for read\n";
-  while(<FD>) {
-    my($file,$description) = split(/\s+/,$_,2);
-    $FILEDESC{$file} = $description;
-  }
-  close(FD);
-}
-
-#read config file giving names of corpi and fill factor-index map
-#arguments: filename to read
-#return: hash of corpus names to hashrefs of factor names to indices
-sub loadFactorIndices
-{
-	my $filename = shift;
-	my %data = ();
-	open(INFILE, "<$filename") or die "loadFactorIndices(): couldn't open '$filename' for read\n";
-	while(my $line = <INFILE>)
-	{
-		if($line =~ /^\#/) {next;} #skip comment lines
-		my @tokens = split(/\s+/, $line);
-		my $corpusName = shift @tokens;
-		$data{$corpusName} = {};
-		for(my $i = 0; $i < scalar(@tokens); $i++)
-		{
-			$data{$corpusName}->{$tokens[$i]} = $i;
-		}
-	}
-	close(INFILE);
-	return %data;
-}
-
-###### SUBS
-
-sub htmlhead {
-  print <<"___ENDHTML";
-Content-type: text/html
-
-<HTML><HEAD>
-<TITLE>MTEval: $_[0]</TITLE>
-<SCRIPT LANGUAGE="JavaScript">
-
-<!-- hide from old browsers
-
-function FieldInfo(field,description) {
-  popup = window.open("","popDialog","height=500,width=600,scrollbars=yes,resizable=yes");
-  popup.document.write("<HTML><HEAD><TITLE>"+field+"</TITLE></HEAD><BODY BGCOLOR=#FFFFCC><CENTER><B>"+field+"</B><HR SIZE=2 NOSHADE></CENTER><PRE>"+description+"</PRE><CENTER><FORM><INPUT TYPE='BUTTON' VALUE='Okay' onClick='self.close()'></FORM><CENTER></BODY></HTML>");
-  popup.focus();
-  popup.document.close();
-}
-
-<!-- done hiding -->
-
-</SCRIPT>
-</HEAD>
-<BODY BGCOLOR=white>
-<H2>Evaluation Tool for Machine Translation<BR>$_[0]</H2>
-___ENDHTML
-}
-
-
-############################# parts of cgi-lib.pl
-
-
-sub ReadParse {
-  my ($i, $key, $val);
-
-  # Read in text
-  my $in;
-  if (&MethGet) {
-    $in = $ENV{'QUERY_STRING'};
-  } elsif (&MethPost) {
-    read(STDIN,$in,$ENV{'CONTENT_LENGTH'});
-  }
-
-  my @in = split(/[&;]/,$in);
-
-  foreach $i (0 .. $#in) {
-    # Convert plus's to spaces
-    $in[$i] =~ s/\+/ /g;
-
-    # Split into key and value.
-    ($key, $val) = split(/=/,$in[$i],2); # splits on the first =.
-
-    # Convert %XX from hex numbers to alphanumeric
-    $key =~ s/%(..)/pack("c",hex($1))/ge;
-    $val =~ s/%(..)/pack("c",hex($1))/ge;
-
-    # Associate key and value
-    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
-    $in{$key} .= $val;
-
-  }
-
-  return scalar(@in);
-}
-
-sub MethGet {
-  return ($ENV{'REQUEST_METHOD'} eq "GET");
-}
-
-sub MethPost {
-  return ($ENV{'REQUEST_METHOD'} eq "POST");
-}