Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>2006-08-17 16:31:52 +0400
committereherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>2006-08-17 16:31:52 +0400
commitc646717009c0622054bfbe82556c284b785bf69c (patch)
tree3e31c3bfbe6fffed45840656114b134eef84f138 /scripts/analysis
parent55d63ca63f5d0d9d428ecc321393095733a782ef (diff)
trying to fix caching
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@775 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts/analysis')
-rw-r--r--scripts/analysis/smtgui/Corpus.pm26
-rwxr-xr-xscripts/analysis/smtgui/newsmtgui.cgi3
2 files changed, 15 insertions, 14 deletions
diff --git a/scripts/analysis/smtgui/Corpus.pm b/scripts/analysis/smtgui/Corpus.pm
index f4629d5a2..2a7493b39 100644
--- a/scripts/analysis/smtgui/Corpus.pm
+++ b/scripts/analysis/smtgui/Corpus.pm
@@ -67,6 +67,8 @@ sub new
bless $self, $class;
$self->locateFiles($refFileDescs); #find all relevant files in the current directory; set filenames and descriptions
$self->loadCacheFile();
+ print STDERR "on load:\n";
+ $self->printDetails();
return $self;
}
@@ -231,7 +233,7 @@ sub calcOverallPWER
}
#arguments: system name, factor name to consider (default 'surf')
-#return: BLEU score, n-gram precisions, brevity penalty
+#return: array of (BLEU score, n-gram precisions, brevity penalty)
sub calcBLEU
{
my ($self, $sysname, $factorName) = (shift, shift, 'surf');
@@ -365,6 +367,7 @@ sub statisticallyTestBLEUResults
sub calcPerplexity
{
my ($self, $sysname, $factorName) = @_;
+ print STDERR "ppl $sysname $factorName\n";
#check in-memory cache first
if(exists $self->{'perplexity'}->{$sysname} && exists $self->{'perplexity'}->{$sysname}->{$factorName})
{
@@ -440,7 +443,9 @@ sub statisticallyCompareSystemResults
$dev = sqrt($dev / (($j - 1) * $j)); #need the extra j because the variance of Xbar is 1/n the variance of X
#t test
my $t = $mean / $dev; #this isn't the standard form; remember the difference of the means is equal to the mean of the differences
- push @$tConfidences, getUpperBoundPValue($t);
+ my $cc = getUpperBoundPValue($t);
+ print STDERR "comparing at n=$i: mu $mean, sigma $dev, t $t -> conf >= " . (1 - $cc) . "\n";
+ push @$tConfidences, $cc;
push @$tWinningIndices, ($mean > 0) ? 0 : 1;
#sign test
my %binomialCoefficients; #map (n+ - n-) to a coefficient; compute on the fly!
@@ -526,7 +531,7 @@ sub writeCacheFile
print CACHEFILE "$sysname $factorName " . join(' ', @{$self->{'bleuScores'}->{$sysname}->{$factorName}->[0]});
foreach my $sentenceBLEU (@{$self->{'bleuScores'}->{$sysname}->{$factorName}->[1]})
{
- print CACHEFILE "; " . join(' ', @$sentenceBLEU);
+ print CACHEFILE ";" . join(' ', @$sentenceBLEU);
}
print CACHEFILE "\n";
}
@@ -540,7 +545,7 @@ sub writeCacheFile
print CACHEFILE "$sysname $factorName " . join(' ', @{$self->{'bleuConfidence'}->{$sysname}->{$factorName}->[0]});
foreach my $subsetConfidence (@{$self->{'bleuConfidence'}->{$sysname}->{$factorName}->[1]})
{
- print CACHEFILE "; " . join(' ', @$subsetConfidence);
+ print CACHEFILE ";" . join(' ', @$subsetConfidence);
}
print CACHEFILE "\n";
}
@@ -553,7 +558,7 @@ sub writeCacheFile
{
foreach my $factorName (keys %{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}})
{
- print CACHEFILE "$sysname1 $sysname2 $factorName " . join('; ', map {join(' ', @$_)} @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n";
+ print CACHEFILE "$sysname1 $sysname2 $factorName " . join(';', map {join(' ', @$_)} @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n";
}
}
}
@@ -641,8 +646,10 @@ sub loadCacheFile
if(!exists $self->{'bleuScores'}->{$sysname}) {$self->{'bleuScores'}->{$sysname} = {};}
if(!exists $self->{'bleuScores'}->{$sysname}->{$factorName}) {$self->{'bleuScores'}->{$sysname}->{$factorName} = [[], []];}
my @stats = map {my @tmp = split(/\s+/, $_); \@tmp;} split(/;/, $rest);
- $self->{'bleuScores'}->{$sysname}->{$factorName}->[0] = shift @stats;
- $self->{'bleuScores'}->{$sysname}->{$factorName}->[1] = \@stats;
+ print STDERR "bleu 1: " . join(', ', @{shift @stats}) . "\n";
+ print STDERR "bleu 2: " . join(' ', map {"{" . join(', ', @$_) . "}"} @stats) . "\n";
+ # $self->{'bleuScores'}->{$sysname}->{$factorName}->[0] = shift @stats;
+ # $self->{'bleuScores'}->{$sysname}->{$factorName}->[1] = \@stats;
}
elsif($mode eq 'bstats')
{
@@ -704,8 +711,6 @@ sub loadCacheFile
}
}
close(CACHEFILE);
-# print STDERR "\nafter load cache:\n";
-# $self->printDetails();
}
#arguments: cache type ('bleu' | ...), system name, factor name
@@ -727,11 +732,8 @@ sub flushCache
sub cacheIsCurrentForFile
{
my ($self, $ext) = @_;
- print STDERR "cicff($self, $ext)\n";
return 0 if !exists $self->{'fileCtimes'}->{$ext} ;
- print STDERR " $ext exists in ctimes\n";
my @liveStats = stat($self->{'corpusName'} . ".$ext");
- print STDERR " time stat: " . $liveStats[9] . "\n";
return ($liveStats[9] <= $self->{'fileCtimes'}->{$ext}) ? 1 : 0;
}
diff --git a/scripts/analysis/smtgui/newsmtgui.cgi b/scripts/analysis/smtgui/newsmtgui.cgi
index 0e5cd24b5..5ead59529 100755
--- a/scripts/analysis/smtgui/newsmtgui.cgi
+++ b/scripts/analysis/smtgui/newsmtgui.cgi
@@ -83,7 +83,7 @@ sub view_corpus {
# find corpora in evaluation directory
my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}});
- $corpus->printDetails();
+# $corpus->printDetails(); #debugging info
my ($sentence_count, $lineInfo);
if(-e "$in{CORPUS}.f")
@@ -133,7 +133,6 @@ sub view_corpus {
foreach $_ (@filenames)
{
next if -d $_; #if is a directory
- print STDERR "file: $_\n";
my $sgm = 0;
if (/.sgm$/)
{