mert/tests/testmert.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220

#!/usr/bin/env python

#
# Mert test suite.
# Created by Barry Haddow
#
# This script downloads data from www.statmt.org and runs tests of mert,
# comparing the resulting weights against the expected ones and reporting
# timing information.
#

import ConfigParser
import logging
import optparse
import os
import os.path
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import urllib
import warnings

# Ignore every warning whose message starts with "tmpnam" (presumably the
# security warning that os.tmpnam() emits -- confirm before removing).
warnings.filterwarnings(action="ignore",message="tmpnam")

# Logger shared by everything in this script.
log = logging.getLogger("testmert")
# Base URL under which the test list and the test archives are published.
dataurl = "http://www.statmt.org/moses/reg-testing/mert/"

def getMertDirectory():
    """Return the parent of the directory that contains this script.

    A relative script location is resolved against the current working
    directory and normalised; an absolute one is used exactly as it is.
    """
    here = os.path.dirname(__file__)
    if os.path.isabs(here):
        return os.path.dirname(here)
    resolved = os.path.normpath(os.path.join(os.getcwd(), here))
    return os.path.dirname(resolved)

class Mert:
    """Controls operation of mert loop.

    Each instance owns a private working directory holding the per-iteration
    "weights.N", "scores.N" and "features.N" files.  The "extractor" and
    "mert" executables are looked up in the directory returned by
    getMertDirectory().  Call cleanup() when finished to delete the working
    directory.
    """
    def __init__(self,weightfile,reffile,scorertype="BLEU",retries="20"):
        """Set up a working directory seeded with the initial weights.

        weightfile -- file whose first line holds the starting weights; the
                      number of whitespace-separated fields on that line is
                      used as the dimension passed to mert
        reffile    -- reference file handed to the extractor
        scorertype -- value of the --sctype option of both executables
        retries    -- value of mert's -n option (number or string)

        Raises IOError/OSError if the weight file cannot be read or copied.
        """
        # Read the weights before creating anything on disk, so that a
        # missing or unreadable file does not leave a stray directory behind.
        with open(weightfile) as weightfh:
            line = weightfh.readline()
        # calculate dimension from weight file
        self.dimension = str(len(line.split()))
        self.reffile = reffile
        self.scorertype = scorertype
        self.mertdir = getMertDirectory()
        self.iteration = 1 # iteration number of inner loop
        # subprocess needs string arguments, so accept numbers as well
        self.retries = str(retries)
        self.extractortimes = []
        self.merttimes = []
        # mkdtemp creates the directory atomically, unlike tmpnam + mkdir
        self.workingdir = tempfile.mkdtemp(prefix="testmert")
        try:
            shutil.copy(weightfile,self.getFileName("weights",self.iteration-1))
        except Exception:
            self.cleanup()
            raise

    def innerLoop(self, nbestfile):
        """Perform iteration of the inner loop. Returns location of
        weights file.

        Runs the extractor on nbestfile (merging in the score and feature
        files of the previous iteration, if there was one), then runs mert
        on the result.  The wall-clock time of each step is appended to
        extractortimes / merttimes.

        Raises RuntimeError if either executable exits with a non-zero
        status or if mert leaves no weights.txt behind.
        """
        log.debug("Inner loop: %d" % self.iteration)
        # run extractor
        scorefile = self.getFileName("scores",self.iteration)
        featurefile = self.getFileName("features",self.iteration)
        weightinfile = self.getFileName("weights",self.iteration-1)
        cmd = [os.path.join(self.mertdir,"extractor"),"--reference",
          self.reffile, "--nbest",nbestfile,"--sctype",self.scorertype,
          "--scfile", scorefile,"--ffile",featurefile]
        if self.iteration > 1:
            prevscorefile = self.getFileName("scores",self.iteration-1)
            prevfeaturefile = self.getFileName("features",self.iteration-1)
            cmd = cmd + ["--prev-scfile",prevscorefile , "--prev-ffile", prevfeaturefile ]
        self._runTimed("extractor",cmd,self.extractortimes)

        # run mert
        cmd = [os.path.join(self.mertdir,"mert"),"--sctype",
         self.scorertype, "--scfile", scorefile, "--ffile", featurefile,
         "--ifile",weightinfile, "-d", self.dimension,"-n",self.retries]
        # mert is run inside the working directory and its weights.txt is
        # collected from there afterwards
        self._runTimed("mert",cmd,self.merttimes,cwd=self.workingdir)
        producedfile = os.path.join(self.workingdir,"weights.txt")
        if not os.path.isfile(producedfile):
            raise RuntimeError("mert did not produce %s" % producedfile)
        weightoutfile = self.getFileName("weights",self.iteration)
        shutil.move(producedfile,weightoutfile)
        self.iteration = self.iteration + 1
        return weightoutfile

    def _runTimed(self,name,cmd,times,cwd=None):
        """Run cmd, append its wall-clock duration to times and raise
        RuntimeError if it exits with a non-zero status."""
        log.debug("Running: " + " ".join(cmd))
        start = time.time()
        ret = subprocess.call(cmd, cwd=cwd)
        times.append(time.time()-start)
        if ret != 0:
            raise RuntimeError("Failed to execute %s: return code %d" % (name,ret))

    def getFileName(self,stem,iteration):
        """Return the path of the file "<stem>.<iteration>" inside the
        working directory."""
        return os.path.join(self.workingdir,"%s.%s" % (stem,iteration))

    def cleanup(self):
        """Delete the working directory and everything in it.

        Best effort: errors (such as the directory already being gone) are
        ignored.
        """
        shutil.rmtree(self.workingdir,ignore_errors=True)

class Test:
    """A mert test.

    A test lives in a data directory containing a "config" file (with an
    "iterations" entry in its [test] section), the initial weights
    ("weights.0"), a "reference" file and, for every iteration N, an n-best
    list "nbest.N.gz" together with the expected weights "weights.N".
    """
    def __init__(self,datadir,tolerance=0.00001):
        """Read the test configuration.

        datadir   -- directory holding the test data
        tolerance -- largest absolute difference allowed between an actual
                     and an expected weight
        """
        self.datadir = datadir
        config = ConfigParser.ConfigParser()
        config.read(os.path.join(datadir,"config"))
        self.iterations = config.getint("test","iterations")
        log.debug("Test iterations: %d" % self.iterations)
        self.tolerance = tolerance

    def run(self):
        """Run the test, return a boolean indicating success or failure.

        One entry per iteration is stored in self.diffs: True if the weights
        produced by that iteration differ from the expected ones.  The
        result is True only if no iteration differed.  The Mert working
        directory is removed even when an iteration raises.
        """
        weightfile = os.path.join(self.datadir,"weights.0")
        reffile = os.path.join(self.datadir,"reference")
        self.mert = Mert(weightfile,reffile)
        self.diffs = []
        try:
            for i in range(self.iterations):
                nbestfile = os.path.join(self.datadir,"nbest." + repr(i+1) + ".gz")
                weightfile = self.mert.innerLoop(nbestfile)
                expectedweightfile = os.path.join(self.datadir,"weights."+repr(i+1))
                expectedweights = self.getWeights(expectedweightfile)
                weights = self.getWeights(weightfile)
                log.debug("Expected weights: " + repr(expectedweights))
                log.debug("Actual weights: " + repr(weights))
                mismatch = self._describeMismatch(weights,expectedweights)
                if mismatch is None:
                    log.debug("Weights match expected")
                else:
                    log.debug(mismatch)
                self.diffs.append(mismatch is not None)
        finally:
            self.mert.cleanup()
        return not any(self.diffs)

    def _describeMismatch(self,weights,expectedweights):
        """Return None if the weight lists agree within the tolerance,
        otherwise a message describing the first difference found."""
        # A different number of weights can never be a match; comparing only
        # the common prefix would let a truncated result pass.
        if len(weights) != len(expectedweights):
            return "Expected %d weights but found %d" % \
                (len(expectedweights),len(weights))
        for j in range(len(weights)):
            # "not <=" rather than ">" so that a NaN weight is a mismatch
            if not abs(weights[j]-expectedweights[j]) <= self.tolerance:
                return "Weight %d does not match: " % j
        return None

    def getWeights(self,weightfile):
        """Load a weight set from a file.

        Only the first line is read; it is split on whitespace and every
        field is converted to a float.
        """
        with open(weightfile) as weightfh:
            line = weightfh.readline()
        return [float(w) for w in line.split()]

    def printSummary(self):
        """Print a summary of the results.

        Must be called after run().  Each line is assembled first and then
        printed as a single string, which gives the same output whether
        print is a statement or a function.
        """
        print(" ".join(["RESULTS: ", "%s" % self.datadir]))
        print(" ".join(["Weights matching expected: "] +
            [str(not diff) for diff in self.diffs]))
        print(self._formatTimes("Extractor times: ",self.mert.extractortimes))
        print(self._formatTimes("Optimisation times: ",self.mert.merttimes))

    def _formatTimes(self,label,times):
        """Return a summary line made of the label, every time in times and
        their average (0 when there are no times)."""
        if times:
            average = sum(times)/len(times)
        else:
            # nothing was timed: avoid dividing by zero
            average = 0.0
        fields = [label] + ["%7.3f" % t for t in times]
        fields.append("ave: %7.3f" % average)
        return " ".join(fields)

def getTestList():
    """Download tests.txt from the data server and return the test names.

    The file is expected to hold one test name per line.  Surrounding
    whitespace (including "\\r\\n" line endings) is stripped and blank lines
    are skipped, so a final line without a newline is returned intact.
    """
    # URLs always use "/" as separator, so do not build them with
    # os.path.join.
    listfh = urllib.urlopen(dataurl.rstrip("/") + "/tests.txt")
    try:
        names = [line.strip() for line in listfh]
    finally:
        listfh.close()
    return [name for name in names if name]

def list():
    """Print a heading followed by the name of every available test, one
    per line."""
    lines = ["Available tests:"]
    lines.extend(getTestList())
    # a single parenthesised string prints the same text as the separate
    # print statements would
    print("\n".join(lines))

def runAll(datadir):
    """Fetch the list of available tests and run each of them in turn,
    using datadir as the data directory."""
    names = getTestList()
    for name in names:
        runTest(name,datadir)

def runTest(testname,datadir):
    """Run a single test and print its summary.

    testname -- name of the test; its data is expected in the directory
                <datadir>/<testname>
    datadir  -- directory holding the data of all tests (created if missing)

    If the test directory does not exist yet, <testname>.tgz is downloaded
    from the data server and unpacked inside datadir.

    Raises RuntimeError if the archive cannot be unpacked or does not
    contain the test directory.
    """
    log.info("Test started: " + testname)
    if not os.path.isdir(datadir):
        os.makedirs(datadir)
    testdir = os.path.join(datadir,testname)
    # Check if the test exists, download if necessary
    if os.path.isdir(testdir):
        log.debug("Directory %s already exists: not downloading" % testdir)
    else:
        # URLs always use "/" as separator, so do not build them with
        # os.path.join.
        testurl = dataurl.rstrip("/") + "/" + testname + ".tgz"
        log.debug("Retrieving test data from " + testurl)
        (arname,headers) = urllib.urlretrieve(testurl)
        try:
            # An argument list avoids the shell, so unusual characters in
            # the paths cannot break (or alter) the command.
            ret = subprocess.call(["tar","zxf",arname],cwd=datadir)
        finally:
            # remove the temporary file left behind by urlretrieve
            urllib.urlcleanup()
        log.debug("Done")
        if ret != 0 or not os.path.isdir(testdir):
            raise RuntimeError("Test %s did not unpack properly" % testname)
    test = Test(testdir)
    test.run()
    test.printSummary()
    log.info("Test ended: " + testname)

def main():
    """Parse the command line and carry out the requested action.

    Actions:
      list           -- print the names of the available tests
      runall         -- run every available test
      run TESTNAME   -- run a single test

    A missing or unknown action, or a missing test name, is reported through
    the option parser, which prints the usage message and exits with
    status 2.
    """
    logging.basicConfig(level = logging.DEBUG)
    parser = optparse.OptionParser("usage: %prog [options] list|run|runall [testname]")
    parser.add_option("-d", "--datadir", action="store", default="data",
        dest="datadir", help="Data directory to use", metavar="DIR")
    (options,args) = parser.parse_args()
    if len(args) < 1:
        parser.error("Need to specify an action")
    action = args[0]
    if action == "list":
        list()
    elif action == "runall":
        runAll(options.datadir)
    elif action == "run":
        if len(args) < 2:
            parser.error("The run action requires a test name")
        runTest(args[1],options.datadir)
    else:
        # a mistyped action must not look like a successful run
        parser.error("Unknown action: %s" % action)

# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()