diff options
-rw-r--r-- | ConfusionMatrix.lua | 2 | ||||
-rw-r--r-- | DataList.lua | 58 | ||||
-rw-r--r-- | DataSet.lua | 351 | ||||
-rw-r--r-- | StochasticTrainer.lua | 8 | ||||
-rw-r--r-- | init.lua | 4 | ||||
-rw-r--r-- | nnx-1.0-1.rockspec | 3 |
6 files changed, 421 insertions, 5 deletions
diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua index 78809ca..bf581bf 100644 --- a/ConfusionMatrix.lua +++ b/ConfusionMatrix.lua @@ -37,7 +37,7 @@ function ConfusionMatrix:updateValids() self.averageValid = 0 local nvalids = 0 for t = 1,self.mat:size(1) do - if not xlua.isNaN(self.valids[t]) then + if not sys.isNaN(self.valids[t]) then self.averageValid = self.averageValid + self.valids[t] nvalids = nvalids + 1 end diff --git a/DataList.lua b/DataList.lua new file mode 100644 index 0000000..5b95c48 --- /dev/null +++ b/DataList.lua @@ -0,0 +1,58 @@ +-------------------------------------------------------------------------------- +-- DataList: a container for plain DataSets. +-- Each sub dataset represents one class. +-- +-- Authors: Corda, Farabet +-------------------------------------------------------------------------------- + +local DataList, parent = torch.class('nn.DataList','nn.DataSet') + +function DataList:__init() + parent.__init(self) + self.datasets = {} + self.nbClass = 0 + self.ClassName = {} + self.nbSamples = 0 +end + +function DataList:__tostring__() + str = 'DataList' + str = str .. ' + nb samples : '..self.nbSamples + str = str .. ' + nb classes : '..self.nbClass + return str +end + +function DataList:__index__(key) + if type(key)=='number' and self.nbClass>0 and key <= self.nbSamples then + local class = ((key-1) % self.nbClass) + 1 + local classSize = self.datasets[class]:size() + local elmt = math.floor((key-1)/self.nbClass) + 1 + elmt = ((elmt-1) % classSize) + 1 + + -- create target vector on the fly + self.datasets[class][elmt][2] = torch.Tensor(1,1,self.nbClass):fill(-1) + self.datasets[class][elmt][2][1][1][class] = 1 + + -- apply hook on sample + local sample = self.datasets[class][elmt] + if self.hookOnSample then + sample = self.hookOnSample(self,sample) + end + + return sample,true + end + -- if key is not a number this should return nil + return rawget(self, key) +end + +function DataList:appendDataSet(dataSet,className) + table.insert(self.datasets,dataSet) + if self.nbSamples == 0 then + self.nbSamples = dataSet:size() + else + self.nbSamples = math.floor(math.max(self.nbSamples/self.nbClass,dataSet:size())) + end + self.nbClass = self.nbClass + 1 + self.nbSamples = self.nbSamples * self.nbClass + table.insert(self.ClassName,self.nbClass,className) +end diff --git a/DataSet.lua b/DataSet.lua new file mode 100644 index 0000000..693c524 --- /dev/null +++ b/DataSet.lua @@ -0,0 +1,351 @@ +-------------------------------------------------------------------------------- +-- DataSet: a class to handle standard datasets. +-- +-- Authors: Corda, Farabet +-------------------------------------------------------------------------------- + +local lDataSet = torch.class('nn.DataSet') + +function lDataSet:__init(...) + xlua.require('image',true) + self.nbSamples = 0 + if select('#',...) > 0 then + self:load(...) + end +end + +function lDataSet:size() + return self.nbSamples +end + +function lDataSet:load(...) + -- parse args + local args, dataSetFolder, nbSamplesRequired, cacheFile, channels, sampleSize + = xlua.unpack( + {...}, + 'DataSet.load', nil, + {arg='dataSetFolder', type='string', help='path to dataset', req=true}, + {arg='nbSamplesRequired', type='number', help='number of patches to load', default='all'}, + {arg='cacheFile', type='string', help='path to file to cache files'}, + {arg='channels', type='number', help='nb of channels', default=1}, + {arg='sampleSize', type='table', help='resize all sample: {w,h}'} + ) + self.cacheFileName = cacheFile or self.cacheFileName + + -- Clear current dataset + self:emptySet() + + -- Then try to find if cache file exists + -- the base name of this file can be provided by useCacheFile() + -- and the suffixe is the nb of samples needed, 'all' if not specified + local fileName + local datasetLoadedFromFile = false + if (self.cacheFileName ~= nil) then + fileName = self.cacheFileName .. '-' .. nbSamplesRequired + if sys.filep(fileName) then + -- File found + print('<DataSet> Loading samples from cached file ' .. fileName) + f = torch.DiskFile(fileName, 'rw') + self:read(f) + f.close(f) + datasetLoadedFromFile = true + end + end + + -- If dataset couldn't be loaded from cache, load it + if (datasetLoadedFromFile == false) then + self:append{dataSetFolder=dataSetFolder, channels=channels, + nbSamplesRequired=nbSamplesRequired, + sampleSize=sampleSize} + -- if cache name given, create it now + if (fileName ~= nil) then + print('<DataSet> Dumping dataset to cache file ' .. fileName .. ' for fast retrieval') + f = torch.DiskFile(fileName, 'rw') + self:write(f) + f.close(f) + end + end +end + +function lDataSet:emptySet(dataSetFolder) + for i = 1,table.getn(self) do + self[i] = nil + end + self.nbSamples = 0 +end + +function lDataSet:apply(toapply) + print('<DataSet> Applying function to dataset') + for i=1,self.nbSamples do + xlua.progress(i, self.nbSamples) + self[i][1] = toapply(self[i][1]) + end +end + +function lDataSet:cropAndResize(side) + for i=1,self.nbSamples do + local newSample = torch.Tensor(1, side, side) + local initSide = math.min(self[i][1]:size()[1], self[i][1]:size()[2]) + local x1 = math.floor((self[i][1]:size(3) - initSide) / 2) + local y1 = math.floor((self[i][1]:size(2) - initSide) / 2) + local x2 = x1 + initSide + local y2 = y1 + initSide + image.crop(newSample,self[i][1],x1,y1,x2,y2) + self[i][1] = newSample + end +end + +function lDataSet:add(args) + local input = args.input + local output = args.output + self.nbSamples = self.nbSamples + 1 + self[self.nbSamples] = {input, output} +end + +function lDataSet:append(...) + -- parse args + local args, dataSetFolder, channels, nbSamplesRequired, useLabelPiped, + useDirAsLabel, nbLabels, sampleSize + = xlua.unpack( + {...}, + 'DataSet:append', 'append a folder to the dataset object', + {arg='dataSetFolder', type='string', help='path to dataset', req=true}, + {arg='channels', type='number', help='number of channels for the image to load', default=3}, + {arg='nbSamplesRequired', type='number', help='max number of samples to load'}, + {arg='useLabelPiped', type='boolean', help='flag to use the filename as output value',default=false}, + {arg='useDirAsLabel', type='boolean', help='flag to use the directory as label',default=false}, + {arg='nbLabels', type='number', help='how many classes (goes with useDirAsLabel)', default=1}, + {arg='sampleSize', type='table', help='resize all sample: {w,h}'} + ) + -- parse args + local files = sys.dir(dataSetFolder) + + print('<DataSet> Loading samples from ' .. args.dataSetFolder .. '/') + + -- nb of samples to load: + local toLoad = table.getn(files) + if (nbSamplesRequired ~= nil and nbSamplesRequired ~= 'all') then + toLoad = math.min(toLoad, nbSamplesRequired) + end + local loaded = 0 + + for k,file in pairs(files) do + local input, inputs, rawOutput + + -- disp progress + xlua.progress(k, toLoad) + + if (string.find(file,'.png')) then + -- load the PNG into a new Tensor + pathToPng = sys.concat(dataSetFolder, file) + input = image.loadPNG(pathToPng,channels) + + -- parse the file name and set the ouput from it + rawOutput = sys.split(string.gsub(file, ".png", ""),'|') + + elseif (string.find(file,'.p[pgn]m')) then + -- load the PPM into a new Tensor + pathToPpm = sys.concat(dataSetFolder, file) + input = image.loadPPM(pathToPpm,channels) + + -- parse the file name and set the ouput from it + rawOutput = sys.split(string.gsub(file, ".p[pgn]m", ""),'|') + + elseif (string.find(file,'.jpg')) then + -- load the JPG into a new Tensor + pathToPpm = sys.concat(dataSetFolder, file) + input = image.load(pathToPpm,channels) + + -- parse the file name and set the ouput from it + rawOutput = sys.split(string.gsub(file, ".jpg", ""),'|') + end + + -- if image loaded then add into the set + if (input and rawOutput) then + table.remove(rawOutput,1) --remove file ID + + -- put input in 3D tensor + input:resize(channels, input:size(2), input:size(3)) + + -- rescale ? + if sampleSize then + inputs = torch.Tensor(channels, sampleSize[2], sampleSize[3]) + image.scale(input, inputs, 'bilinear') + else + inputs = input + end + + -- and generate output + local output = torch.Tensor(table.getn(rawOutput), 1) + for i,v in ipairs(rawOutput) do + output[i][1]=v + end + + -- add input/output in the set + self.nbSamples = self.nbSamples + 1 + self[self.nbSamples] = {inputs, output} + + loaded = loaded + 1 + if (loaded == toLoad) then + break + end + end + + -- some cleanup, for memory + collectgarbage() + end +end + +function lDataSet:appendDataSet(dataset) + print("<DataSet> Merging dataset of size = "..dataset:size().. + " into dataset of size = "..self:size()) + for i = 1,dataset:size() do + self.nbSamples = self.nbSamples + 1 + self[self.nbSamples] = {} + self[self.nbSamples][1] = torch.Tensor(dataset[i][1]):copy(dataset[i][1]) + if (dataset[i][2] ~= nil) then + self[self.nbSamples][2] = torch.Tensor(dataset[i][2]):copy(dataset[i][2]) + end + end +end + +function lDataSet:popSubset(args) + -- parse args + local nElement = args.nElement + local ratio = args.ratio or 0.1 + local subset = args.outputSet or DataSet() + + -- get nb of samples to pop + local start_index + if (nElement ~= nil) then + start_index = self:size() - nElement + 1 + else + start_index = math.floor((1-ratio)*self:size()) + 1 + end + + -- info + print('<DataSet> Popping ' .. self:size() - start_index + 1 .. ' samples dataset') + + -- extract samples + for i = self:size(), start_index, -1 do + subset.nbSamples = subset.nbSamples + 1 + subset[subset.nbSamples] = {} + subset[subset.nbSamples][1] = torch.Tensor(self[i][1]):copy(self[i][1]) + subset[subset.nbSamples][2] = torch.Tensor(self[i][2]):copy(self[i][2]) + self[i] = nil + self.nbSamples = self.nbSamples - 1 + end + + -- return network + return subset +end + +function lDataSet:resize(w,h) + self.resized = true + xlua.error('not implemented yet', 'DataSet') +end + +function lDataSet:shuffle() + if (self.nbSamples == 0) then + print('Warning, trying to shuffle empty Dataset, no effect...') + return + end + local n = self.nbSamples + + while n > 2 do + local k = math.random(n) + -- swap elements + self[n], self[k] = self[k], self[n] + n = n - 1 + end +end + +function lDataSet:display(args) -- opt args : scale, nbSamples + -- arg list: + local min, max, nbSamples, scale, w + local title = 'DataSet' + local resX = 800 + local resY = 600 + -- parse args: + args = args or {} + min = args.min + max = args.max + nbSamples = args.nbSamples or self.nbSamples + nbSamples = math.min(nbSamples,self.nbSamples) + scale = args.scale + title = args.title or title + w = window or gfx.Window(resX, resY, title) + resX = args.resX or resX + resY = args.resY or resY + print('<DataSet> displaying ' .. nbSamples .. ' samples') + + local step_x = 0 + local step_y = 0 + self.window = w + + if (scale == nil) then + --get the best scale to feet all data + local sizeX = self[1][1]:size()[1] + local sizeY = self[1][1]:size()[2] + scale = math.sqrt(resX*resY/ (sizeX*sizeY*nbSamples)) + end + + for i=1,nbSamples do + if (step_x >= resX) then + step_x = 0 + step_y = step_y + self[i][1]:size()[2]*scale + if (step_y >= resY) then + break + end + end + local tmp = image.scaleForDisplay{tensor=self[i][1], min=min, max=max} + w:blit(tmp, scale, step_x, step_y, title) + step_x = step_x + self[i][1]:size()[1]*scale + end +end + +function lDataSet:__show() + self:display{nbSamples=100} +end + +function lDataSet:useCacheFile(fileName) + self.cacheFileName = fileName +end + + +function lDataSet:save(fileName) + local fileName = fileName or self.fileName + self.fileName = fileName + print('<DataSet> Saving DataSet to:',fileName) + local file = torch.DiskFile(fileName, 'w') + self:write(file) + file:close() +end + +function lDataSet:open(fileName) + local fileName = fileName or self.fileName + self.fileName = fileName + print('<DataSet> Loading DataSet from File:',fileName) + local file = torch.DiskFile(fileName, 'r') + self:read(file) + file:close() + print('<DataSet> '..self.nbSamples..' samples loaded') +end + +function lDataSet:write(file) + file:writeBool(self.resized) + file:writeInt(self.nbSamples) + -- write all the samples + for i = 1,self.nbSamples do + file:writeObject(self[i]) + end +end + +function lDataSet:read(file) + self.resized = file:readBool() + self.nbSamples = file:readInt() + -- read all the samples + for i = 1,self.nbSamples do + self[i] = file:readObject() + end +end diff --git a/StochasticTrainer.lua b/StochasticTrainer.lua index 526ec49..a1a78bc 100644 --- a/StochasticTrainer.lua +++ b/StochasticTrainer.lua @@ -23,7 +23,7 @@ function StochasticTrainer:__init(...) {arg='learningRateDecay', type='number', help='learning rate decay (rate = rate * (1-decay), at each epoch)', default=0}, {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW + momentum*prev(dE/dW))', default=0}, - {arg='maxIteration', type='number', help='maximum number of epochs', default=50}, + {arg='maxEpoch', type='number', help='maximum number of epochs', default=50}, {arg='maxTarget', type='boolean', help='replaces an CxHxW target map by a HxN target of max values (for NLL criterions)', default=false}, {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true}, @@ -65,7 +65,7 @@ function StochasticTrainer:train(dataset) for t = 1,dataset:size() do -- disp progress if self.dispProgress then - xlua.dispProgress(t, dataset:size()) + xlua.progress(t, dataset:size()) end -- load new sample @@ -117,7 +117,7 @@ function StochasticTrainer:train(dataset) end -- weight decay ? - if self.weightDecay ~= 0 then + if self.weightDecay ~= 0 and module.decayParameters then module:decayParameters(self.weightDecay) end @@ -175,7 +175,7 @@ function StochasticTrainer:test(dataset) for t = 1,dataset:size() do -- disp progress if self.dispProgress then - xlua.dispProgress(t, dataset:size()) + xlua.progress(t, dataset:size()) end -- get new sample @@ -74,3 +74,7 @@ torch.include('nnx', 'SuperCriterion.lua') -- trainers: torch.include('nnx', 'Trainer.lua') torch.include('nnx', 'StochasticTrainer.lua') + +-- datasets: +torch.include('nnx', 'DataSet.lua') +torch.include('nnx', 'DataList.lua')
\ No newline at end of file diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec index 94f9d9c..785fad1 100644 --- a/nnx-1.0-1.rockspec +++ b/nnx-1.0-1.rockspec @@ -22,6 +22,7 @@ description = { dependencies = { "lua >= 5.1", "torch", + "sys", "xlua", "lunit" } @@ -71,6 +72,8 @@ build = { install_files(/lua/nnx SuperCriterion.lua) install_files(/lua/nnx Trainer.lua) install_files(/lua/nnx StochasticTrainer.lua) + install_files(/lua/nnx DataSet.lua) + install_files(/lua/nnx DataList.lua) add_subdirectory (test) install_targets(/lib nnx) ]], |