github.com/clementfarabet/lua---nnx.git
 ConfusionMatrix.lua   |   2 +-
 DataList.lua          |  58 ++++++
 DataSet.lua           | 351 +++++++++
 StochasticTrainer.lua |   8 ++--
 init.lua              |   4 ++
 nnx-1.0-1.rockspec    |   3 ++
 6 files changed, 421 insertions(+), 5 deletions(-)
diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua
index 78809ca..bf581bf 100644
--- a/ConfusionMatrix.lua
+++ b/ConfusionMatrix.lua
@@ -37,7 +37,7 @@ function ConfusionMatrix:updateValids()
self.averageValid = 0
local nvalids = 0
for t = 1,self.mat:size(1) do
- if not xlua.isNaN(self.valids[t]) then
+ if not sys.isNaN(self.valids[t]) then
self.averageValid = self.averageValid + self.valids[t]
nvalids = nvalids + 1
end
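The hunk above swaps the NaN helper from xlua to sys, consistent with the new "sys" dependency added to the rockspec at the bottom of this change. For reference, a minimal NaN test in plain Lua relies on the IEEE 754 rule that NaN is the only value not equal to itself; this is a sketch, not necessarily how sys.isNaN is implemented:

  -- sketch: NaN compares unequal to itself under IEEE 754
  local function isNaN(x)
     return x ~= x
  end

  print(isNaN(0/0)) --> true
  print(isNaN(1.0)) --> false
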
diff --git a/DataList.lua b/DataList.lua
new file mode 100644
index 0000000..5b95c48
--- /dev/null
+++ b/DataList.lua
@@ -0,0 +1,58 @@
+--------------------------------------------------------------------------------
+-- DataList: a container for plain DataSets.
+-- Each sub dataset represents one class.
+--
+-- Authors: Corda, Farabet
+--------------------------------------------------------------------------------
+
+local DataList, parent = torch.class('nn.DataList','nn.DataSet')
+
+function DataList:__init()
+ parent.__init(self)
+ self.datasets = {}
+ self.nbClass = 0
+ self.ClassName = {}
+ self.nbSamples = 0
+end
+
+function DataList:__tostring__()
+ local str = 'DataList'
+ str = str .. ' + nb samples : '..self.nbSamples
+ str = str .. ' + nb classes : '..self.nbClass
+ return str
+end
+
+function DataList:__index__(key)
+ if type(key)=='number' and self.nbClass>0 and key <= self.nbSamples then
+ local class = ((key-1) % self.nbClass) + 1
+ local classSize = self.datasets[class]:size()
+ local elmt = math.floor((key-1)/self.nbClass) + 1
+ elmt = ((elmt-1) % classSize) + 1
+
+ -- create target vector on the fly
+ self.datasets[class][elmt][2] = torch.Tensor(1,1,self.nbClass):fill(-1)
+ self.datasets[class][elmt][2][1][1][class] = 1
+
+ -- apply hook on sample
+ local sample = self.datasets[class][elmt]
+ if self.hookOnSample then
+ sample = self.hookOnSample(self,sample)
+ end
+
+ return sample,true
+ end
+ -- if key is not a number this should return nil
+ return rawget(self, key)
+end
+
+function DataList:appendDataSet(dataSet,className)
+ table.insert(self.datasets,dataSet)
+ if self.nbSamples == 0 then
+ self.nbSamples = dataSet:size()
+ else
+ self.nbSamples = math.floor(math.max(self.nbSamples/self.nbClass,dataSet:size()))
+ end
+ self.nbClass = self.nbClass + 1
+ self.nbSamples = self.nbSamples * self.nbClass
+ table.insert(self.ClassName,self.nbClass,className)
+end
diff --git a/DataSet.lua b/DataSet.lua
new file mode 100644
index 0000000..693c524
--- /dev/null
+++ b/DataSet.lua
@@ -0,0 +1,351 @@
+--------------------------------------------------------------------------------
+-- DataSet: a class to handle standard datasets.
+--
+-- Authors: Corda, Farabet
+--------------------------------------------------------------------------------
+
+local lDataSet = torch.class('nn.DataSet')
+
+function lDataSet:__init(...)
+ xlua.require('image',true)
+ self.nbSamples = 0
+ if select('#',...) > 0 then
+ self:load(...)
+ end
+end
+
+function lDataSet:size()
+ return self.nbSamples
+end
+
+function lDataSet:load(...)
+ -- parse args
+ local args, dataSetFolder, nbSamplesRequired, cacheFile, channels, sampleSize
+ = xlua.unpack(
+ {...},
+ 'DataSet.load', nil,
+ {arg='dataSetFolder', type='string', help='path to dataset', req=true},
+ {arg='nbSamplesRequired', type='number', help='number of patches to load', default='all'},
+ {arg='cacheFile', type='string', help='path to file to cache files'},
+ {arg='channels', type='number', help='nb of channels', default=1},
+ {arg='sampleSize', type='table', help='resize all sample: {w,h}'}
+ )
+ self.cacheFileName = cacheFile or self.cacheFileName
+
+ -- Clear current dataset
+ self:emptySet()
+
+ -- Then try to find if cache file exists
+ -- the base name of this file can be provided by useCacheFile()
+ -- and the suffix is the nb of samples required, 'all' if not specified
+ local fileName
+ local datasetLoadedFromFile = false
+ if (self.cacheFileName ~= nil) then
+ fileName = self.cacheFileName .. '-' .. nbSamplesRequired
+ if sys.filep(fileName) then
+ -- File found
+ print('<DataSet> Loading samples from cached file ' .. fileName)
+ local f = torch.DiskFile(fileName, 'rw')
+ self:read(f)
+ f:close()
+ datasetLoadedFromFile = true
+ end
+ end
+
+ -- If dataset couldn't be loaded from cache, load it
+ if (datasetLoadedFromFile == false) then
+ self:append{dataSetFolder=dataSetFolder, channels=channels,
+ nbSamplesRequired=nbSamplesRequired,
+ sampleSize=sampleSize}
+ -- if cache name given, create it now
+ if (fileName ~= nil) then
+ print('<DataSet> Dumping dataset to cache file ' .. fileName .. ' for fast retrieval')
+ local f = torch.DiskFile(fileName, 'rw')
+ self:write(f)
+ f:close()
+ end
+ end
+end
+
+function lDataSet:emptySet(dataSetFolder)
+ for i = 1,table.getn(self) do
+ self[i] = nil
+ end
+ self.nbSamples = 0
+end
+
+function lDataSet:apply(toapply)
+ print('<DataSet> Applying function to dataset')
+ for i=1,self.nbSamples do
+ xlua.progress(i, self.nbSamples)
+ self[i][1] = toapply(self[i][1])
+ end
+end
+
+function lDataSet:cropAndResize(side)
+ for i=1,self.nbSamples do
+ local newSample = torch.Tensor(1, side, side)
+ local initSide = math.min(self[i][1]:size(2), self[i][1]:size(3)) -- min(height, width) of a CxHxW tensor
+ local x1 = math.floor((self[i][1]:size(3) - initSide) / 2)
+ local y1 = math.floor((self[i][1]:size(2) - initSide) / 2)
+ local x2 = x1 + initSide
+ local y2 = y1 + initSide
+ image.crop(newSample,self[i][1],x1,y1,x2,y2)
+ self[i][1] = newSample
+ end
+end
+
+function lDataSet:add(args)
+ local input = args.input
+ local output = args.output
+ self.nbSamples = self.nbSamples + 1
+ self[self.nbSamples] = {input, output}
+end
+
+function lDataSet:append(...)
+ -- parse args
+ local args, dataSetFolder, channels, nbSamplesRequired, useLabelPiped,
+ useDirAsLabel, nbLabels, sampleSize
+ = xlua.unpack(
+ {...},
+ 'DataSet:append', 'append a folder to the dataset object',
+ {arg='dataSetFolder', type='string', help='path to dataset', req=true},
+ {arg='channels', type='number', help='number of channels for the image to load', default=3},
+ {arg='nbSamplesRequired', type='number', help='max number of samples to load'},
+ {arg='useLabelPiped', type='boolean', help='flag to use the filename as output value',default=false},
+ {arg='useDirAsLabel', type='boolean', help='flag to use the directory as label',default=false},
+ {arg='nbLabels', type='number', help='how many classes (goes with useDirAsLabel)', default=1},
+ {arg='sampleSize', type='table', help='resize all sample: {w,h}'}
+ )
+ -- parse args
+ local files = sys.dir(dataSetFolder)
+
+ print('<DataSet> Loading samples from ' .. args.dataSetFolder .. '/')
+
+ -- nb of samples to load:
+ local toLoad = table.getn(files)
+ if (nbSamplesRequired ~= nil and nbSamplesRequired ~= 'all') then
+ toLoad = math.min(toLoad, nbSamplesRequired)
+ end
+ local loaded = 0
+
+ for k,file in pairs(files) do
+ local input, inputs, rawOutput
+
+ -- disp progress
+ xlua.progress(k, toLoad)
+
+ if (string.find(file,'%.png$')) then
+ -- load the PNG into a new Tensor
+ local pathToPng = sys.concat(dataSetFolder, file)
+ input = image.loadPNG(pathToPng,channels)
+
+ -- parse the file name and set the output from it
+ rawOutput = sys.split(string.gsub(file, '%.png$', ''),'|')
+
+ elseif (string.find(file,'%.p[pgn]m$')) then
+ -- load the PPM/PGM/PNM into a new Tensor
+ local pathToPpm = sys.concat(dataSetFolder, file)
+ input = image.loadPPM(pathToPpm,channels)
+
+ -- parse the file name and set the output from it
+ rawOutput = sys.split(string.gsub(file, '%.p[pgn]m$', ''),'|')
+
+ elseif (string.find(file,'%.jpg$')) then
+ -- load the JPG into a new Tensor
+ local pathToJpg = sys.concat(dataSetFolder, file)
+ input = image.load(pathToJpg,channels)
+
+ -- parse the file name and set the output from it
+ rawOutput = sys.split(string.gsub(file, '%.jpg$', ''),'|')
+ end
+
+ -- if image loaded then add into the set
+ if (input and rawOutput) then
+ table.remove(rawOutput,1) --remove file ID
+
+ -- put input in 3D tensor
+ input:resize(channels, input:size(2), input:size(3))
+
+ -- rescale ?
+ if sampleSize then
+ inputs = torch.Tensor(channels, sampleSize[2], sampleSize[1]) -- sampleSize is {w,h}, tensor is CxHxW
+ image.scale(input, inputs, 'bilinear')
+ else
+ inputs = input
+ end
+
+ -- and generate output
+ local output = torch.Tensor(table.getn(rawOutput), 1)
+ for i,v in ipairs(rawOutput) do
+ output[i][1] = tonumber(v) -- fields split from the file name are strings
+ end
+
+ -- add input/output in the set
+ self.nbSamples = self.nbSamples + 1
+ self[self.nbSamples] = {inputs, output}
+
+ loaded = loaded + 1
+ if (loaded == toLoad) then
+ break
+ end
+ end
+
+ -- some cleanup, for memory
+ collectgarbage()
+ end
+end
+
+function lDataSet:appendDataSet(dataset)
+ print("<DataSet> Merging dataset of size = "..dataset:size()..
+ " into dataset of size = "..self:size())
+ for i = 1,dataset:size() do
+ self.nbSamples = self.nbSamples + 1
+ self[self.nbSamples] = {}
+ self[self.nbSamples][1] = torch.Tensor(dataset[i][1]):copy(dataset[i][1])
+ if (dataset[i][2] ~= nil) then
+ self[self.nbSamples][2] = torch.Tensor(dataset[i][2]):copy(dataset[i][2])
+ end
+ end
+end
+
+function lDataSet:popSubset(args)
+ -- parse args
+ local nElement = args.nElement
+ local ratio = args.ratio or 0.1
+ local subset = args.outputSet or nn.DataSet()
+
+ -- get nb of samples to pop
+ local start_index
+ if (nElement ~= nil) then
+ start_index = self:size() - nElement + 1
+ else
+ start_index = math.floor((1-ratio)*self:size()) + 1
+ end
+
+ -- info
+ print('<DataSet> Popping ' .. self:size() - start_index + 1 .. ' samples from dataset')
+
+ -- extract samples
+ for i = self:size(), start_index, -1 do
+ subset.nbSamples = subset.nbSamples + 1
+ subset[subset.nbSamples] = {}
+ subset[subset.nbSamples][1] = torch.Tensor(self[i][1]):copy(self[i][1])
+ subset[subset.nbSamples][2] = torch.Tensor(self[i][2]):copy(self[i][2])
+ self[i] = nil
+ self.nbSamples = self.nbSamples - 1
+ end
+
+ -- return subset
+ return subset
+end
+
+function lDataSet:resize(w,h)
+ self.resized = true
+ xlua.error('not implemented yet', 'DataSet')
+end
+
+function lDataSet:shuffle()
+ if (self.nbSamples == 0) then
+ print('Warning: trying to shuffle an empty DataSet, no effect...')
+ return
+ end
+ local n = self.nbSamples
+
+ while n > 1 do -- Fisher-Yates: swap positions n..2, each with a random earlier slot
+ local k = math.random(n)
+ -- swap elements
+ self[n], self[k] = self[k], self[n]
+ n = n - 1
+ end
+end
+
+function lDataSet:display(args) -- opt args : scale, nbSamples
+ -- arg list:
+ local min, max, nbSamples, scale, w
+ local title = 'DataSet'
+ local resX = 800
+ local resY = 600
+ -- parse args:
+ args = args or {}
+ min = args.min
+ max = args.max
+ nbSamples = args.nbSamples or self.nbSamples
+ nbSamples = math.min(nbSamples,self.nbSamples)
+ scale = args.scale
+ title = args.title or title
+ resX = args.resX or resX
+ resY = args.resY or resY
+ w = self.window or gfx.Window(resX, resY, title)
+ print('<DataSet> displaying ' .. nbSamples .. ' samples')
+
+ local step_x = 0
+ local step_y = 0
+ self.window = w
+
+ if (scale == nil) then
+ -- get the best scale to fit all the data
+ local sizeX = self[1][1]:size()[1]
+ local sizeY = self[1][1]:size()[2]
+ scale = math.sqrt(resX*resY/ (sizeX*sizeY*nbSamples))
+ end
+
+ for i=1,nbSamples do
+ if (step_x >= resX) then
+ step_x = 0
+ step_y = step_y + self[i][1]:size()[2]*scale
+ if (step_y >= resY) then
+ break
+ end
+ end
+ local tmp = image.scaleForDisplay{tensor=self[i][1], min=min, max=max}
+ w:blit(tmp, scale, step_x, step_y, title)
+ step_x = step_x + self[i][1]:size()[1]*scale
+ end
+end
+
+function lDataSet:__show()
+ self:display{nbSamples=100}
+end
+
+function lDataSet:useCacheFile(fileName)
+ self.cacheFileName = fileName
+end
+
+
+function lDataSet:save(fileName)
+ local fileName = fileName or self.fileName
+ self.fileName = fileName
+ print('<DataSet> Saving DataSet to:',fileName)
+ local file = torch.DiskFile(fileName, 'w')
+ self:write(file)
+ file:close()
+end
+
+function lDataSet:open(fileName)
+ local fileName = fileName or self.fileName
+ self.fileName = fileName
+ print('<DataSet> Loading DataSet from File:',fileName)
+ local file = torch.DiskFile(fileName, 'r')
+ self:read(file)
+ file:close()
+ print('<DataSet> '..self.nbSamples..' samples loaded')
+end
+
+function lDataSet:write(file)
+ file:writeBool(self.resized)
+ file:writeInt(self.nbSamples)
+ -- write all the samples
+ for i = 1,self.nbSamples do
+ file:writeObject(self[i])
+ end
+end
+
+function lDataSet:read(file)
+ self.resized = file:readBool()
+ self.nbSamples = file:readInt()
+ -- read all the samples
+ for i = 1,self.nbSamples do
+ self[i] = file:readObject()
+ end
+end
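A hedged usage sketch of the loader added above. DataSet:append() parses targets out of the file names: a name like '0042|1.png' is split on '|', the leading id is dropped, and the remaining fields become the output tensor. The folder and cache prefix below are hypothetical:

  -- hypothetical paths; file names assumed to look like '<id>|<target>.png'
  local data = nn.DataSet{dataSetFolder='patches/faces',
                          channels=1,
                          cacheFile='cache/faces'} -- a later run reloads 'cache/faces-all'

  data:shuffle()
  local testSet = data:popSubset{ratio=0.1} -- moves the last 10% into a new DataSet
  print(data:size(), testSet:size())
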
diff --git a/StochasticTrainer.lua b/StochasticTrainer.lua
index 526ec49..a1a78bc 100644
--- a/StochasticTrainer.lua
+++ b/StochasticTrainer.lua
@@ -23,7 +23,7 @@ function StochasticTrainer:__init(...)
{arg='learningRateDecay', type='number', help='learning rate decay (rate = rate * (1-decay), at each epoch)', default=0},
{arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0},
{arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW + momentum*prev(dE/dW))', default=0},
- {arg='maxIteration', type='number', help='maximum number of epochs', default=50},
+ {arg='maxEpoch', type='number', help='maximum number of epochs', default=50},
{arg='maxTarget', type='boolean', help='replaces a CxHxW target map by an HxN target of max values (for NLL criterions)', default=false},
{arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true},
@@ -65,7 +65,7 @@ function StochasticTrainer:train(dataset)
for t = 1,dataset:size() do
-- disp progress
if self.dispProgress then
- xlua.dispProgress(t, dataset:size())
+ xlua.progress(t, dataset:size())
end
-- load new sample
@@ -117,7 +117,7 @@ function StochasticTrainer:train(dataset)
end
-- weight decay ?
- if self.weightDecay ~= 0 then
+ if self.weightDecay ~= 0 and module.decayParameters then
module:decayParameters(self.weightDecay)
end
@@ -175,7 +175,7 @@ function StochasticTrainer:test(dataset)
for t = 1,dataset:size() do
-- disp progress
if self.dispProgress then
- xlua.dispProgress(t, dataset:size())
+ xlua.progress(t, dataset:size())
end
-- get new sample
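Three fixes in this file: the maxIteration option is renamed maxEpoch to match what it actually counts, progress display now calls xlua.progress (the name used elsewhere in this change), and weight decay is only applied when the module actually implements decayParameters. A hedged construction sketch follows; only the maxEpoch name is confirmed by the hunks above, the module/criterion argument names are assumptions:

  -- sketch only: 'module' and 'criterion' arg names are assumed, not shown in this diff
  local trainer = nn.StochasticTrainer{module = mlp, -- e.g. an nn.Sequential
                                       criterion = nn.MSECriterion(),
                                       maxEpoch = 50} -- renamed from maxIteration
  trainer:train(trainingSet)
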
diff --git a/init.lua b/init.lua
index e7db0a6..e8fa5f6 100644
--- a/init.lua
+++ b/init.lua
@@ -74,3 +74,7 @@ torch.include('nnx', 'SuperCriterion.lua')
-- trainers:
torch.include('nnx', 'Trainer.lua')
torch.include('nnx', 'StochasticTrainer.lua')
+
+-- datasets:
+torch.include('nnx', 'DataSet.lua')
+torch.include('nnx', 'DataList.lua')
\ No newline at end of file
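With these two includes, loading the package also registers the new classes; a quick check, assuming the package is required as 'nnx':

  require 'nnx'
  print(nn.DataSet)  -- the class registered by torch.class('nn.DataSet')
  print(nn.DataList) -- its subclass from DataList.lua
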
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 94f9d9c..785fad1 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -22,6 +22,7 @@ description = {
dependencies = {
"lua >= 5.1",
"torch",
+ "sys",
"xlua",
"lunit"
}
@@ -71,6 +72,8 @@ build = {
install_files(/lua/nnx SuperCriterion.lua)
install_files(/lua/nnx Trainer.lua)
install_files(/lua/nnx StochasticTrainer.lua)
+ install_files(/lua/nnx DataSet.lua)
+ install_files(/lua/nnx DataList.lua)
add_subdirectory (test)
install_targets(/lib nnx)
]],