6 files changed, 421 insertions, 5 deletions
diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua
index 78809ca..bf581bf 100644
--- a/ConfusionMatrix.lua
+++ b/ConfusionMatrix.lua
@@ -37,7 +37,7 @@ function ConfusionMatrix:updateValids()
    self.averageValid = 0
    local nvalids = 0
    for t = 1,self.mat:size(1) do
-      if not xlua.isNaN(self.valids[t]) then
+      if not sys.isNaN(self.valids[t]) then
          self.averageValid = self.averageValid + self.valids[t]
          nvalids = nvalids + 1
       end
diff --git a/DataList.lua b/DataList.lua
new file mode 100644
index 0000000..5b95c48
--- /dev/null
+++ b/DataList.lua
@@ -0,0 +1,58 @@
+--------------------------------------------------------------------------------
+-- DataList: a container for plain DataSets.
+-- Each sub dataset represents one class.
+--
+-- Authors: Corda, Farabet
+--------------------------------------------------------------------------------
+
+local DataList, parent = torch.class('nn.DataList','nn.DataSet')
+
+function DataList:__init() -- builds an empty list of per-class datasets
+   parent.__init(self)
+   self.datasets = {}   -- one entry per class, filled by appendDataSet()
+   self.nbClass = 0     -- number of datasets appended so far
+   self.ClassName = {}  -- class names, stored at the same index as their dataset
+   self.nbSamples = 0   -- virtual size, recomputed on every appendDataSet()
+end
+
+function DataList:__tostring__() -- one-line summary: sample and class counts
+   local str = 'DataList' -- kept local: a bare assignment here would create a global 'str'
+   str = str .. ' + nb samples : '..self.nbSamples
+   str = str .. ' + nb classes : '..self.nbClass
+   return str
+end
+
+function DataList:__index__(key) -- numeric keys address samples interleaved across the classes
+   if type(key)=='number' and self.nbClass>0 and key <= self.nbSamples then
+      local class = ((key-1) % self.nbClass) + 1 -- consecutive keys cycle through the classes
+      local classSize = self.datasets[class]:size()
+      local elmt = math.floor((key-1)/self.nbClass) + 1 -- rank of this key within its class
+      elmt = ((elmt-1) % classSize) + 1 -- wrap around when this class holds fewer samples
+
+      -- create target vector on the fly: 1x1xnbClass tensor, -1 everywhere, +1 at the class index
+      self.datasets[class][elmt][2] = torch.Tensor(1,1,self.nbClass):fill(-1)
+      self.datasets[class][elmt][2][1][1][class] = 1
+
+      -- apply hook on sample: an optional self.hookOnSample(self, sample) may replace it
+      local sample = self.datasets[class][elmt]
+      if self.hookOnSample then
+         sample = self.hookOnSample(self,sample)
+      end
+
+      return sample,true -- NOTE(review): second value presumably tells torch's class system the key was handled; confirm
+   end
+   -- any other key (not a number, or beyond nbSamples) falls back to a raw field lookup
+   return rawget(self, key)
+end
+
+function DataList:appendDataSet(dataSet,className) -- registers dataSet as one more class
+   table.insert(self.datasets,dataSet)
+   if self.nbSamples == 0 then
+      self.nbSamples = dataSet:size()
+   else
+      self.nbSamples = math.floor(math.max(self.nbSamples/self.nbClass,dataSet:size())) -- largest per-class size so far
+   end
+   self.nbClass = self.nbClass + 1
+   self.nbSamples = self.nbSamples * self.nbClass -- total = largest class size x number of classes
+   table.insert(self.ClassName,self.nbClass,className)
+end
diff --git a/DataSet.lua b/DataSet.lua
new file mode 100644
index 0000000..693c524
--- /dev/null
+++ b/DataSet.lua
@@ -0,0 +1,351 @@
+--------------------------------------------------------------------------------
+-- DataSet: a class to handle standard datasets.
+--
+-- Authors: Corda, Farabet
+--------------------------------------------------------------------------------
+
+local lDataSet = torch.class('nn.DataSet')
+
+function lDataSet:__init(...) -- creates an empty set; any arguments are forwarded to load()
+   xlua.require('image',true)
+   self.nbSamples = 0
+   if select('#',...) > 0 then
+      self:load(...)
+   end
+end
+
+function lDataSet:size() -- number of samples currently held
+   return self.nbSamples
+end
+
+function lDataSet:load(...) -- (re)loads the set from a cache file if present, else from dataSetFolder
+   -- parse args
+   local args, dataSetFolder, nbSamplesRequired, cacheFile, channels, sampleSize
+      = xlua.unpack(
+      {...},
+      'DataSet.load', nil,
+      {arg='dataSetFolder', type='string', help='path to dataset', req=true},
+      {arg='nbSamplesRequired', type='number', help='number of patches to load', default='all'},
+      {arg='cacheFile', type='string', help='path to file to cache files'},
+      {arg='channels', type='number', help='nb of channels', default=1},
+      {arg='sampleSize', type='table', help='resize all sample: {w,h}'}
+   )
+   self.cacheFileName = cacheFile or self.cacheFileName
+
+   -- Clear current dataset
+   self:emptySet()
+
+   -- Then try to find if cache file exists
+   -- the base name of this file can be provided by useCacheFile()
+   -- and the suffix is the nb of samples needed, 'all' if not specified
+   local fileName
+   local datasetLoadedFromFile = false
+   if (self.cacheFileName ~= nil) then
+      fileName = self.cacheFileName .. '-' .. nbSamplesRequired
+      if sys.filep(fileName) then
+         -- File found
+         print('<DataSet> Loading samples from cached file ' .. fileName)
+         local f = torch.DiskFile(fileName, 'rw') -- local handle: must not leak into the globals
+         self:read(f)
+         f:close()
+         datasetLoadedFromFile = true
+      end
+   end
+
+   -- If dataset couldn't be loaded from cache, load it
+   if (datasetLoadedFromFile == false) then
+      self:append{dataSetFolder=dataSetFolder, channels=channels,
+                  nbSamplesRequired=nbSamplesRequired,
+                  sampleSize=sampleSize}
+      -- if cache name given, create it now
+      if (fileName ~= nil) then
+         print('<DataSet> Dumping dataset to cache file ' .. fileName .. ' for fast retrieval')
+         local f = torch.DiskFile(fileName, 'rw') -- local handle: must not leak into the globals
+         self:write(f)
+         f:close()
+      end
+   end
+end
+
+function lDataSet:emptySet(dataSetFolder) -- drops every sample; dataSetFolder is not used by the body
+   for i = 1,table.getn(self) do
+      self[i] = nil
+   end
+   self.nbSamples = 0
+end
+
+function lDataSet:apply(toapply) -- replaces each sample's input with toapply(input)
+   print('<DataSet> Applying function to dataset')
+   for i=1,self.nbSamples do
+      xlua.progress(i, self.nbSamples)
+      self[i][1] = toapply(self[i][1]) -- slot 1 is the input; slot 2 (the output) is left untouched
+   end
+end
+
+function lDataSet:cropAndResize(side) -- centered square crop of every input, written into a 1 x side x side tensor
+   for i=1,self.nbSamples do
+      local newSample = torch.Tensor(1, side, side)
+      local initSide = math.min(self[i][1]:size(2), self[i][1]:size(3)) -- min(height, width): same dims as x1/y1 below, dim 1 is channels
+      local x1 = math.floor((self[i][1]:size(3) - initSide) / 2)
+      local y1 = math.floor((self[i][1]:size(2) - initSide) / 2)
+      local x2 = x1 + initSide
+      local y2 = y1 + initSide
+      image.crop(newSample,self[i][1],x1,y1,x2,y2) -- NOTE(review): presumably also rescales the crop to newSample's size; confirm
+      self[i][1] = newSample
+   end
+end
+
+function lDataSet:add(args) -- appends one {input, output} pair taken from args.input / args.output
+   local input = args.input
+   local output = args.output
+   self.nbSamples = self.nbSamples + 1
+   self[self.nbSamples] = {input, output}
+end
+
+function lDataSet:append(...) -- loads the png/ppm/pgm/pnm/jpg images of a folder as new samples
+   -- parse args
+   local args, dataSetFolder, channels, nbSamplesRequired, useLabelPiped,
+   useDirAsLabel, nbLabels, sampleSize
+      = xlua.unpack(
+      {...},
+      'DataSet:append', 'append a folder to the dataset object',
+      {arg='dataSetFolder', type='string', help='path to dataset', req=true},
+      {arg='channels', type='number', help='number of channels for the image to load', default=3},
+      {arg='nbSamplesRequired', type='number', help='max number of samples to load'},
+      {arg='useLabelPiped', type='boolean', help='flag to use the filename as output value',default=false},
+      {arg='useDirAsLabel', type='boolean', help='flag to use the directory as label',default=false},
+      {arg='nbLabels', type='number', help='how many classes (goes with useDirAsLabel)', default=1},
+      {arg='sampleSize', type='table', help='resize all sample: {w,h}'}
+   )
+   -- list the content of the folder
+   local files = sys.dir(dataSetFolder)
+
+   print('<DataSet> Loading samples from ' .. args.dataSetFolder .. '/')
+
+   -- nb of samples to load:
+   local toLoad = table.getn(files)
+   if (nbSamplesRequired ~= nil and nbSamplesRequired ~= 'all') then
+      toLoad = math.min(toLoad, nbSamplesRequired)
+   end
+   local loaded = 0
+
+   for k,file in pairs(files) do
+      local input, inputs, rawOutput
+
+      -- disp progress
+      xlua.progress(k, toLoad)
+
+      if (string.find(file,'%.png$')) then -- '%.' is a literal dot, '$' anchors on the extension
+         -- load the PNG into a new Tensor
+         local pathToPng = sys.concat(dataSetFolder, file)
+         input = image.loadPNG(pathToPng,channels)
+
+         -- parse the file name and set the output from it
+         rawOutput = sys.split(string.gsub(file, "%.png$", ""),'|')
+
+      elseif (string.find(file,'%.p[pgn]m$')) then
+         -- load the PPM into a new Tensor
+         local pathToPpm = sys.concat(dataSetFolder, file)
+         input = image.loadPPM(pathToPpm,channels)
+
+         -- parse the file name and set the output from it
+         rawOutput = sys.split(string.gsub(file, "%.p[pgn]m$", ""),'|')
+
+      elseif (string.find(file,'%.jpg$')) then
+         -- load the JPG into a new Tensor
+         local pathToJpg = sys.concat(dataSetFolder, file)
+         input = image.load(pathToJpg,channels)
+
+         -- parse the file name and set the output from it
+         rawOutput = sys.split(string.gsub(file, "%.jpg$", ""),'|')
+      end
+
+      -- if image loaded then add into the set
+      if (input and rawOutput) then
+         table.remove(rawOutput,1) --remove file ID
+
+         -- put input in 3D tensor
+         input:resize(channels, input:size(2), input:size(3))
+
+         -- rescale ? sampleSize is {w,h}, tensors are channels x h x w
+         if sampleSize then
+            inputs = torch.Tensor(channels, sampleSize[2], sampleSize[1])
+            image.scale(input, inputs, 'bilinear')
+         else
+            inputs = input
+         end
+
+         -- and generate output: one row per '|'-separated field of the file name
+         local output = torch.Tensor(table.getn(rawOutput), 1)
+         for i,v in ipairs(rawOutput) do
+            output[i][1]=v
+         end
+
+         -- add input/output in the set
+         self.nbSamples = self.nbSamples + 1
+         self[self.nbSamples] = {inputs, output}
+
+         loaded = loaded + 1
+         if (loaded == toLoad) then
+            break
+         end
+      end
+
+      -- some cleanup, for memory
+      collectgarbage()
+   end
+end
+
+function lDataSet:appendDataSet(dataset) -- appends deep copies of every sample of dataset
+   print("<DataSet> Merging dataset of size = "..dataset:size()..
+      " into dataset of size = "..self:size())
+   for i = 1,dataset:size() do
+      self.nbSamples = self.nbSamples + 1
+      self[self.nbSamples] = {}
+      self[self.nbSamples][1] = torch.Tensor():resizeAs(dataset[i][1]):copy(dataset[i][1]) -- fresh storage: torch.Tensor(t) would only alias t
+      if (dataset[i][2] ~= nil) then
+         self[self.nbSamples][2] = torch.Tensor():resizeAs(dataset[i][2]):copy(dataset[i][2])
+      end
+   end
+end
+
+function lDataSet:popSubset(args) -- moves the last nElement (or last `ratio` fraction) samples into a subset
+   -- parse args: nElement (count), ratio (fraction, default 0.1), outputSet (destination)
+   local nElement = args.nElement
+   local ratio = args.ratio or 0.1
+   local subset = args.outputSet or nn.DataSet() -- the class is registered as nn.DataSet
+
+   -- get nb of samples to pop
+   local start_index
+   if (nElement ~= nil) then
+      start_index = math.max(1, self:size() - nElement + 1) -- never pop more than the set holds
+   else
+      start_index = math.floor((1-ratio)*self:size()) + 1
+   end
+
+   -- info
+   print('<DataSet> Popping ' .. self:size() - start_index + 1 .. ' samples dataset')
+
+   -- extract samples, last one first
+   for i = self:size(), start_index, -1 do
+      subset.nbSamples = subset.nbSamples + 1
+      subset[subset.nbSamples] = {}
+      subset[subset.nbSamples][1] = torch.Tensor():resizeAs(self[i][1]):copy(self[i][1])
+      subset[subset.nbSamples][2] = self[i][2] and torch.Tensor():resizeAs(self[i][2]):copy(self[i][2]) -- output may be nil
+      self[i] = nil
+      self.nbSamples = self.nbSamples - 1
+   end
+
+   -- return the popped subset
+   return subset
+end
+
+function lDataSet:resize(w,h) -- placeholder: only reports 'not implemented yet' through xlua.error; w and h are unused
+   self.resized = true -- NOTE(review): flag is set although nothing is resized; confirm this is intended
+   xlua.error('not implemented yet', 'DataSet')
+end
+
+function lDataSet:shuffle() -- in-place Fisher-Yates shuffle of the samples, driven by math.random
+   if (self.nbSamples == 0) then
+      print('Warning, trying to shuffle empty Dataset, no effect...')
+      return
+   end
+   local n = self.nbSamples
+
+   while n > 1 do -- the n == 2 step is required: without it a 2-sample set is never shuffled
+      local k = math.random(n)
+      -- swap elements
+      self[n], self[k] = self[k], self[n]
+      n = n - 1
+   end
+end
+
+function lDataSet:display(args) -- opt args : min, max, scale, nbSamples, title, resX, resY, window
+   -- arg list:
+   local min, max, nbSamples, scale, w
+   local title = 'DataSet'
+   local resX = 800
+   local resY = 600
+   -- parse args:
+   args = args or {}
+   min = args.min
+   max = args.max
+   nbSamples = args.nbSamples or self.nbSamples
+   nbSamples = math.min(nbSamples,self.nbSamples)
+   scale = args.scale
+   title = args.title or title
+   resX = args.resX or resX -- resolution is resolved first so that a new window honours it
+   resY = args.resY or resY
+   w = args.window or window or gfx.Window(resX, resY, title) -- global 'window' kept as a legacy fallback
+   print('<DataSet> displaying ' .. nbSamples .. ' samples')
+
+   local step_x = 0
+   local step_y = 0
+   self.window = w
+
+   if (scale == nil) then
+      --get the best scale to fit all data
+      local sizeX = self[1][1]:size()[1]
+      local sizeY = self[1][1]:size()[2]
+      scale = math.sqrt(resX*resY/ (sizeX*sizeY*nbSamples))
+   end
+
+   for i=1,nbSamples do
+      if (step_x >= resX) then -- row is full: move to the next one, stop once past the bottom
+         step_x = 0
+         step_y = step_y + self[i][1]:size()[2]*scale
+         if (step_y >= resY) then
+            break
+         end
+      end
+      local tmp  = image.scaleForDisplay{tensor=self[i][1], min=min, max=max}
+      w:blit(tmp, scale, step_x, step_y, title)
+      step_x = step_x + self[i][1]:size()[1]*scale
+   end
+end
+
+function lDataSet:__show() -- displays at most the first 100 samples via display()
+   self:display{nbSamples=100}
+end
+
+function lDataSet:useCacheFile(fileName) -- sets the base name of the cache file used by load()
+   self.cacheFileName = fileName
+end
+
+
+function lDataSet:save(fileName) -- serializes the set to fileName (defaults to self.fileName)
+   local fileName = fileName or self.fileName
+   self.fileName = fileName -- remembered so later save()/open() calls may omit the name
+   print('<DataSet> Saving DataSet to:',fileName)
+   local file = torch.DiskFile(fileName, 'w')
+   self:write(file)
+   file:close()
+end
+
+function lDataSet:open(fileName) -- reads the set back from fileName (defaults to self.fileName)
+   local fileName = fileName or self.fileName
+   self.fileName = fileName -- remembered so later save()/open() calls may omit the name
+   print('<DataSet> Loading DataSet from File:',fileName)
+   local file = torch.DiskFile(fileName, 'r')
+   self:read(file)
+   file:close()
+   print('<DataSet> '..self.nbSamples..' samples loaded')
+end
+
+function lDataSet:write(file) -- serializes: resized flag, sample count, then each sample
+   file:writeBool(self.resized == true) -- resized is nil until resize() is called; always write a real boolean
+   file:writeInt(self.nbSamples)
+   -- write all the samples
+   for i = 1,self.nbSamples do
+      file:writeObject(self[i])
+   end
+end
+
+function lDataSet:read(file) -- restores the fields in the order write() emits them
+   self.resized = file:readBool()
+   self.nbSamples = file:readInt()
+   -- read all the samples
+   for i = 1,self.nbSamples do
+      self[i] = file:readObject()
+   end
+end
diff --git a/StochasticTrainer.lua b/StochasticTrainer.lua
index 526ec49..a1a78bc 100644
--- a/StochasticTrainer.lua
+++ b/StochasticTrainer.lua
@@ -23,7 +23,7 @@ function StochasticTrainer:__init(...)
       {arg='learningRateDecay', type='number', help='learning rate decay (rate = rate * (1-decay), at each epoch)', default=0},
       {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0},
       {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW + momentum*prev(dE/dW))', default=0},
-      {arg='maxIteration', type='number', help='maximum number of epochs', default=50},
+      {arg='maxEpoch', type='number', help='maximum number of epochs', default=50},
 
       {arg='maxTarget', type='boolean', help='replaces an CxHxW target map by a HxN target of max values (for NLL criterions)', default=false},
       {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true},
@@ -65,7 +65,7 @@ function StochasticTrainer:train(dataset)
       for t = 1,dataset:size() do
          -- disp progress
          if self.dispProgress then
-            xlua.dispProgress(t, dataset:size())
+            xlua.progress(t, dataset:size())
          end
 
          -- load new sample
@@ -117,7 +117,7 @@ function StochasticTrainer:train(dataset)
             end
 
             -- weight decay ?
-            if self.weightDecay ~= 0 then
+            if self.weightDecay ~= 0 and module.decayParameters then
                module:decayParameters(self.weightDecay)
             end
 
@@ -175,7 +175,7 @@ function StochasticTrainer:test(dataset)
    for t = 1,dataset:size() do
       -- disp progress
       if self.dispProgress then
-         xlua.dispProgress(t, dataset:size())
+         xlua.progress(t, dataset:size())
       end
 
       -- get new sample
diff --git a/init.lua b/init.lua
index e7db0a6..e8fa5f6 100644
--- a/init.lua
+++ b/init.lua
@@ -74,3 +74,7 @@ torch.include('nnx', 'SuperCriterion.lua')
 -- trainers:
 torch.include('nnx', 'Trainer.lua')
 torch.include('nnx', 'StochasticTrainer.lua')
+
+-- datasets:
+torch.include('nnx', 'DataSet.lua')
+torch.include('nnx', 'DataList.lua')
+\ No newline at end of file
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 94f9d9c..785fad1 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -22,6 +22,7 @@ description = {
 dependencies = {
    "lua >= 5.1",
    "torch",
+   "sys",
    "xlua",
    "lunit"
 }
@@ -71,6 +72,8 @@ build = {
          install_files(/lua/nnx SuperCriterion.lua)
          install_files(/lua/nnx Trainer.lua)
          install_files(/lua/nnx StochasticTrainer.lua)
+         install_files(/lua/nnx DataSet.lua)
+         install_files(/lua/nnx DataList.lua)
          add_subdirectory (test)
          install_targets(/lib nnx)
    ]],