| author | Marco Scoffier <github@metm.org> | 2011-09-21 18:35:35 +0400 |
|---|---|---|
| committer | Marco Scoffier <github@metm.org> | 2011-09-21 18:35:35 +0400 |
| commit | f3d95bdb2800806bc6283511ced2896e20e7b07a | |
| tree | 8c04bce62c98a907293f28e9c6ae8125fc2de2ae | |
| parent | b2388376c573e7cff9d3f24b6ae4c017c4faa18f | |
added some (ugly) code for optimizing on CPU when evaluating function on GPU
-rw-r--r-- | BatchOptimization.lua | 36

1 file changed, 32 insertions, 4 deletions
```diff
diff --git a/BatchOptimization.lua b/BatchOptimization.lua
index 9ff88f8..53dcd77 100644
--- a/BatchOptimization.lua
+++ b/BatchOptimization.lua
@@ -19,8 +19,24 @@ function Batch:__init(...)
                      {arg='verbose', type='number',
                       help='verbose level during training [0-2]', default=0}
    )
-   self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
-   self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+   -- *auto conversion from CUDA* This is a bit ugly
+   -- For now we assume that if we use the GPU this is for function
+   -- evaluations and that the batch optimisation will be on the CPU
+   -- thus we need to copy the flattened parameters to the CPU.
+   -- It is a question whether it makes sense to flatten (allocate a
+   -- single contiguous memory space) all the parameters on the
+   -- GPU but not doing this tweaking flattenParameters would need more work
+   if torch.getdefaulttensortype() == 'torch.CudaTensor' then
+      self.cuda_parameters =
+         nnx.flattenParameters(nnx.getParameters(self.module))
+      self.parameters = torch.DoubleTensor():resize(self.cuda_parameters:size()):copy(self.cuda_parameters)
+      self.cuda_gradParameters =
+         nnx.flattenParameters(nnx.getGradParameters(self.module))
+      self.gradParameters = torch.DoubleTensor():resize(self.cuda_gradParameters:size()):copy(self.cuda_gradParameters)
+   else
+      self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
+      self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+   end
    self.evalCounter = 0
    self.sampleCounter = 0
    if self.parallelize > 1 then
@@ -52,7 +68,11 @@ function Batch:forward_sequential(inputs, targets, options)
          end
          local _t_ = sys.clock()
          -- reset gradients
-         self.gradParameters:zero()
+         if torch.getdefaulttensortype() == 'torch.CudaTensor' then
+            self.cuda_gradParameters:zero()
+         else
+            self.gradParameters:zero()
+         end
          -- f is the average of all criterions
          self.output = 0
          -- given all inputs, evaluate gradients
@@ -76,8 +96,16 @@ function Batch:forward_sequential(inputs, targets, options)
          end
          -- update evaluation counter
          self.evalCounter = self.evalCounter + 1
+
          -- normalize gradients
-         self.gradParameters:div(#inputs)
+         if torch.getdefaulttensortype() == 'torch.CudaTensor' then
+            self.cuda_gradParameters:div(#inputs)
+            -- copy back to CPU version
+            self.gradParameters:resize(self.cuda_gradParameters:size()):copy(self.cuda_gradParameters)
+         else
+            self.gradParameters:div(#inputs)
+         end
+
          -- verbose
          if self.verbose >= 2 then
             print('<BatchOptimization> ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec')
```
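For reference, the pattern this patch sets up is a CPU "shadow" of the flattened GPU buffers: gradients are accumulated on the GPU, mirrored into DoubleTensors so the batch optimizer can run on the CPU, and the optimized values would then be pushed back to the device. Below is a minimal sketch of that round trip using plain torch/cutorch tensors rather than the real module and nnx.flattenParameters; the buffer sizes, values, and the sgd_step helper are hypothetical, and the final copy back to the GPU is shown only for completeness (it is not part of this diff).

```lua
require 'torch'
require 'cutorch'   -- assumed installed; provides torch.CudaTensor

-- hypothetical flattened parameter/gradient buffers living on the GPU
local cuda_parameters     = torch.CudaTensor(1000):fill(0.5)
local cuda_gradParameters = torch.CudaTensor(1000):fill(0.1)

-- CPU shadow copies, as in the patch: resize to match, then copy device -> host
local parameters     = torch.DoubleTensor():resize(cuda_parameters:size()):copy(cuda_parameters)
local gradParameters = torch.DoubleTensor():resize(cuda_gradParameters:size()):copy(cuda_gradParameters)

-- hypothetical CPU-side optimization step (stand-in for the batch optimizer)
local function sgd_step(p, g, lr)
   p:add(-lr, g)   -- p = p - lr * g
end

-- normalize gradients on the GPU, mirror them to the CPU, optimize, copy back
local nSamples = 10
cuda_gradParameters:div(nSamples)
gradParameters:copy(cuda_gradParameters)
sgd_step(parameters, gradParameters, 0.01)
cuda_parameters:copy(parameters)   -- push updated parameters back to the GPU
```

Keeping a single contiguous CPU copy means the optimizer only ever touches two DoubleTensors, at the cost of one device-to-host (and eventually one host-to-device) transfer per evaluation, which is the trade-off the patch's own comment describes as "a bit ugly".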