| author | Marco Scoffier <github@metm.org> | 2011-09-21 18:35:35 +0400 |
|---|---|---|
| committer | Marco Scoffier <github@metm.org> | 2011-09-21 18:35:35 +0400 |
| commit | f3d95bdb2800806bc6283511ced2896e20e7b07a | |
| tree | 8c04bce62c98a907293f28e9c6ae8125fc2de2ae | |
| parent | b2388376c573e7cff9d3f24b6ae4c017c4faa18f | |
added some (ugly) code for optimizing on CPU when evaluating function on GPU
-rw-r--r-- | BatchOptimization.lua | 36

1 file changed, 32 insertions, 4 deletions
```diff
diff --git a/BatchOptimization.lua b/BatchOptimization.lua
index 9ff88f8..53dcd77 100644
--- a/BatchOptimization.lua
+++ b/BatchOptimization.lua
@@ -19,8 +19,24 @@ function Batch:__init(...)
                      {arg='verbose', type='number',
                       help='verbose level during training [0-2]', default=0}
    )
-   self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
-   self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+   -- *auto conversion from CUDA* This is a bit ugly
+   -- For now we assume that if we use the GPU this is for function
+   -- evaluations and that the batch optimisation will be on the CPU
+   -- thus we need to copy the flattened parameters to the CPU.
+   -- It is a question whether it makes sense to flatten (allocate a
+   -- single contiguous memory space) all the parameters on the
+   -- GPU but not doing this tweaking flattenParameters would need more work
+   if torch.getdefaulttensortype() == 'torch.CudaTensor' then
+      self.cuda_parameters =
+         nnx.flattenParameters(nnx.getParameters(self.module))
+      self.parameters = torch.DoubleTensor():resize(self.cuda_parameters:size()):copy(self.cuda_parameters)
+      self.cuda_gradParameters =
+         nnx.flattenParameters(nnx.getGradParameters(self.module))
+      self.gradParameters = torch.DoubleTensor():resize(self.cuda_gradParameters:size()):copy(self.cuda_gradParameters)
+   else
+      self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
+      self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+   end
    self.evalCounter = 0
    self.sampleCounter = 0
    if self.parallelize > 1 then
@@ -52,7 +68,11 @@ function Batch:forward_sequential(inputs, targets, options)
          end
          local _t_ = sys.clock()
          -- reset gradients
-         self.gradParameters:zero()
+         if torch.getdefaulttensortype() == 'torch.CudaTensor' then
+            self.cuda_gradParameters:zero()
+         else
+            self.gradParameters:zero()
+         end
          -- f is the average of all criterions
          self.output = 0
          -- given all inputs, evaluate gradients
@@ -76,8 +96,16 @@ function Batch:forward_sequential(inputs, targets, options)
          end
          -- update evaluation counter
          self.evalCounter = self.evalCounter + 1
+
          -- normalize gradients
-         self.gradParameters:div(#inputs)
+         if torch.getdefaulttensortype() == 'torch.CudaTensor' then
+            self.cuda_gradParameters:div(#inputs)
+            -- copy back to CPU version
+            self.gradParameters:resize(self.cuda_gradParameters:size()):copy(self.cuda_gradParameters)
+         else
+            self.gradParameters:div(#inputs)
+         end
+
          -- verbose
          if self.verbose >= 2 then
             print('<BatchOptimization> ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec')
```
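For reference, the pattern this patch sets up is a CPU "shadow" of the flattened GPU buffers: gradients are accumulated on the GPU, mirrored into DoubleTensors so the batch optimizer can run on the CPU, and the optimized values would then be pushed back to the device. Below is a minimal sketch of that round trip using plain torch/cutorch tensors rather than the real module and nnx.flattenParameters; the buffer sizes, values, and the sgd_step helper are hypothetical, and the final copy back to the GPU is shown only for completeness (it is not part of this diff).

```lua
require 'torch'
require 'cutorch'   -- assumed installed; provides torch.CudaTensor

-- hypothetical flattened parameter/gradient buffers living on the GPU
local cuda_parameters     = torch.CudaTensor(1000):fill(0.5)
local cuda_gradParameters = torch.CudaTensor(1000):fill(0.1)

-- CPU shadow copies, as in the patch: resize to match, then copy device -> host
local parameters     = torch.DoubleTensor():resize(cuda_parameters:size()):copy(cuda_parameters)
local gradParameters = torch.DoubleTensor():resize(cuda_gradParameters:size()):copy(cuda_gradParameters)

-- hypothetical CPU-side optimization step (stand-in for the batch optimizer)
local function sgd_step(p, g, lr)
   p:add(-lr, g)   -- p = p - lr * g
end

-- normalize gradients on the GPU, mirror them to the CPU, optimize, copy back
local nSamples = 10
cuda_gradParameters:div(nSamples)
gradParameters:copy(cuda_gradParameters)
sgd_step(parameters, gradParameters, 0.01)
cuda_parameters:copy(parameters)   -- push updated parameters back to the GPU
```

Keeping a single contiguous CPU copy means the optimizer only ever touches two DoubleTensors, at the cost of one device-to-host (and eventually one host-to-device) transfer per evaluation, which is the trade-off the patch's own comment describes as "a bit ugly".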