diff options
author | soumith <soumith@gmail.com> | 2016-02-16 22:34:15 +0300 |
---|---|---|
committer | soumith <soumith@gmail.com> | 2016-02-16 22:34:26 +0300 |
commit | 991093af1d24122bafa69cc1e085cf77a3c7185e (patch) | |
tree | ede810f75af838a43582a895c5d925ca3825844a /SpatialConvolution.lua | |
parent | 7b1f87198343414a0ba1bda31aff84228ac2697b (diff) |
removing streams optimization
Diffstat (limited to 'SpatialConvolution.lua')
-rw-r--r-- | SpatialConvolution.lua | 32 |
1 file changed, 0 insertions, 32 deletions
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua index 2597aa5..dd27fc3 100644 --- a/SpatialConvolution.lua +++ b/SpatialConvolution.lua @@ -348,34 +348,7 @@ function SpatialConvolution:updateOutput(input) if not self.weightDesc then self:resetWeightDescriptors() end self:createIODescriptors(input) - local prevStream - local streamQueue = {} - if self.groups > 1 then -- try to do stream parallelization - prevStream = cutorch.getStream() - - --[[ - Only if prevStream is 0, then do parallelization. - the justification for this is that this is a hard problem, there is no - way to know if one is doing other kinds of stream-parallelization - (like GPUConcat), and if thats the case, streams are already - being ideally exploited. - --]] - - if prevStream == 0 then - cutorch.reserveStreams(self.groups) - for i=1,self.groups do - cutorch.streamWaitFor(i, {prevStream}) - end - end - end - for g = 0, self.groups - 1 do - -- stream-parallelize if appropriate - if self.groups > 1 and prevStream == 0 then - cutorch.setStream(g + 1) - table.insert(streamQueue, g + 1) - end - errcheck('cudnnConvolutionForward', cudnn.getHandle(), one:data(), self.iDesc[0], input:data() + g*self.input_offset, @@ -386,11 +359,6 @@ function SpatialConvolution:updateOutput(input) self.oDesc[0], self.output:data() + g*self.output_offset); end - if prevStream == 0 then - cutorch.setStream(prevStream) - cutorch.streamWaitFor(prevStream, streamQueue) - end - -- add bias if self.bias then errcheck('cudnnAddTensor', cudnn.getHandle(), |