Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/soumith/cudnn.torch.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
authorBoris Fomitchev <bfomitchev@nvidia.com>2017-02-22 04:42:55 +0300
committerBoris Fomitchev <bfomitchev@nvidia.com>2017-02-22 04:51:55 +0300
commitb2965da96871a50ff3bf692e943fcde45498b846 (patch)
tree79b2fa466d437f4908f64961127de337ff8f8151
parenta2730c7e7159ba95aa8b1eca8931e68869ebdc6d (diff)
parent7f3e2b22c50d12c8583f33ff792c88d692bcef49 (diff)
Merge remote-tracking branch 'upstream/master' into 17.03-devel
Conflicts: init.lua
-rw-r--r--find.lua2
-rw-r--r--init.lua14
-rw-r--r--test/test.lua9
3 files changed, 20 insertions, 5 deletions
diff --git a/find.lua b/find.lua
index f1c783f..045ed86 100644
--- a/find.lua
+++ b/find.lua
@@ -110,7 +110,7 @@ local function fallbackWarning(layer, msg)
"\n *** Falling back to 32-bit math for: " .. convDataString(layer))
print(" *** [ Set cudnn.find.verboseFallback to false to disable this message ] *** ")
print(" *** [ Alternatively, you may force CUDNN to always operate on CudaHalfTensors via 32-bit float conversion, in Lua: ] ***\n"
- .." *** [ cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'} ] ***")
+ .." *** cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'} ) ***")
print(" *** [ Note: result may be faster or slower than native FP16, depending on your GPU and CUDNN operations ] *** ")
end
end
diff --git a/init.lua b/init.lua
index 20fa154..cbd434b 100644
--- a/init.lua
+++ b/init.lua
@@ -216,11 +216,21 @@ function cudnn.setConvolutionDescriptor(data, desc)
local myDesc = desc or cudnn.createDescriptors(
1, 'struct cudnnConvolutionStruct*[?]',
'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
+ -- make sure we have references to these tensors so gc doesn't clean them up
+ local padATensor = torch.IntTensor(data.padA)
+ local filterStrideATensor = torch.IntTensor(data.filterStrideA)
+ local upscaleATensor = torch.IntTensor(data.upscaleA)
errcheck('cudnnSetConvolutionNdDescriptor', myDesc[0],
data.arrayLength,
+<<<<<<< HEAD
torch.IntTensor(data.padA):data(),
torch.IntTensor(data.filterStrideA):data(),
torch.IntTensor(data.dilationA):data(),
+=======
+ padATensor:data(),
+ filterStrideATensor:data(),
+ upscaleATensor:data(),
+>>>>>>> upstream/master
data.mode,
data.dataType)
return myDesc
@@ -231,9 +241,11 @@ function cudnn.setFilterDescriptor(data, filterDesc)
1, 'struct cudnnFilterStruct*[?]',
'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
local dims = data.nbDims or #data.filterDimA
+ -- make sure we have references to these tensors so gc doesn't clean them up
+ local filterDimATensor = torch.IntTensor(data.filterDimA)
errcheck('cudnnSetFilterNdDescriptor', myDesc[0],
data.dataType, data.format or 'CUDNN_TENSOR_NCHW',
- dims, torch.IntTensor(data.filterDimA):data());
+ dims, filterDimATensor:data());
return myDesc
end
diff --git a/test/test.lua b/test/test.lua
index c641f29..ffe6cc3 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -77,9 +77,10 @@ local function testLayer(nnlayer, cudnnlayer, input, gradOutput, scale,
end
local res = {} -- result
- res.output = cudnnlayer:forward(cast(input))
+ inputcudnn = cast(input):clone() -- for inplace layers
+ res.output = cudnnlayer:forward(inputcudnn)
cudnnlayer:zeroGradParameters()
- res.gradInput = cudnnlayer:backward(cast(input), cast(gradOutput), scale)
+ res.gradInput = cudnnlayer:backward(inputcudnn, cast(gradOutput), scale)
if parametric then
res.gradWeight = cudnnlayer.gradWeight
res.gradBias = cudnnlayer.gradBias
@@ -550,9 +551,11 @@ function cudnntest.ReLU()
nonlin('ReLU', false) -- out of place
end
function cudnntest.Tanh()
+ nonlin('Tanh', true) -- inplace
nonlin('Tanh', false) -- out of place
end
function cudnntest.Sigmoid()
+ nonlin('Sigmoid', true) -- inplace
nonlin('Sigmoid', false) -- out of place
end
@@ -1021,7 +1024,7 @@ cudnn.find.verbose=false
-- todo: put it back for release to demo 16->32 bit float fallback
cudnn.find.verboseFallback=false
cudnn.useFindEx=false
-
+cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'} )
for i = 1, 1 do -- cutorch.getDeviceCount() do
for _, benchmark, fast in ipairs({true, false}) do