author    | Boris Fomitchev <bfomitchev@nvidia.com> | 2017-02-22 04:42:55 +0300
committer | Boris Fomitchev <bfomitchev@nvidia.com> | 2017-02-22 04:51:55 +0300
commit    | b2965da96871a50ff3bf692e943fcde45498b846 (patch)
tree      | 79b2fa466d437f4908f64961127de337ff8f8151
parent    | a2730c7e7159ba95aa8b1eca8931e68869ebdc6d (diff)
parent    | 7f3e2b22c50d12c8583f33ff792c88d692bcef49 (diff)
Merge remote-tracking branch 'upstream/master' into 17.03-devel
Conflicts:
init.lua
-rw-r--r-- | find.lua      |  2
-rw-r--r-- | init.lua      | 14
-rw-r--r-- | test/test.lua |  9

3 files changed, 20 insertions, 5 deletions
```diff
@@ -110,7 +110,7 @@ local function fallbackWarning(layer, msg)
             "\n *** Falling back to 32-bit math for: " .. convDataString(layer))
       print(" *** [ Set cudnn.find.verboseFallback to false to disable this message ] *** ")
       print(" *** [ Alternatively, you may force CUDNN to always operate on CudaHalfTensors via 32-bit float conversion, in Lua: ] ***\n"
-          .." *** [ cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'} ] ***")
+          .." *** cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'} ) ***")
       print(" *** [ Note: result may be faster or slower than native FP16, depending on your GPU and CUDNN operations ] *** ")
    end
 end
@@ -216,11 +216,21 @@ function cudnn.setConvolutionDescriptor(data, desc)
    local myDesc = desc or cudnn.createDescriptors(
       1, 'struct cudnnConvolutionStruct*[?]',
       'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
+   -- make sure we have references to these tensors so gc doesn't clean them up
+   local padATensor = torch.IntTensor(data.padA)
+   local filterStrideATensor = torch.IntTensor(data.filterStrideA)
+   local upscaleATensor = torch.IntTensor(data.upscaleA)
    errcheck('cudnnSetConvolutionNdDescriptor', myDesc[0],
             data.arrayLength,
+<<<<<<< HEAD
             torch.IntTensor(data.padA):data(),
             torch.IntTensor(data.filterStrideA):data(),
             torch.IntTensor(data.dilationA):data(),
+=======
+            padATensor:data(),
+            filterStrideATensor:data(),
+            upscaleATensor:data(),
+>>>>>>> upstream/master
             data.mode, data.dataType)
    return myDesc
@@ -231,9 +241,11 @@ function cudnn.setFilterDescriptor(data, filterDesc)
       1, 'struct cudnnFilterStruct*[?]',
       'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
    local dims = data.nbDims or #data.filterDimA
+   -- make sure we have references to these tensors so gc doesn't clean them up
+   local filterDimATensor = torch.IntTensor(data.filterDimA)
    errcheck('cudnnSetFilterNdDescriptor', myDesc[0], data.dataType,
             data.format or 'CUDNN_TENSOR_NCHW',
-            dims, torch.IntTensor(data.filterDimA):data());
+            dims, filterDimATensor:data());
    return myDesc
 end
diff --git a/test/test.lua b/test/test.lua
index c641f29..ffe6cc3 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -77,9 +77,10 @@ local function testLayer(nnlayer, cudnnlayer, input, gradOutput, scale,
    end
    local res = {} -- result
-   res.output = cudnnlayer:forward(cast(input))
+   inputcudnn = cast(input):clone() -- for inplace layers
+   res.output = cudnnlayer:forward(inputcudnn)
    cudnnlayer:zeroGradParameters()
-   res.gradInput = cudnnlayer:backward(cast(input), cast(gradOutput), scale)
+   res.gradInput = cudnnlayer:backward(inputcudnn, cast(gradOutput), scale)
    if parametric then
       res.gradWeight = cudnnlayer.gradWeight
       res.gradBias = cudnnlayer.gradBias
@@ -550,9 +551,11 @@ function cudnntest.ReLU()
    nonlin('ReLU', false) -- out of place
 end
 function cudnntest.Tanh()
+   nonlin('Tanh', true) -- inplace
    nonlin('Tanh', false) -- out of place
 end
 function cudnntest.Sigmoid()
+   nonlin('Sigmoid', true) -- inplace
    nonlin('Sigmoid', false) -- out of place
 end
@@ -1021,7 +1024,7 @@ cudnn.find.verbose=false
 -- todo: put it back for release to demo 16->32 bit float fallback
 cudnn.find.verboseFallback=false
 cudnn.useFindEx=false
-
+cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'} )
 for i = 1, 1 do -- cutorch.getDeviceCount() do
    for _, benchmark, fast in ipairs({true, false}) do
```
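The substantive change in the init.lua hunks is lifetime management: the `torch.IntTensor(...)` temporaries are bound to locals before their `:data()` pointers are handed to cuDNN, so the Lua garbage collector cannot reclaim them while the C side still reads the raw pointer. A minimal sketch of the pattern, with a hypothetical `consumePointer` standing in for the real `cudnnSetConvolutionNdDescriptor` FFI call:

```lua
require 'torch'

-- Hypothetical stand-in for an FFI call that dereferences the raw pointer on the C side.
local function consumePointer(ptr)
   print(ptr)
end

local pad = {1, 1}

-- Risky: the temporary tensor is unreferenced as soon as :data() returns,
-- so its storage may be garbage-collected while the pointer is still in use.
-- consumePointer(torch.IntTensor(pad):data())

-- Safer (the pattern adopted upstream): bind the tensor to a local so it
-- stays alive for the duration of the call.
local padATensor = torch.IntTensor(pad)
consumePointer(padATensor:data())
```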
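The test.lua change clones the cast input before driving the cuDNN layer, because an in-place nonlinearity overwrites its input tensor and would otherwise corrupt the data still needed for the reference computation. A small illustration of the hazard using plain `nn` modules (a sketch, not the actual test harness):

```lua
require 'nn'

local input = torch.randn(4, 8)

-- Reference result computed on a copy, so 'input' itself is untouched here.
local reference = nn.ReLU(false):forward(input:clone()):clone()

-- An in-place module writes its output into the tensor it is given,
-- so feed it a clone to keep 'input' usable for later comparisons.
local inplaceInput = input:clone()
local output = nn.ReLU(true):forward(inplaceInput)

assert((output - reference):abs():max() == 0)
assert((inplaceInput - output):abs():max() == 0) -- storage was reused in place
```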