Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/soumith/cudnn.torch.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Fomitchev <bfomitchev@nvidia.com>2016-10-21 23:25:57 +0300
committerBoris Fomitchev <bfomitchev@nvidia.com>2016-10-21 23:25:57 +0300
commit26e91e961af52f8690fe88d2acc092f6d2ae5217 (patch)
tree4f23c525652a00d44d0ef59c6d54401f546baed9
parent0f78bac2253eb21859d7591e221361a9d542ac90 (diff)
debug diagnostic fixes. true fp16 disabled for now
-rw-r--r--TemporalConvolution.lua16
-rw-r--r--find.lua27
-rw-r--r--test/test.lua9
3 files changed, 40 insertions, 12 deletions
diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua
index cee0f44..87f7775 100644
--- a/TemporalConvolution.lua
+++ b/TemporalConvolution.lua
@@ -24,11 +24,21 @@ function TemporalConvolution:__init(inputFrameSize, outputFrameSize,
end
function TemporalConvolution:createIODescriptors(input)
- return Convolution.createIODescriptors(self, input)
+ local sizeChanged = false
+ if not self.iDesc or not self.oDesc or
+ input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
+ or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
+ sizeChanged = true
+ end
+ cudnn.SpatialConvolution.createIODescriptors(self,input)
+ if sizeChanged then
+ self.oSize = self.output:size()
+ end
end
function TemporalConvolution:fastest(mode)
- return Convolution.fastest(self, mode)
+ self = cudnn.SpatialConvolution.fastest(self,mode)
+ return self
end
function TemporalConvolution:setMode(fmode, bdmode, bwmode)
@@ -125,5 +135,3 @@ function TemporalConvolution:clearState()
nn.utils.clear(self, '_input', '_gradOutput')
return parent.clearState(self)
end
-
-return TemporalConvolution
diff --git a/find.lua b/find.lua
index 9c7adf5..fba772f 100644
--- a/find.lua
+++ b/find.lua
@@ -79,9 +79,15 @@ local function getConvolutionDescriptor_ffi(desc)
math_p = ffi.new('cudnnDataType_t[1]')
}
- cudnn.errcheck('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX,
- data.dim_p, data.padA, data.filterStrideA,
- data.upscaleA, data.mode_p, data.math_p)
+ local status = cudnn.call('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX,
+ data.dim_p, data.padA, data.filterStrideA,
+ data.upscaleA, data.mode_p, data.math_p)
+ if (status ~= ffi.C.CUDNN_STATUS_SUCCESS) then
+ if find.verbose or find.verboseError then
+ print("cudnnGetConvolutionNdDescriptor failed: ", tonumber(status))
+ return nil
+ end
+ end
data.arrayLength = data.dim_p[0]
data.mode = data.mode_p[0]
@@ -94,10 +100,15 @@ local function verboseCall(layer, f, ...)
print("find:verboseCall: calling " .. f .. ", hash: ", layer.autotunerHash)
end
local status = cudnn.call(f, ...)
- if status ~= ffi.C.CUDNN_STATUS_SUCCESS and (find.verbose or find.verboseError) then
- local desc= cudnn.getConvolutionDescriptor_ffi(layer.convDesc)
- print("find:verboseCall:" .. f .. " failed: ", tonumber(status) , ' mode : ',
- desc.mode, ' datatype : ', desc.datatype)
+ if (status ~= ffi.C.CUDNN_STATUS_SUCCESS) and (find.verbose or find.verboseError) then
+ local prefix = "find:verboseCall:"
+ print( prefix .. f .. " failed: ", tonumber(status))
+ if layer.convDesc then
+ local desc = getConvolutionDescriptor_ffi(layer.convDesc)
+ if desc then
+ print (prefix .. ' conv desc mode : ', desc.mode, ' datatype : ', desc.datatype)
+ end
+ end
end
if find.verbose then
print("find:verboseCall: success, " .. f )
@@ -126,7 +137,7 @@ local function defaultFallback(layer, replay)
-- read conv descriptor
local convDescData = getConvolutionDescriptor_ffi(layer.convDesc)
- if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
+ if convDescData and convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
if find.verbose then
if replay then
print("find.defaultFallback: replay for ", layer.autotunerHash)
diff --git a/test/test.lua b/test/test.lua
index a85ed26..2b69fa2 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -957,6 +957,15 @@ for i = 1, cutorch.getDeviceCount() do
print( 'Testing torch.CudaHalfTensor, torch.cudnn fp16 math is : ', cudnn.configmap('torch.CudaHalfTensor' ),
', cutorch.hasFastHalfInstructions() is ', cutorch.hasFastHalfInstructions())
+
+ if cudnn.configmap('torch.CudaHalfTensor') ~= 'CUDNN_DATA_FLOAT' then
+ print([[ Warning: 32-bit float math is forced for CudaHalfTensor test
+ even though native fast 16-bit float math is available for this device.
+ The reason is cudnn convolution algo find methods for fp16 and certain size combinations may fail.
+ This should be fixed in next release.]])
+ cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'})
+ end
+
testparams = testparams_half
mytester:run()