diff options
author | Boris Fomitchev <bfomitchev@nvidia.com> | 2016-10-21 23:25:57 +0300 |
---|---|---|
committer | Boris Fomitchev <bfomitchev@nvidia.com> | 2016-10-21 23:25:57 +0300 |
commit | 26e91e961af52f8690fe88d2acc092f6d2ae5217 (patch) | |
tree | 4f23c525652a00d44d0ef59c6d54401f546baed9 | |
parent | 0f78bac2253eb21859d7591e221361a9d542ac90 (diff) |
debug diagnostic fixes. true fp16 disabled for now
-rw-r--r-- | TemporalConvolution.lua | 16 | ||||
-rw-r--r-- | find.lua | 27 | ||||
-rw-r--r-- | test/test.lua | 9 |
3 files changed, 40 insertions, 12 deletions
diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua index cee0f44..87f7775 100644 --- a/TemporalConvolution.lua +++ b/TemporalConvolution.lua @@ -24,11 +24,21 @@ function TemporalConvolution:__init(inputFrameSize, outputFrameSize, end function TemporalConvolution:createIODescriptors(input) - return Convolution.createIODescriptors(self, input) + local sizeChanged = false + if not self.iDesc or not self.oDesc or + input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2] + or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then + sizeChanged = true + end + cudnn.SpatialConvolution.createIODescriptors(self,input) + if sizeChanged then + self.oSize = self.output:size() + end end function TemporalConvolution:fastest(mode) - return Convolution.fastest(self, mode) + self = cudnn.SpatialConvolution.fastest(self,mode) + return self end function TemporalConvolution:setMode(fmode, bdmode, bwmode) @@ -125,5 +135,3 @@ function TemporalConvolution:clearState() nn.utils.clear(self, '_input', '_gradOutput') return parent.clearState(self) end - -return TemporalConvolution @@ -79,9 +79,15 @@ local function getConvolutionDescriptor_ffi(desc) math_p = ffi.new('cudnnDataType_t[1]') } - cudnn.errcheck('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX, - data.dim_p, data.padA, data.filterStrideA, - data.upscaleA, data.mode_p, data.math_p) + local status = cudnn.call('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX, + data.dim_p, data.padA, data.filterStrideA, + data.upscaleA, data.mode_p, data.math_p) + if (status ~= ffi.C.CUDNN_STATUS_SUCCESS) then + if find.verbose or find.verboseError then + print("cudnnGetConvolutionNdDescriptor failed: ", tonumber(status)) + return nil + end + end data.arrayLength = data.dim_p[0] data.mode = data.mode_p[0] @@ -94,10 +100,15 @@ local function verboseCall(layer, f, ...) print("find:verboseCall: calling " .. f .. ", hash: ", layer.autotunerHash) end local status = cudnn.call(f, ...) - if status ~= ffi.C.CUDNN_STATUS_SUCCESS and (find.verbose or find.verboseError) then - local desc= cudnn.getConvolutionDescriptor_ffi(layer.convDesc) - print("find:verboseCall:" .. f .. " failed: ", tonumber(status) , ' mode : ', - desc.mode, ' datatype : ', desc.datatype) + if (status ~= ffi.C.CUDNN_STATUS_SUCCESS) and (find.verbose or find.verboseError) then + local prefix = "find:verboseCall:" + print( prefix .. f .. " failed: ", tonumber(status)) + if layer.convDesc then + local desc = getConvolutionDescriptor_ffi(layer.convDesc) + if desc then + print (prefix .. ' conv desc mode : ', desc.mode, ' datatype : ', desc.datatype) + end + end end if find.verbose then print("find:verboseCall: success, " .. f ) @@ -126,7 +137,7 @@ local function defaultFallback(layer, replay) -- read conv descriptor local convDescData = getConvolutionDescriptor_ffi(layer.convDesc) - if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then + if convDescData and convDescData.dataType == ffi.C.CUDNN_DATA_HALF then if find.verbose then if replay then print("find.defaultFallback: replay for ", layer.autotunerHash) diff --git a/test/test.lua b/test/test.lua index a85ed26..2b69fa2 100644 --- a/test/test.lua +++ b/test/test.lua @@ -957,6 +957,15 @@ for i = 1, cutorch.getDeviceCount() do print( 'Testing torch.CudaHalfTensor, torch.cudnn fp16 math is : ', cudnn.configmap('torch.CudaHalfTensor' ), ', cutorch.hasFastHalfInstructions() is ', cutorch.hasFastHalfInstructions()) + + if cudnn.configmap('torch.CudaHalfTensor') ~= 'CUDNN_DATA_FLOAT' then + print([[ Warning: 32-bit float math is forced for CudaHalfTensor test + even though native fast 16-bit float math is available for this device. + The reason is cudnn convolution algo find methods for fp16 and certain size combinations may fail. + This should be fixed in next release.]]) + cudnn.configureMath({ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_FLOAT'}) + end + testparams = testparams_half mytester:run() |