diff options
author | Soumith Chintala <soumith@gmail.com> | 2016-11-18 01:53:48 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-18 01:53:48 +0300 |
commit | 50224b77d55a0e2cdb4c8e6b1716a1cdc381a12f (patch) | |
tree | 06c54eef86403afeb0830d6bd473960d6a78bf91 | |
parent | 4422d797976bbb615522df82887b64fe0ef1ce91 (diff) | |
parent | cea3855c6b9d4e39bd8a681b5d74f07802cf2e25 (diff) |
Merge pull request #378 from gchanan/nnLinearHalf
Test for nn.Linear with CudaHalfTensor and CudaDoubleTensor.
-rw-r--r-- | init.lua | 3 | ||||
-rw-r--r-- | test.lua | 360 |
2 files changed, 181 insertions, 182 deletions
@@ -6,3 +6,6 @@ require('cunn.test') require('cunn.DataParallelTable') nn.Module._flattenTensorBuffer['torch.CudaTensor'] = torch.FloatTensor.new +nn.Module._flattenTensorBuffer['torch.CudaDoubleTensor'] = torch.DoubleTensor.new +-- FIXME: change this to torch.HalfTensor when available +nn.Module._flattenTensorBuffer['torch.CudaHalfTensor'] = torch.FloatTensor.new @@ -25,17 +25,6 @@ local function checkHalf() end end --- workarounds for non-existant functions -if cutorch.hasHalf then -function torch.CudaHalfTensor:mean() - return self:cuda():mean() -end -end - -function torch.CudaDoubleTensor:mean() - return self:cuda():mean() -end - local function half_max_error(maxabs) -- arbitrarily double the precision limit return 2 * ((maxabs and (2^(math.floor(math.log(maxabs) / math.log(2)))) * (2^(-10))) or 0) @@ -5215,17 +5204,19 @@ end function cunntest.getParameters() -- tensors are non-contiguous but compact; they can be gathered - local L = nn.Linear(10,10):cuda() - L.weight = torch.CudaTensor(10,10):t():fill(1) - local tmp = torch.CudaTensor(10,10):fill(2) - L.bias = tmp:select(1,2) - local P = L:getParameters() - mytester:asserteq(L.weight:mean(), 1) - mytester:asserteq(L.bias:mean(), 2) - mytester:asserteq(L.weight:storage(), L.bias:storage()) - mytester:asserteq(P:nElement(), 110) - mytester:asserteq(P:storage():size(), 110) - mytester:assertlt(L.bias[{ {10} }]:storageOffset() - 1, L.bias:storage():size()) + for k, typename in ipairs(typenames) do + local L = nn.Linear(10,10):type(typename) + L.weight = torch[typename:match('torch.(%a+)')](10,10):t():fill(1) + local tmp = torch[typename:match('torch.(%a+)')](10,10):fill(2) + L.bias = tmp:select(1,2) + local P = L:getParameters() + mytester:asserteq(L.weight:mean(), 1) + mytester:asserteq(L.bias:mean(), 2) + mytester:asserteq(L.weight:storage(), L.bias:storage()) + mytester:asserteq(P:nElement(), 110) + mytester:asserteq(P:storage():size(), 110) + mytester:assertlt(L.bias[{ {10} }]:storageOffset() - 1, L.bias:storage():size()) + end end function cunntest.SpatialReflectionPadding_forward() @@ -5447,214 +5438,219 @@ function cunntest.GPU() end assert(nn.GPU, "Please update nn to latest version") - local originaldevice = cutorch.getDevice() + for k, typename in ipairs(typenames) do + local tolerance = 1e-6 + if typename == 'torch.CudaHalfTensor' then tolerance = 1e-3 end + local originaldevice = cutorch.getDevice() - cutorch.setDevice(1) - local linear = nn.Linear(3,4) - local linear2 = linear:clone():float() - linear.mybuffer = {torch.CudaTensor(3)} + local ctype = t2cpu[typename] + cutorch.setDevice(1) + local linear = nn.Linear(3,4):type(ctype) + local linear2 = linear:clone():type(ctype) + linear.mybuffer = {torch[typename:match('torch.(%a+)')](3)} - local gpu = nn.GPU(linear, 2, 1) - gpu:cuda() + local gpu = nn.GPU(linear, 2, 1) + gpu:type(typename) - mytester:assert(linear.mybuffer[1]:getDevice() == 2) - mytester:assert(linear.weight:getDevice() == 2) - mytester:assert(cutorch.getDevice() == originaldevice) + mytester:assert(linear.mybuffer[1]:getDevice() == 2) + mytester:assert(linear.weight:getDevice() == 2) + mytester:assert(cutorch.getDevice() == originaldevice) - local input = torch.CudaTensor(2,3):uniform(0,1) - local output = gpu:forward(input) + local input = torch[typename:match('torch.(%a+)')](2,3):uniform(0,1) + local output = gpu:forward(input) - mytester:assert(linear.output:getDevice() == 2) - mytester:assert(output:getDevice() == 1) - mytester:assert(gpu._input:getDevice() == 2) + mytester:assert(linear.output:getDevice() == 2) + mytester:assert(output:getDevice() == 1) + mytester:assert(gpu._input:getDevice() == 2) - local gradOutput = torch.CudaTensor(2,4):uniform(0,1) - gpu:zeroGradParameters() - mytester:assert(cutorch.getDevice() == 1) - local gradInput = gpu:backward(input, gradOutput) + local gradOutput = torch[typename:match('torch.(%a+)')](2,4):uniform(0,1) + gpu:zeroGradParameters() + mytester:assert(cutorch.getDevice() == 1) + local gradInput = gpu:backward(input, gradOutput) - mytester:assert(cutorch.getDevice() == 1) - mytester:assert(gpu._gradOutput:getDevice() == 2) - mytester:assert(linear.gradInput:getDevice() == 2) - mytester:assert(gradInput:getDevice() == 1) + mytester:assert(cutorch.getDevice() == 1) + mytester:assert(gpu._gradOutput:getDevice() == 2) + mytester:assert(linear.gradInput:getDevice() == 2) + mytester:assert(gradInput:getDevice() == 1) - mytester:assert(cutorch.getDevice() == 1) - local input2, gradOutput2 = input:float(), gradOutput:float() - local output2 = linear2:forward(input2) - linear2:zeroGradParameters() - local gradInput2 = linear2:backward(input2, gradOutput2) + mytester:assert(cutorch.getDevice() == 1) + local input2, gradOutput2 = input:type(ctype), gradOutput:type(ctype) + local output2 = linear2:forward(input2) + linear2:zeroGradParameters() + local gradInput2 = linear2:backward(input2, gradOutput2) - mytester:assertTensorEq(input2, input:float(), 0.000001) - mytester:assertTensorEq(gradInput2, gradInput:float(), 0.000001) + mytester:assertTensorEq(input2:double(), input:double(), tolerance) + mytester:assertTensorEq(gradInput2:double(), gradInput:double(), tolerance) - local params, gradParams = gpu:parameters() - local params2, gradParams2 = linear2:parameters() + local params, gradParams = gpu:parameters() + local params2, gradParams2 = linear2:parameters() - for i=1,#params do - mytester:assertTensorEq(params2[i], params[i]:float(), 0.000001) - mytester:assertTensorEq(gradParams2[i], gradParams[i]:float(), 0.000001) - end + for i=1,#params do + mytester:assertTensorEq(params2[i]:double(), params[i]:double(), tolerance) + mytester:assertTensorEq(gradParams2[i]:double(), gradParams[i]:double(), tolerance) + end - -- test serialize/deserialize + -- test serialize/deserialize - local gpustr = torch.serialize(gpu) - mytester:assert(cutorch.getDevice() == 1) - local gpu2 = torch.deserialize(gpustr) - mytester:assert(cutorch.getDevice() == 1) + local gpustr = torch.serialize(gpu) + mytester:assert(cutorch.getDevice() == 1) + local gpu2 = torch.deserialize(gpustr) + mytester:assert(cutorch.getDevice() == 1) - local output2 = gpu2:forward(input) + local output2 = gpu2:forward(input) - mytester:assert(gpu2.modules[1].output:getDevice() == 2) - mytester:assert(output2:getDevice() == 1) - mytester:assert(gpu2._input:getDevice() == 2) + mytester:assert(gpu2.modules[1].output:getDevice() == 2) + mytester:assert(output2:getDevice() == 1) + mytester:assert(gpu2._input:getDevice() == 2) - gpu2:zeroGradParameters() - mytester:assert(cutorch.getDevice() == 1) - local gradInput2 = gpu2:backward(input, gradOutput) + gpu2:zeroGradParameters() + mytester:assert(cutorch.getDevice() == 1) + local gradInput2 = gpu2:backward(input, gradOutput) - mytester:assert(cutorch.getDevice() == 1) - mytester:assert(gpu2._gradOutput:getDevice() == 2) - mytester:assert(gpu2.modules[1].gradInput:getDevice() == 2) - mytester:assert(gradInput2:getDevice() == 1) + mytester:assert(cutorch.getDevice() == 1) + mytester:assert(gpu2._gradOutput:getDevice() == 2) + mytester:assert(gpu2.modules[1].gradInput:getDevice() == 2) + mytester:assert(gradInput2:getDevice() == 1) - mytester:assertTensorEq(input2, input2, 0.000001) - mytester:assertTensorEq(gradInput2, gradInput2, 0.000001) + mytester:assertTensorEq(input2:double(), input2:double(), tolerance) + mytester:assertTensorEq(gradInput2:double(), gradInput2:double(), tolerance) - local params, gradParams = gpu:parameters() - local params2, gradParams2 = gpu2:parameters() + local params, gradParams = gpu:parameters() + local params2, gradParams2 = gpu2:parameters() - for i=1,#params do - mytester:assert(params2[i]:getDevice() == params[i]:getDevice()) - mytester:assert(gradParams2[i]:getDevice() == gradParams[i]:getDevice()) - mytester:assertTensorEq(params2[i]:float(), params[i]:float(), 0.000001) - mytester:assertTensorEq(gradParams2[i]:float(), gradParams[i]:float(), 0.000001) - end + for i=1,#params do + mytester:assert(params2[i]:getDevice() == params[i]:getDevice()) + mytester:assert(gradParams2[i]:getDevice() == gradParams[i]:getDevice()) + mytester:assertTensorEq(params2[i]:double(), params[i]:double(), tolerance) + mytester:assertTensorEq(gradParams2[i]:double(), gradParams[i]:double(), tolerance) + end - -- test table input/output - local lin1, lin2 = nn.Linear(3,4), nn.Linear(3,4) - local para = nn.ParallelTable():add(lin1):add(lin2) - local para2 = para:clone():float() - local gpu = nn.GPU(para, 2, 1) + -- test table input/output + local lin1, lin2 = nn.Linear(3,4), nn.Linear(3,4) + local para = nn.ParallelTable():add(lin1):add(lin2) + local para2 = para:clone():type(ctype) + local gpu = nn.GPU(para, 2, 1) - gpu:cuda() - mytester:assert(lin1.weight:getDevice() == 2) - mytester:assert(lin2.weight:getDevice() == 2) - mytester:assert(cutorch.getDevice() == 1) + gpu:type(typename) + mytester:assert(lin1.weight:getDevice() == 2) + mytester:assert(lin2.weight:getDevice() == 2) + mytester:assert(cutorch.getDevice() == 1) - local device3 = cutorch.getDeviceCount() - local input = { - torch.CudaTensor(2,3):uniform(0,1), - cutorch.withDevice(device3, function() return torch.CudaTensor(2,3):uniform(0,1) end) -- tests input from multiple devices - } - local output = gpu:forward(input) - - mytester:assert(para.output[1]:getDevice() == 2) - mytester:assert(para.output[2]:getDevice() == 2) - mytester:assert(output[1]:getDevice() == 1) - mytester:assert(output[2]:getDevice() == 1) - mytester:assert(gpu._input[1]:getDevice() == 2) - mytester:assert(gpu._input[2]:getDevice() == 2) - - local gradOutput = { - torch.CudaTensor(2,4):uniform(0,1), - cutorch.withDevice(device3, function() return torch.CudaTensor(2,4):uniform(0,1) end) -- tests gradOutput from multiple devices - } + local device3 = cutorch.getDeviceCount() + local input = { + torch[typename:match('torch.(%a+)')](2,3):uniform(0,1), + cutorch.withDevice(device3, function() return torch[typename:match('torch.(%a+)')](2,3):uniform(0,1) end) -- tests input from multiple devices + } + local output = gpu:forward(input) + + mytester:assert(para.output[1]:getDevice() == 2) + mytester:assert(para.output[2]:getDevice() == 2) + mytester:assert(output[1]:getDevice() == 1) + mytester:assert(output[2]:getDevice() == 1) + mytester:assert(gpu._input[1]:getDevice() == 2) + mytester:assert(gpu._input[2]:getDevice() == 2) + + local gradOutput = { + torch[typename:match('torch.(%a+)')](2,4):uniform(0,1), + cutorch.withDevice(device3, function() return torch[typename:match('torch.(%a+)')](2,4):uniform(0,1) end) -- tests gradOutput from multiple devices + } + + gpu:zeroGradParameters() + mytester:assert(cutorch.getDevice() == 1) + local gradInput = gpu:backward(input, gradOutput) - gpu:zeroGradParameters() - mytester:assert(cutorch.getDevice() == 1) - local gradInput = gpu:backward(input, gradOutput) - - mytester:assert(cutorch.getDevice() == 1) - mytester:assert(gpu._gradOutput[1]:getDevice() == 2) - mytester:assert(gpu._gradOutput[2]:getDevice() == 2) - mytester:assert(para.gradInput[1]:getDevice() == 2) - mytester:assert(para.gradInput[2]:getDevice() == 2) - mytester:assert(gradInput[1]:getDevice() == 1) - mytester:assert(gradInput[2]:getDevice() == device3) - - local input2, gradOutput2 = {input[1]:float(), input[2]:float()}, {gradOutput[1]:float(), gradOutput[2]:float()} - local output2 = para2:forward(input2) - para2:zeroGradParameters() - local gradInput2 = para2:backward(input2, gradOutput2) - - mytester:assertTensorEq(input2[1], input[1]:float(), 0.000001) - mytester:assertTensorEq(input2[2], input[2]:float(), 0.000001) - mytester:assertTensorEq(gradInput2[1], gradInput[1]:float(), 0.000001) - mytester:assertTensorEq(gradInput2[2], gradInput[2]:float(), 0.000001) - - local params, gradParams = gpu:parameters() - local params2, gradParams2 = para2:parameters() - - for i=1,#params do - mytester:assertTensorEq(params2[i], params[i]:float(), 0.000001) - mytester:assertTensorEq(gradParams2[i], gradParams[i]:float(), 0.000001) - end + mytester:assert(cutorch.getDevice() == 1) + mytester:assert(gpu._gradOutput[1]:getDevice() == 2) + mytester:assert(gpu._gradOutput[2]:getDevice() == 2) + mytester:assert(para.gradInput[1]:getDevice() == 2) + mytester:assert(para.gradInput[2]:getDevice() == 2) + mytester:assert(gradInput[1]:getDevice() == 1) + mytester:assert(gradInput[2]:getDevice() == device3) + + local input2, gradOutput2 = {input[1]:type(ctype), input[2]:type(ctype)}, {gradOutput[1]:type(ctype), gradOutput[2]:type(ctype)} + local output2 = para2:forward(input2) + para2:zeroGradParameters() + local gradInput2 = para2:backward(input2, gradOutput2) + + mytester:assertTensorEq(input2[1]:double(), input[1]:double(), tolerance) + mytester:assertTensorEq(input2[2]:double(), input[2]:double(), tolerance) + mytester:assertTensorEq(gradInput2[1]:double(), gradInput[1]:double(), tolerance) + mytester:assertTensorEq(gradInput2[2]:double(), gradInput[2]:double(), tolerance) + + local params, gradParams = gpu:parameters() + local params2, gradParams2 = para2:parameters() + + for i=1,#params do + mytester:assertTensorEq(params2[i]:double(), params[i]:double(), tolerance) + mytester:assertTensorEq(gradParams2[i]:double(), gradParams[i]:double(), tolerance) + end - -- test that it handles reduction in input/output size + -- test that it handles reduction in input/output size - input[2], gradOutput[2] = nil, nil - para.modules[2] = nil - para.output[2] = nil - para.gradInput[2] = nil + input[2], gradOutput[2] = nil, nil + para.modules[2] = nil + para.output[2] = nil + para.gradInput[2] = nil - local output = gpu:forward(input) + local output = gpu:forward(input) - mytester:assert(#gpu._input == 1) - mytester:assert(#output == 1) + mytester:assert(#gpu._input == 1) + mytester:assert(#output == 1) - local gradInput = gpu:backward(input, gradOutput) + local gradInput = gpu:backward(input, gradOutput) - mytester:assert(#gpu._gradOutput == 1) - mytester:assert(#gradInput == 1) + mytester:assert(#gpu._gradOutput == 1) + mytester:assert(#gradInput == 1) - -- test sequential multi-GPUs + -- test sequential multi-GPUs - local mlp = nn.Sequential() - for device=1,ndevice do - local outdevice = device == ndevice and 1 or device - mlp:add(nn.GPU(nn.Linear(3,3), device, outdevice)) + local mlp = nn.Sequential() + for device=1,ndevice do + local outdevice = device == ndevice and 1 or device + mlp:add(nn.GPU(nn.Linear(3,3), device, outdevice)) + mytester:assert(cutorch.getDevice() == 1) + end + mlp:type(typename) mytester:assert(cutorch.getDevice() == 1) - end - mlp:cuda() - mytester:assert(cutorch.getDevice() == 1) - local input = torch.CudaTensor(2,3):uniform(0,1) - local gradOutput = torch.CudaTensor(2,3):uniform(0,1) + local input = torch[typename:match('torch.(%a+)')](2,3):uniform(0,1) + local gradOutput = torch[typename:match('torch.(%a+)')](2,3):uniform(0,1) - local output = mlp:forward(input) - mlp:zeroGradParameters() - local gradInput = mlp:backward(input, gradOutput) + local output = mlp:forward(input) + mlp:zeroGradParameters() + local gradInput = mlp:backward(input, gradOutput) - -- test CPU only + -- test CPU only - local params, gradParams = mlp:parameters() + local params, gradParams = mlp:parameters() - mlp:float() + mlp:type(ctype) - local input2, gradOutput2 = input:float(), gradOutput:float() + local input2, gradOutput2 = input:type(ctype), gradOutput:type(ctype) - local _cutorch = cutorch - cutorch = nil + local _cutorch = cutorch + cutorch = nil - local output2 = mlp:forward(input2) - mlp:zeroGradParameters() - local gradInput2 = mlp:backward(input2, gradOutput2) + local output2 = mlp:forward(input2) + mlp:zeroGradParameters() + local gradInput2 = mlp:backward(input2, gradOutput2) - cutorch = _cutorch + cutorch = _cutorch - mytester:assertTensorEq(output:float(), output2, 0.000001) - mytester:assertTensorEq(gradInput:float(), gradInput2, 0.000001) + mytester:assertTensorEq(output:double(), output2:double(), tolerance) + mytester:assertTensorEq(gradInput:double(), gradInput2:double(), tolerance) - local params2, gradParams2 = mlp:parameters() + local params2, gradParams2 = mlp:parameters() - for i=1,#params do - mytester:assertTensorEq(params[i]:float(), params2[i], 0.000001) - mytester:assertTensorEq(gradParams[i]:float(), gradParams2[i], 0.000001) - end + for i=1,#params do + mytester:assertTensorEq(params[i]:double(), params2[i]:double(), tolerance) + mytester:assertTensorEq(gradParams[i]:double(), gradParams2[i]:double(), tolerance) + end - cutorch.setDevice(originaldevice) + cutorch.setDevice(originaldevice) + end end local function setUp() |