diff options
author     Zeming Lin <ebetica0@gmail.com>  2016-05-10 11:30:09 +0300
committer  Zeming Lin <ebetica0@gmail.com>  2016-05-14 00:28:26 +0300
commit     a20aa1ee3aab75172a5697560e01f61af94c3293 (patch)
tree       2458a47ca580fe0c57009fdfe76e78096b8ac787
parent     17aca108abcf50830f624f8dfd8d8928c3241450 (diff)
Fixing sparse linear race condition
-rw-r--r--  SparseLinear.lua                 |   7
-rw-r--r--  lib/THNN/generic/SparseLinear.c  | 104
-rw-r--r--  test.lua                         |  27
3 files changed, 92 insertions(+), 46 deletions(-)
diff --git a/SparseLinear.lua b/SparseLinear.lua index 6595be9..7c3edad 100644 --- a/SparseLinear.lua +++ b/SparseLinear.lua @@ -130,8 +130,13 @@ function SparseLinear:accGradParameters(input, gradOutput, scale) gradOutput:resize(1, gradOutput:size(1)) end + local rows = self.formatted_input:select(2, 1) + local cols = self.formatted_input:select(2, 2) + local sortinds = cols * gradOutput:size(1) + rows + local _, inds = sortinds:sort(1, false) + local newinput = self.formatted_input:index(1, inds) input[1].THNN.SparseLinear_accGradParameters( - self.formatted_input:cdata(), + newinput:cdata(), gradOutput:cdata(), self.gradWeight:cdata(), self.gradBias:cdata(), diff --git a/lib/THNN/generic/SparseLinear.c b/lib/THNN/generic/SparseLinear.c index 0f426ba..b7bf8ab 100644 --- a/lib/THNN/generic/SparseLinear.c +++ b/lib/THNN/generic/SparseLinear.c @@ -48,7 +48,7 @@ void THNN_(SparseLinear_updateOutput)( THTensor *weight, THTensor *bias) { - long h, i; + long h, i, j, hp0, hp1; long outDim = THTensor_(size)(weight, 0); long inDim = THTensor_(size)(weight, 1); long batchSize = THTensor_(size)(output, 0); @@ -59,25 +59,43 @@ void THNN_(SparseLinear_updateOutput)( long nnz = THTensor_(size)(input, 0); + THLongTensor * csr = THLongTensor_newWithSize1d(batchSize+1); + THLongTensor_zero(csr); + +//#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000) + for (i=0; i<nnz; i++) { + hp0 = (long)(THNN_(get2d)(input, i, 0)) - 1; + hp1 = (i+1 == nnz) ? 
+ batchSize : + (long)(THNN_(get2d)(input, i+1, 0)) - 1; + if (hp0 != hp1) for (h = hp0; h < hp1; h++) { + THLongTensor_set1d(csr, h+1, i+1); + } + } + + // output = weight * input + bias THTensor_(zero)(output); -#pragma omp parallel for private(i) schedule(static) if (nnz * outDim > 10000) - for (i = 0; i < nnz; i++) { - real val = THNN_(get2d)(input, i, 2); - if (val == 0) { - continue; - } +#pragma omp parallel for private(h, i) schedule(static) if (nnz > 10000) + for (h = 0; h < batchSize; h++) { + long i_start = THLongTensor_get1d(csr, h); + long i_end = THLongTensor_get1d(csr, h+1); + for (i = i_start; i < i_end; i++) { + real val = THNN_(get2d)(input, i, 2); + if (val == 0) { + continue; + } - long offset = (long)(THNN_(get2d)(input, i, 1)) - 1; - long h = (long)(THNN_(get2d)(input, i, 0)) - 1; - if (offset >= 0 && offset < inDim) { - THBlas_(axpy)(outDim, - val, - COL_PTR2(weight, offset), weight->stride[0], - ROW_PTR2(output, h), output->stride[1]); - } else { - THError("index out of bound. updateOutput: %d not between 1 and %d", - offset + 1, inDim); + long offset = (long)(THNN_(get2d)(input, i, 1)) - 1; + if (offset >= 0 && offset < inDim) { + THBlas_(axpy)(outDim, + val, + COL_PTR2(weight, offset), weight->stride[0], + ROW_PTR2(output, h), output->stride[1]); + } else { + THError("index out of bound. 
updateOutput: %d not between 1 and %d", + offset + 1, inDim); + } } } @@ -151,7 +169,7 @@ void THNN_(SparseLinear_accGradParameters)( real weightDecay, real scale) { - long h, i; + long h, i, col, hp0, hp1; long outDim = THTensor_(size)(weight, 0); long inDim = THTensor_(size)(weight, 1); @@ -165,26 +183,42 @@ void THNN_(SparseLinear_accGradParameters)( "gradOutput must be contiguous"); long nnz = THTensor_(size)(input, 0); - // THTensor_(resize2d)(gradOutput, batchSize, outDim); - // gradWeight += gradOutput * input -#pragma omp parallel for private(h, i) schedule(static) if (\ - nnz * outDim > 10000) + THLongTensor* csc = THLongTensor_newWithSize1d(inDim+1); + THLongTensor_zero(csc); + +#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000) for (i = 0; i < nnz; i++) { - real val = scale * THNN_(get2d)(input, i, 2); + hp0 = (long)(THNN_(get2d)(input, i, 1)) - 1; + hp1 = (i+1 == nnz) ? + inDim : + (long)(THNN_(get2d)(input, i+1, 1)) - 1; + if (hp0 != hp1) for (h = hp0; h < hp1; h++) { + THLongTensor_set1d(csc, h+1, i+1); + } + } - long offset = (long)(THNN_(get2d)(input, i, 1)) - 1; - long h = (long)(THNN_(get2d)(input, i, 0)) - 1; - if (offset >= 0 && offset < inDim) { - THBlas_(axpy)(outDim, - val, - ROW_PTR2(gradOutput, h), gradOutput->stride[1], - COL_PTR2(gradWeight, offset), gradWeight->stride[0]); - } else { - THError( - "index out of bound. 
accGradParameters: %d not between 1 and %d", - offset + 1, - inDim); + // gradWeight += gradOutput * input +#pragma omp parallel for private(h, i, col) schedule(static) if (nnz > 10000) + for (col = 0; col < inDim; col++) { + long i_start = THLongTensor_get1d(csc, col); + long i_end = THLongTensor_get1d(csc, col+1); + for (i = i_start; i < i_end; i++) { + real val = scale * THNN_(get2d)(input, i, 2); + + h = (long)(THNN_(get2d)(input, i, 0)) - 1; + long offset = (long)(THNN_(get2d)(input, i, 1)) - 1; + if (offset >= 0 && offset < inDim) { + THBlas_(axpy)(outDim, + val, + ROW_PTR2(gradOutput, h), gradOutput->stride[1], + COL_PTR2(gradWeight, offset), gradWeight->stride[0]); + } else { + THError( + "index out of bound. accGradParameters: %d not between 1 and %d", + offset + 1, + inDim); + } } } @@ -805,12 +805,7 @@ function nntest.Linear() end -- for ind, inj in pairs(inj_vals) do end -function nntest.SparseLinear() - local inb = math.random(5,10) - local ini = math.random(50,100) - local inj = math.random(5,10) - local numNonzero = math.random(3,5) - +local function test_sparse_linear(inb, ini, inj, numNonzero) local module = nn.SparseLinear(ini,inj, true) local linear = nn.Linear(ini, inj) linear.weight = module.weight:clone() @@ -822,11 +817,11 @@ function nntest.SparseLinear() local input = {} local nonsparse = torch.zeros(inb, ini) for i=1,inb do - local nnz = math.random(1, 3) + local nnz = math.random(1, 3) + numNonzero local inds = torch.randperm(ini)[{{1,nnz}}] input[i] = torch.Tensor(nnz, 2) input[i]:select(2,1):copy(inds) - input[i]:select(2,2):copy(torch.ones(nnz)) + input[i]:select(2,2):copy(torch.rand(nnz)) nonsparse[i]:scatter(1, input[i]:select(2,1):long(), input[i]:select(2,2)) end local gradOutput = torch.rand(inb, inj) @@ -872,8 +867,8 @@ function nntest.SparseLinear() mytester:assertle(gierr, precision, 'error on gradInput with ntimes = '..ntimes) for _,var in ipairs(cmps) do - local err = (module[var] - linear[var]):abs():max() - 
mytester:assertle(err, precision, 'error on '..var..' with ntimes='..ntimes) + local err = (module[var] - linear[var]):abs():max() + mytester:assertle(err, precision, 'error on '..var..' with ntimes = '..ntimes) end module:zeroGradParameters() @@ -913,6 +908,18 @@ function nntest.SparseLinear() mytester:assertle(err, precision, 'error on batch result forward') end +function nntest.SparseLinear() + local inb = math.random(5,10) + local ini = math.random(50,100) + local inj = math.random(5,10) + local numNonzero = math.random(3,5) + + test_sparse_linear(inb, ini, inj, numNonzero) + -- Tests OMP parallelism + test_sparse_linear(1, 50000, 10, 20000) + test_sparse_linear(1000, 1000, 10, 100) +end + function nntest.Bilinear() -- set up data: |