github.com/torch/nn.git

author     Zeming Lin <ebetica0@gmail.com>   2016-05-10 11:30:09 +0300
committer  Zeming Lin <ebetica0@gmail.com>   2016-05-14 00:28:26 +0300
commit     a20aa1ee3aab75172a5697560e01f61af94c3293 (patch)
tree       2458a47ca580fe0c57009fdfe76e78096b8ac787
parent     17aca108abcf50830f624f8dfd8d8928c3241450 (diff)
Fixing sparse linear race condition

The old kernels parallelized over individual nonzeros, so two OMP threads
could axpy into the same output row (updateOutput) or the same gradWeight
column (accGradParameters) at the same time. The nonzeros are now grouped
through CSR/CSC-style pointer arrays and the parallel loops run over rows
and columns instead, so each thread exclusively owns the region it
accumulates into. On the Lua side, accGradParameters first sorts the
formatted input into column-major order so the C code can build its column
pointers in a single pass.
-rw-r--r--  SparseLinear.lua                 |   7
-rw-r--r--  lib/THNN/generic/SparseLinear.c  | 104
-rw-r--r--  test.lua                         |  27
3 files changed, 92 insertions(+), 46 deletions(-)
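
To make the fix concrete, here is a minimal standalone sketch of the
updateOutput pattern, under stated simplifications: plain C arrays stand in
for THTensor, entries are 0-indexed (the patch converts from Lua's
1-indexing), and the names update_output_racy and update_output_rowwise are
made up for this illustration.

    /* Toy reproduction of the updateOutput race and its row-wise fix. */
    #include <stdio.h>

    #define BATCH   2
    #define IN_DIM  3
    #define OUT_DIM 2

    typedef struct { int row, col; float val; } NZ;

    /* Old scheme: parallel over nonzeros. Two nonzeros sharing a row are
     * handled by different threads that both accumulate into out[row],
     * which is the race this commit removes. */
    static void update_output_racy(const NZ *nz, int nnz,
                                   const float W[OUT_DIM][IN_DIM],
                                   float out[BATCH][OUT_DIM]) {
      #pragma omp parallel for
      for (int i = 0; i < nnz; i++)
        for (int o = 0; o < OUT_DIM; o++)
          out[nz[i].row][o] += nz[i].val * W[o][nz[i].col]; /* unsynchronized */
    }

    /* New scheme: build CSR-style row pointers over the row-sorted
     * nonzeros, then parallelize over rows so each thread owns the one
     * row it writes to. */
    static void update_output_rowwise(const NZ *nz, int nnz,
                                      const float W[OUT_DIM][IN_DIM],
                                      float out[BATCH][OUT_DIM]) {
      int csr[BATCH + 1] = {0};
      for (int i = 0; i < nnz; i++) {
        int hp0 = nz[i].row;
        int hp1 = (i + 1 == nnz) ? BATCH : nz[i + 1].row;
        for (int h = hp0; h < hp1; h++)   /* also fills slots of empty rows */
          csr[h + 1] = i + 1;
      }
      #pragma omp parallel for
      for (int h = 0; h < BATCH; h++)     /* each thread owns row h */
        for (int i = csr[h]; i < csr[h + 1]; i++)
          for (int o = 0; o < OUT_DIM; o++)
            out[h][o] += nz[i].val * W[o][nz[i].col];
    }

    int main(void) {
      const NZ nz[] = { {0, 0, 1.f}, {0, 1, 2.f}, {1, 2, 3.f} };
      const float W[OUT_DIM][IN_DIM] = { {1, 0, 2}, {0, 1, 1} };
      float a[BATCH][OUT_DIM] = {{0}}, b[BATCH][OUT_DIM] = {{0}};
      update_output_racy(nz, 3, W, a);    /* same math, unsafe with OMP */
      update_output_rowwise(nz, 3, W, b);
      for (int h = 0; h < BATCH; h++)
        printf("row %d: racy={%g,%g} rowwise={%g,%g}\n",
               h, a[h][0], a[h][1], b[h][0], b[h][1]);
      return 0;
    }

The if (nnz > 10000) clauses in the patch apply the same idea but only
enable threading once there is enough work to amortize it.
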
diff --git a/SparseLinear.lua b/SparseLinear.lua
index 6595be9..7c3edad 100644
--- a/SparseLinear.lua
+++ b/SparseLinear.lua
@@ -130,8 +130,13 @@ function SparseLinear:accGradParameters(input, gradOutput, scale)
gradOutput:resize(1, gradOutput:size(1))
end
+ local rows = self.formatted_input:select(2, 1)
+ local cols = self.formatted_input:select(2, 2)
+ local sortinds = cols * gradOutput:size(1) + rows
+ local _, inds = sortinds:sort(1, false)
+ local newinput = self.formatted_input:index(1, inds)
input[1].THNN.SparseLinear_accGradParameters(
- self.formatted_input:cdata(),
+ newinput:cdata(),
gradOutput:cdata(),
self.gradWeight:cdata(),
self.gradBias:cdata(),
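
The sort in the hunk above is what makes the C-side fix work: the key
cols * gradOutput:size(1) + rows is a column-major linear index, so after
sorting, all entries of one weight column sit contiguously, ordered by row
within the column. That is exactly the layout the one-pass CSC pointer
build in SparseLinear.c (next file) assumes. Below is a small standalone
demo of the same key, with hypothetical names and 0-indexed entries, not
THNN code.

    /* Sort (row, col) entries by the column-major key col*batchSize + row
     * and observe that columns come out grouped, rows ordered within. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { long row, col; } Entry;

    static long batch_size = 4;  /* qsort comparators take no context */

    static int by_colmajor(const void *a, const void *b) {
      const Entry *x = a, *y = b;
      long kx = x->col * batch_size + x->row;
      long ky = y->col * batch_size + y->row;
      return (kx > ky) - (kx < ky);
    }

    int main(void) {
      Entry e[] = { {0, 2}, {1, 0}, {2, 2}, {3, 0}, {1, 1} };
      qsort(e, 5, sizeof e[0], by_colmajor);
      for (int i = 0; i < 5; i++)       /* prints column-grouped order */
        printf("(row=%ld, col=%ld)\n", e[i].row, e[i].col);
      return 0;
    }
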
diff --git a/lib/THNN/generic/SparseLinear.c b/lib/THNN/generic/SparseLinear.c
index 0f426ba..b7bf8ab 100644
--- a/lib/THNN/generic/SparseLinear.c
+++ b/lib/THNN/generic/SparseLinear.c
@@ -48,7 +48,7 @@ void THNN_(SparseLinear_updateOutput)(
THTensor *weight,
THTensor *bias)
{
- long h, i;
+ long h, i, j, hp0, hp1;
long outDim = THTensor_(size)(weight, 0);
long inDim = THTensor_(size)(weight, 1);
long batchSize = THTensor_(size)(output, 0);
@@ -59,25 +59,43 @@ void THNN_(SparseLinear_updateOutput)(
long nnz = THTensor_(size)(input, 0);
+ THLongTensor * csr = THLongTensor_newWithSize1d(batchSize+1);
+ THLongTensor_zero(csr);
+
+//#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
+ for (i=0; i<nnz; i++) {
+ hp0 = (long)(THNN_(get2d)(input, i, 0)) - 1;
+ hp1 = (i+1 == nnz) ?
+ batchSize :
+ (long)(THNN_(get2d)(input, i+1, 0)) - 1;
+ if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
+ THLongTensor_set1d(csr, h+1, i+1);
+ }
+ }
+
+
// output = weight * input + bias
THTensor_(zero)(output);
-#pragma omp parallel for private(i) schedule(static) if (nnz * outDim > 10000)
- for (i = 0; i < nnz; i++) {
- real val = THNN_(get2d)(input, i, 2);
- if (val == 0) {
- continue;
- }
+#pragma omp parallel for private(h, i) schedule(static) if (nnz > 10000)
+ for (h = 0; h < batchSize; h++) {
+ long i_start = THLongTensor_get1d(csr, h);
+ long i_end = THLongTensor_get1d(csr, h+1);
+ for (i = i_start; i < i_end; i++) {
+ real val = THNN_(get2d)(input, i, 2);
+ if (val == 0) {
+ continue;
+ }
- long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
- long h = (long)(THNN_(get2d)(input, i, 0)) - 1;
- if (offset >= 0 && offset < inDim) {
- THBlas_(axpy)(outDim,
- val,
- COL_PTR2(weight, offset), weight->stride[0],
- ROW_PTR2(output, h), output->stride[1]);
- } else {
- THError("index out of bound. updateOutput: %d not between 1 and %d",
- offset + 1, inDim);
+ long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THBlas_(axpy)(outDim,
+ val,
+ COL_PTR2(weight, offset), weight->stride[0],
+ ROW_PTR2(output, h), output->stride[1]);
+ } else {
+ THError("index out of bound. updateOutput: %d not between 1 and %d",
+ offset + 1, inDim);
+ }
}
}
@@ -151,7 +169,7 @@ void THNN_(SparseLinear_accGradParameters)(
real weightDecay,
real scale)
{
- long h, i;
+ long h, i, col, hp0, hp1;
long outDim = THTensor_(size)(weight, 0);
long inDim = THTensor_(size)(weight, 1);
@@ -165,26 +183,42 @@ void THNN_(SparseLinear_accGradParameters)(
"gradOutput must be contiguous");
long nnz = THTensor_(size)(input, 0);
- // THTensor_(resize2d)(gradOutput, batchSize, outDim);
- // gradWeight += gradOutput * input
-#pragma omp parallel for private(h, i) schedule(static) if (\
- nnz * outDim > 10000)
+ THLongTensor* csc = THLongTensor_newWithSize1d(inDim+1);
+ THLongTensor_zero(csc);
+
+#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
for (i = 0; i < nnz; i++) {
- real val = scale * THNN_(get2d)(input, i, 2);
+ hp0 = (long)(THNN_(get2d)(input, i, 1)) - 1;
+ hp1 = (i+1 == nnz) ?
+ inDim :
+ (long)(THNN_(get2d)(input, i+1, 1)) - 1;
+ if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
+ THLongTensor_set1d(csc, h+1, i+1);
+ }
+ }
- long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
- long h = (long)(THNN_(get2d)(input, i, 0)) - 1;
- if (offset >= 0 && offset < inDim) {
- THBlas_(axpy)(outDim,
- val,
- ROW_PTR2(gradOutput, h), gradOutput->stride[1],
- COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
- } else {
- THError(
- "index out of bound. accGradParameters: %d not between 1 and %d",
- offset + 1,
- inDim);
+ // gradWeight += gradOutput * input
+#pragma omp parallel for private(h, i, col) schedule(static) if (nnz > 10000)
+ for (col = 0; col < inDim; col++) {
+ long i_start = THLongTensor_get1d(csc, col);
+ long i_end = THLongTensor_get1d(csc, col+1);
+ for (i = i_start; i < i_end; i++) {
+ real val = scale * THNN_(get2d)(input, i, 2);
+
+ h = (long)(THNN_(get2d)(input, i, 0)) - 1;
+ long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THBlas_(axpy)(outDim,
+ val,
+ ROW_PTR2(gradOutput, h), gradOutput->stride[1],
+ COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
+ } else {
+ THError(
+ "index out of bound. accGradParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
}
}
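
A detail of the pointer construction above that is easy to miss: when
consecutive nonzeros skip over empty rows (or empty columns in the CSC
case), the inner for (h = hp0; h < hp1; h++) stamps the same index into
every skipped slot, so an empty row ends up with i_start == i_end and
contributes no work to the parallel loop. A 0-indexed toy sketch of just
that behavior, not the THNN code itself:

    #include <stdio.h>

    int main(void) {
      long rows[] = {0, 0, 3};          /* rows 1 and 2 are empty */
      long nnz = 3, batch = 4, csr[5] = {0};
      for (long i = 0; i < nnz; i++) {
        long hp0 = rows[i];
        long hp1 = (i + 1 == nnz) ? batch : rows[i + 1];
        for (long h = hp0; h < hp1; h++)   /* no-op when hp0 == hp1 */
          csr[h + 1] = i + 1;
      }
      for (long h = 0; h < batch; h++)  /* rows 1 and 2 print empty ranges */
        printf("row %ld: nonzeros [%ld, %ld)\n", h, csr[h], csr[h + 1]);
      return 0;
    }
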
diff --git a/test.lua b/test.lua
index 3847166..63b066f 100644
--- a/test.lua
+++ b/test.lua
@@ -805,12 +805,7 @@ function nntest.Linear()
end -- for ind, inj in pairs(inj_vals) do
end
-function nntest.SparseLinear()
- local inb = math.random(5,10)
- local ini = math.random(50,100)
- local inj = math.random(5,10)
- local numNonzero = math.random(3,5)
-
+local function test_sparse_linear(inb, ini, inj, numNonzero)
local module = nn.SparseLinear(ini,inj, true)
local linear = nn.Linear(ini, inj)
linear.weight = module.weight:clone()
@@ -822,11 +817,11 @@ function nntest.SparseLinear()
local input = {}
local nonsparse = torch.zeros(inb, ini)
for i=1,inb do
- local nnz = math.random(1, 3)
+ local nnz = math.random(1, 3) + numNonzero
local inds = torch.randperm(ini)[{{1,nnz}}]
input[i] = torch.Tensor(nnz, 2)
input[i]:select(2,1):copy(inds)
- input[i]:select(2,2):copy(torch.ones(nnz))
+ input[i]:select(2,2):copy(torch.rand(nnz))
nonsparse[i]:scatter(1, input[i]:select(2,1):long(), input[i]:select(2,2))
end
local gradOutput = torch.rand(inb, inj)
@@ -872,8 +867,8 @@ function nntest.SparseLinear()
mytester:assertle(gierr, precision, 'error on gradInput with ntimes = '..ntimes)
for _,var in ipairs(cmps) do
- local err = (module[var] - linear[var]):abs():max()
- mytester:assertle(err, precision, 'error on '..var..' with ntimes='..ntimes)
+ local err = (module[var] - linear[var]):abs():max()
+ mytester:assertle(err, precision, 'error on '..var..' with ntimes = '..ntimes)
end
module:zeroGradParameters()
@@ -913,6 +908,18 @@ function nntest.SparseLinear()
mytester:assertle(err, precision, 'error on batch result forward')
end
+function nntest.SparseLinear()
+ local inb = math.random(5,10)
+ local ini = math.random(50,100)
+ local inj = math.random(5,10)
+ local numNonzero = math.random(3,5)
+
+ test_sparse_linear(inb, ini, inj, numNonzero)
+ -- Tests OMP parallelism
+ test_sparse_linear(1, 50000, 10, 20000)
+ test_sparse_linear(1000, 1000, 10, 100)
+end
+
function nntest.Bilinear()
-- set up data: