Merge pull request #476 from lospooky/SpatialDepthWiseConvolution-segfault

Fix segfault in SpatialDepthWiseConvolution w/o bias
author: Soumith Chintala <soumith@gmail.com> 2017-07-03 07:36:49 +0300
committer: GitHub <noreply@github.com> 2017-07-03 07:36:49 +0300
commit: c3c0d9b11125f452c82f3225908f4313f91bb849 (patch)
tree: 1a150269d04cdd8b719ead8aa29adf38b5a6a735
parent: fd4780703227d2a00d708fdd1bc19b6c92e8e4d7 (diff)
parent: 28e9b2bcc0dfddc90c7ff12f7c3bdf4259ea047b (diff)
1 files changed, 42 insertions, 22 deletions
diff --git a/lib/THCUNN/generic/SpatialDepthWiseConvolution.cu b/lib/THCUNN/generic/SpatialDepthWiseConvolution.cu
index 5da6570..1fc365f 100644
--- a/lib/THCUNN/generic/SpatialDepthWiseConvolution.cu
+++ b/lib/THCUNN/generic/SpatialDepthWiseConvolution.cu
@@ -86,8 +86,12 @@ void THNN_(SpatialDepthWiseConvolution_updateOutput)(
   // Transpose weight & bias
   THCTensor *_weight = THCTensor_(newTranspose)(state, weight, 0, 1);
   weight = THCTensor_(newContiguous)(state, _weight);
-  THCTensor *_bias = THCTensor_(newTranspose)(state, bias, 0, 1);
-  bias = THCTensor_(newContiguous)(state, _bias);
+
+  THCTensor *_bias = NULL;
+  if(bias) {
+    _bias = THCTensor_(newTranspose)(state, bias, 0, 1);
+    bias = THCTensor_(newContiguous)(state, _bias);
+  }
 
   // resize weight
   long s1 = weight->size[0];
@@ -137,8 +141,11 @@ void THNN_(SpatialDepthWiseConvolution_updateOutput)(
   THCTensor *input_i = THCTensor_(new)(state);
   THCTensor *output_i = THCTensor_(new)(state);
   THCTensor *weight_i = THCTensor_(new)(state);
-  THCTensor *bias_i = THCTensor_(new)(state);
 
+  THCTensor *bias_i = NULL;
+  if(bias) {
+    bias_i = THCTensor_(new)(state);
+  }
   // For each elt in batch, do:
   for (int elt = 0; elt < batchSize; elt ++) {
     // Matrix mulitply per output:
@@ -152,8 +159,9 @@ void THNN_(SpatialDepthWiseConvolution_updateOutput)(
       THCTensor_(narrow)(state, input_i, input_n, 0, ipelt, 1);
       THCTensor_(select)(state, output_i, output_n, 0, ipelt);
       THCTensor_(select)(state, weight_i, weight, 0, ipelt);
-      THCTensor_(select)(state, bias_i, bias, 0, ipelt);
-
+      if (bias) {
+        THCTensor_(select)(state, bias_i, bias, 0, ipelt);
+      }
       // Do Bias first:
       // M,N,K are dims of matrix A and B
       // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
@@ -223,14 +231,15 @@ void THNN_(SpatialDepthWiseConvolution_updateOutput)(
 
   THCTensor_(free)(state, input_i);
   THCTensor_(free)(state, output_i);
-  THCTensor_(free)(state, bias_i);
+
   THCTensor_(free)(state, weight_i);
 
   THCTensor_(free)(state, weight);
   THCTensor_(free)(state, _weight);
+
+  THCTensor_(free)(state, bias_i);
   THCTensor_(free)(state, bias);
   THCTensor_(free)(state, _bias);
-
   // Transpose output
   THCTensor_(resize4d)(state, output, batchSize, nInputPlane * nOutputPlane, outputHeight, outputWidth);
 
@@ -452,15 +461,21 @@ void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
 
   // Transpose gradWeight & gradBias
   THCTensor_(transpose)(state, gradWeight, NULL, 0, 1);
-  THCTensor_(transpose)(state, gradBias, NULL, 0, 1);
+
+
+  THCTensor *_gradBias = NULL;
+  if(gradBias) {
+    THCTensor_(transpose)(state, gradBias, NULL, 0, 1);
+    _gradBias = gradBias;
+    gradBias = THCTensor_(newContiguous)(state, gradBias);
+
+  }
 
   THCTensor *_gradWeight;
-  THCTensor *_gradBias;
-  _gradBias = gradBias;
+
   _gradWeight = gradWeight;
 
   gradWeight = THCTensor_(newContiguous)(state, gradWeight);
-  gradBias = THCTensor_(newContiguous)(state, gradBias);
 
 
   // resize gradWeight
@@ -506,7 +521,11 @@ void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
   THCTensor *gradOutput_i = THCTensor_(new)(state);
   THCTensor *input_i = THCTensor_(new)(state);
   THCTensor *gradWeight_i = THCTensor_(new)(state);
-  THCTensor *gradBias_i = THCTensor_(new)(state);
+
+  THCTensor *gradBias_i = NULL;
+  if(gradBias) {
+    gradBias_i = THCTensor_(new)(state);
+  }
 
   // For each elt in batch, do:
   for (int elt = 0; elt < batchSize; elt ++) {
@@ -519,7 +538,9 @@ void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
       THCTensor_(narrow)(state, input_i, input_n, 0, ipelt, 1);
       THCTensor_(select)(state, gradOutput_i, gradOutput_n, 0, ipelt);
       THCTensor_(select)(state, gradWeight_i, gradWeight, 0, ipelt);
-      THCTensor_(select)(state, gradBias_i, gradBias, 0, ipelt);
+      if (gradBias) {
+        THCTensor_(select)(state, gradBias_i, gradBias, 0, ipelt);
+      }
 
       // Extract columns:
       im2col(
@@ -596,29 +617,28 @@ void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
 
   // Copy back and transpose back
   THCTensor_(transpose)(state, _gradWeight, NULL, 0, 1);
-  THCTensor_(transpose)(state, _gradBias, NULL, 0, 1);
   THCTensor_(resize4d)(state, _gradWeight, nInputPlane, nOutputPlane, kH, kW);
-  THCTensor_(resize2d)(state, _gradBias, nInputPlane, nOutputPlane);
-
   THCTensor_(copy)(state, _gradWeight, gradWeight);
-  THCTensor_(copy)(state, _gradBias, gradBias);
   THCTensor_(transpose)(state, _gradWeight, NULL, 0, 1);
-  THCTensor_(transpose)(state, _gradBias, NULL, 0, 1);
+
+  if(gradBias) {
+    THCTensor_(transpose)(state, _gradBias, NULL, 0, 1);
+    THCTensor_(resize2d)(state, _gradBias, nInputPlane, nOutputPlane);
+    THCTensor_(copy)(state, _gradBias, gradBias);
+    THCTensor_(transpose)(state, _gradBias, NULL, 0, 1);
+  }
 
 
   // Free
   THCTensor_(free)(state, input_n);
   THCTensor_(free)(state, gradOutput_n);
-
   THCTensor_(free)(state, input_i);
   THCTensor_(free)(state, gradOutput_i);
   THCTensor_(free)(state, gradWeight_i);
-  THCTensor_(free)(state, gradBias_i);
-
   THCTensor_(free)(state, gradWeight);
+  THCTensor_(free)(state, gradBias_i);
   THCTensor_(free)(state, gradBias);
 
-
   // Resize
   if (batch == 0) {
     THCTensor_(select)(state, gradOutput, NULL, 0, 0);
author	Soumith Chintala <soumith@gmail.com>	2017-07-03 07:36:49 +0300
committer	GitHub <noreply@github.com>	2017-07-03 07:36:49 +0300
commit	c3c0d9b11125f452c82f3225908f4313f91bb849 (patch)
tree	1a150269d04cdd8b719ead8aa29adf38b5a6a735
parent	fd4780703227d2a00d708fdd1bc19b6c92e8e4d7 (diff)
parent	28e9b2bcc0dfddc90c7ff12f7c3bdf4259ea047b (diff)