SpatialDepthWiseConvolution

author: stooloveu <stooloveu@gmail.com> 2017-01-06 05:27:05 +0300
committer: Nicholas Leonard <nleonard@twitter.com> 2017-05-12 18:34:27 +0300
commit: 5f1f7a267405d86dc3c392dc0d8e5442a5e7908c (patch)
tree: 3ab05990a09dd1f14a67816f7020ce7259af0535 /lib
parent: 44a15a1acbe1965364585b21371a795bfe593955 (diff)
3 files changed, 557 insertions, 0 deletions
diff --git a/lib/THNN/generic/SpatialDepthWiseConvolution.c b/lib/THNN/generic/SpatialDepthWiseConvolution.c
new file mode 100644
index 0000000..750bae0
--- /dev/null
+++ b/lib/THNN/generic/SpatialDepthWiseConvolution.c
@@ -0,0 +1,519 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialDepthWiseConvolution.c"
+#else
+
+static inline void THNN_(SpatialDepthWiseConvolution_shapeCheck)(
+	THTensor *input, THTensor *gradOutput,
+	THTensor *weight, THTensor *bias,
+	int kH, int kW, int dH, int dW, int padH, int padW) {
+
+  THArgCheck(kW > 0 && kH > 0, 9,
+	       "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+  THArgCheck(dW > 0 && dH > 0, 11,
+	     "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+  THNN_ARGCHECK(weight->nDimension == 4, 5, weight,
+		"2D or 4D weight tensor expected, but got: %s");
+
+  if (bias != NULL) {
+    THNN_CHECK_DIM_SIZE(bias, 2, 0, weight->size[0]);
+    THNN_CHECK_DIM_SIZE(bias, 2, 1, weight->size[1]);
+  }
+
+  int ndim = input->nDimension;
+  int dimf = 0;
+  int dimh = 1;
+  int dimw = 2;
+
+  if (ndim == 4) {
+    dimf++;
+    dimh++;
+    dimw++;
+  }
+
+  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+		"3D or 4D input tensor expected but got: %s");
+
+  long nInputPlane  = weight->size[1];
+  long inputHeight  = input->size[dimh];
+  long inputWidth   = input->size[dimw];
+  long nOutputPlane = weight->size[0];
+  long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+  long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
+
+  if (outputWidth < 1 || outputHeight < 1)
+    THError("Given input size: (%d x %d x %d). "
+	    "Calculated output size: (%d x %d x %d). Output size is too small",
+	    nInputPlane,inputHeight,inputWidth,nOutputPlane*nInputPlane,outputHeight,outputWidth);
+
+  THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+  if (gradOutput != NULL) {
+    THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimf, nInputPlane);
+    THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimh, nOutputPlane);
+    THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimw, outputHeight);
+    THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimw + 1, outputWidth);
+  }
+}
+
+static void THNN_(SpatialDepthWiseConvolution_updateOutput_frame)(
+          THTensor *input,
+          THTensor *output,
+          THTensor *weight,
+          THTensor *bias,
+          THTensor *finput,
+          int kW,
+          int kH,
+          int dW,
+          int dH,
+          int padW,
+          int padH,
+          long nInputPlane,
+          long inputWidth,
+          long inputHeight,
+          long nOutputPlane,
+          long outputWidth,
+          long outputHeight)
+{
+  long i;
+  THTensor *output2d;
+
+  THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
+		       nInputPlane, inputWidth, inputHeight,
+		       outputWidth, outputHeight);
+
+  output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
+                                         nOutputPlane, -1,
+                                         outputHeight*outputWidth, -1);
+  if (bias) {
+    for(i = 0; i < nOutputPlane; i++)
+        THVector_(fill)
+	  (output->storage->data + output->storageOffset + output->stride[0] * i,
+	   THTensor_(get1d)(bias, i), outputHeight*outputWidth);
+  } else {
+    THTensor_(zero)(output);
+  }
+
+  THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
+
+  THTensor_(free)(output2d);
+}
+
+void THNN_(SpatialDepthWiseConvolution_updateOutput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *output,
+          THTensor *weight,
+          THTensor *bias,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW,
+          int kH,
+          int dW,
+          int dH,
+          int padW,
+          int padH)
+{
+  long nInputPlane = weight->nDimension == 2 ? weight->size[1]/(kH*kW) : weight->size[1];
+  long nOutputPlane = weight->size[0];
+  if (weight->nDimension == 2) {
+    THTensor_(resize4d)(weight, nOutputPlane, nInputPlane, kH, kW);
+  }
+
+  THNN_(SpatialDepthWiseConvolution_shapeCheck)
+    (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW);
+
+  THTensor *_weight = THTensor_(newTranspose)(weight, 0, 1);
+  weight = THTensor_(newContiguous)(_weight);
+  THTensor *_bias = THTensor_(newTranspose)(bias, 0, 1);
+  bias = THTensor_(newContiguous)(_bias);
+
+
+  // resize weight
+  long s1 = weight->size[0];
+  long s2 = weight->size[1];
+  long s3 = weight->size[2] * weight->size[3];
+  weight = THTensor_(newWithStorage3d)(weight->storage, weight->storageOffset,
+          s1, -1, s2, -1, s3, -1);
+
+  input = THTensor_(newContiguous)(input);
+
+  int ndim = input->nDimension;
+
+  int batch = 1;
+  if (ndim == 3) {
+    // Force batch
+    batch = 0;
+    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+  }
+
+  long inputHeight  = input->size[3];
+  long inputWidth   = input->size[2];
+  long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+  long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
+
+  long T = input->size[0];
+  long t;
+
+  THTensor_(resize5d)(output, T, nInputPlane, nOutputPlane, outputHeight, outputWidth);
+  THTensor_(resize4d)(finput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth);
+
+#pragma omp parallel for private(t)
+  for(t = 0; t < T; t++)
+  {
+    THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+    THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+    THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+    long i;
+#pragma omp parallel for private(i)
+    for(i = 0; i < nInputPlane; i++)
+    {
+      THTensor *weight_i = THTensor_(newSelect)(weight, 0, i);
+      THTensor *bias_i = THTensor_(newSelect)(bias, 0, i);
+      THTensor *input_i = THTensor_(newNarrow)(input_t, 0, i, 1);
+      THTensor *output_i = THTensor_(newSelect)(output_t, 0, i);
+      THTensor *finput_i = THTensor_(newSelect)(finput_t, 0, i);
+
+      THNN_(SpatialDepthWiseConvolution_updateOutput_frame)
+	(input_i, output_i, weight_i, bias_i, finput_i,
+	 kW, kH, dW, dH, padW, padH,
+	 1, inputWidth, inputHeight,
+	 nOutputPlane, outputWidth, outputHeight);
+
+      THTensor_(free)(input_i);
+      THTensor_(free)(weight_i);
+      THTensor_(free)(bias_i);
+      THTensor_(free)(output_i);
+      THTensor_(free)(finput_i);
+    }
+    THTensor_(free)(input_t);
+    THTensor_(free)(output_t);
+    THTensor_(free)(finput_t);
+  }
+
+  THTensor_(free)(weight);
+  THTensor_(free)(_weight);
+  THTensor_(free)(bias);
+  THTensor_(free)(_bias);
+
+  THTensor_(resize4d)(output, T, nInputPlane * nOutputPlane, outputWidth, outputHeight);
+
+  if (batch == 0) {
+    THTensor_(select)(output, NULL, 0, 0);
+    THTensor_(select)(input, NULL, 0, 0);
+    THTensor_(select)(finput, NULL, 0, 0);
+  }
+  THTensor_(free)(input);
+}
+
+static void THNN_(SpatialDepthWiseConvolution_updateGradInput_frame)(
+          THTensor *gradInput,
+          THTensor *gradOutput,
+          THTensor *weight,
+          THTensor *fgradInput,
+          int kW,
+          int kH,
+          int dW,
+          int dH,
+          int padW,
+          int padH)
+{
+  THTensor *gradOutput2d = THTensor_(newWithStorage2d)
+    (gradOutput->storage, gradOutput->storageOffset,
+     gradOutput->size[0], -1,
+     gradOutput->size[1]*gradOutput->size[2], -1);
+  THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
+  THTensor_(free)(gradOutput2d);
+
+  THTensor_(zero)(gradInput);
+
+  THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH,
+		      padW, padH,
+		      gradInput->size[0], gradInput->size[2], gradInput->size[1],
+		      gradOutput->size[2], gradOutput->size[1]);
+}
+
+void THNN_(SpatialDepthWiseConvolution_updateGradInput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradInput,
+          THTensor *weight,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW,
+          int kH,
+          int dW,
+          int dH,
+          int padW,
+          int padH)
+{
+  long nInputPlane = weight->nDimension == 2 ? weight->size[1]/(kH*kW) : weight->size[1];
+  long nOutputPlane = weight->size[0];
+  if (weight->nDimension == 2) {
+    THTensor_(resize4d)(weight, nOutputPlane, nInputPlane, kH, kW);
+  }
+  gradOutput = THTensor_(newWithTensor)(gradOutput);
+
+  if (input->nDimension == 3) {
+    if (gradOutput->nDimension == 3) {
+      THTensor_(resize4d)(gradOutput, nInputPlane, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
+    }
+  }
+  else
+  {
+    if (gradOutput->nDimension == 4) {
+      THTensor_(resize5d)(gradOutput, gradOutput->size[0], nInputPlane, nOutputPlane, gradOutput->size[2], gradOutput->size[3]);
+    }
+  }
+
+
+  THNN_(SpatialDepthWiseConvolution_shapeCheck)
+    (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW);
+
+  THTensor *_weight = THTensor_(newTranspose)(weight, 0, 1);
+  weight = THTensor_(newContiguous)(_weight);
+
+
+  // resize weight
+  long s1 = weight->size[0];
+  long s2 = weight->size[1];
+  long s3 = weight->size[2] * weight->size[3];
+  weight = THTensor_(newWithStorage3d)(weight->storage, weight->storageOffset,
+          s1, -1, s2, -1, s3, -1);
+
+  input = THTensor_(newContiguous)(input);
+
+  int batch = 1;
+  if (input->nDimension == 3) {
+    // Force batch
+    batch = 0;
+    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+    THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+  }
+
+  long inputHeight  = input->size[3];
+  long inputWidth   = input->size[2];
+  long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+  long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
+
+  long T = input->size[0];
+  long t;
+
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(resize4d)(fgradInput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth);
+
+  // depending on the BLAS library, fgradInput (result tensor) might
+  // be left uninitialized on zero alpha, which might lead to weird behavior
+  // hence, to be safe, zero it
+  THTensor_(zero)(fgradInput);
+
+
+
+#pragma omp parallel for private(t)
+  for(t = 0; t < T; t++)
+  {
+    THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+    THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+    THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+
+    long i;
+#pragma omp parallel for private(i)
+    for(i = 0; i < nInputPlane; i++)
+    {
+      THTensor *weight_i = THTensor_(newSelect)(weight, 0, i);
+      THTensor *gradInput_i = THTensor_(newNarrow)(gradInput_t, 0, i, 1);
+      THTensor *gradOutput_i = THTensor_(newSelect)(gradOutput_t, 0, i);
+      THTensor *fgradInput_i = THTensor_(newSelect)(fgradInput_t, 0, i);
+
+      THTensor_(transpose)(weight_i, weight_i, 0, 1);
+
+      THNN_(SpatialDepthWiseConvolution_updateGradInput_frame)(gradInput_i, gradOutput_i,
+              weight_i, fgradInput_i,
+              kW, kH, dW, dH, padW, padH);
+
+      THTensor_(free)(gradInput_i);
+      THTensor_(free)(weight_i);
+      THTensor_(free)(gradOutput_i);
+      THTensor_(free)(fgradInput_i);
+    }
+
+    THTensor_(free)(gradInput_t);
+    THTensor_(free)(gradOutput_t);
+    THTensor_(free)(fgradInput_t);
+  }
+
+  if (batch == 0) {
+    THTensor_(select)(gradOutput, NULL, 0, 0);
+    THTensor_(select)(input, NULL, 0, 0);
+    THTensor_(select)(gradInput, NULL, 0, 0);
+    THTensor_(select)(fgradInput, NULL, 0, 0);
+  }
+
+  THTensor_(free)(input);
+  THTensor_(free)(gradOutput);
+  THTensor_(free)(weight);
+  THTensor_(free)(_weight);
+}
+
+static void THNN_(SpatialDepthWiseConvolution_accGradParameters_frame)(
+          THTensor *gradOutput,
+          THTensor *gradWeight,
+          THTensor *gradBias,
+          THTensor *finput,
+          accreal scale)
+{
+  long i;
+  THTensor *gradOutput2d = THTensor_(newWithStorage2d)
+    (gradOutput->storage, gradOutput->storageOffset,
+     gradOutput->size[0], -1,
+     gradOutput->size[1]*gradOutput->size[2], -1);
+
+  THTensor_(transpose)(finput, finput, 0, 1);
+  THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
+  THTensor_(transpose)(finput, finput, 0, 1);
+
+  if (gradBias) {
+    for(i = 0; i < gradBias->size[0]; i++)
+    {
+      long k;
+      real sum = 0;
+      real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
+      for(k = 0; k < gradOutput2d->size[1]; k++)
+        sum += data[k];
+      (gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum;
+    }
+  }
+
+  THTensor_(free)(gradOutput2d);
+}
+
+void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradWeight,
+          THTensor *gradBias,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW,
+          int kH,
+          int dW,
+          int dH,
+          int padW,
+          int padH,
+          accreal scale)
+{
+  long nInputPlane = gradWeight->nDimension == 2 ? gradWeight->size[1]/(kH*kW) : gradWeight->size[1];
+  long nOutputPlane = gradWeight->size[0];
+  if (gradWeight->nDimension == 2) {
+    THTensor_(resize4d)(gradWeight, nOutputPlane, nInputPlane, kH, kW);
+  }
+
+  gradOutput = THTensor_(newWithTensor)(gradOutput);
+  if (input->nDimension == 3) {
+    if (gradOutput->nDimension == 3) {
+      THTensor_(resize4d)(gradOutput, nInputPlane, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
+    }
+  }
+  else
+  {
+    if (gradOutput->nDimension == 4) {
+      THTensor_(resize5d)(gradOutput, gradOutput->size[0], nInputPlane, nOutputPlane, gradOutput->size[2], gradOutput->size[3]);
+    }
+  }
+
+
+  THNN_(SpatialDepthWiseConvolution_shapeCheck)
+    (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW);
+
+  // Transpose gradWeight & gradBias
+  THTensor_(transpose)(gradWeight, NULL, 0, 1);
+  THTensor_(transpose)(gradBias, NULL, 0, 1);
+
+  THTensor *_gradWeight;
+  THTensor *_gradBias;
+  _gradBias = gradBias;
+  _gradWeight = gradWeight;
+
+  gradWeight = THTensor_(newContiguous)(gradWeight);
+  gradBias = THTensor_(newContiguous)(gradBias);
+
+  // resize gradWeight
+  long s1 = gradWeight->size[0];
+  long s2 = gradWeight->size[1];
+  long s3 = gradWeight->size[2] * gradWeight->size[3];
+  gradWeight = THTensor_(newWithStorage3d)(gradWeight->storage, gradWeight->storageOffset,
+          s1, -1, s2, -1, s3, -1);
+
+  input = THTensor_(newContiguous)(input);
+
+
+  int batch = 1;
+  if (input->nDimension == 3) {
+    // Force batch
+    batch = 0;
+    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+    THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+  }
+
+  long inputHeight  = input->size[3];
+  long inputWidth   = input->size[2];
+  long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+  long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
+
+  long T = input->size[0];
+  long t;
+  THTensor_(resize4d)(finput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth);
+
+  for(t = 0; t < T; t++)
+  {
+    THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+    THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+    long i;
+#pragma omp parallel for private(i)
+    for(i = 0; i < nInputPlane; i++)
+    {
+      THTensor *finput_i = THTensor_(newSelect)(finput_t, 0, i);
+      THTensor *gradOutput_i = THTensor_(newSelect)(gradOutput_t, 0, i);
+      THTensor *gradWeight_i = THTensor_(newSelect)(gradWeight, 0, i);
+      THTensor *gradBias_i = THTensor_(newSelect)(gradBias, 0, i);
+
+      THNN_(SpatialDepthWiseConvolution_accGradParameters_frame)(gradOutput_i, gradWeight_i,
+                gradBias_i, finput_i, scale);
+
+      THTensor_(free)(finput_i);
+      THTensor_(free)(gradOutput_i);
+      THTensor_(free)(gradWeight_i);
+      THTensor_(free)(gradBias_i);
+    }
+
+    THTensor_(free)(gradOutput_t);
+    THTensor_(free)(finput_t);
+  }
+
+  // Copy back and transpose back
+  THTensor_(transpose)(_gradWeight, NULL, 0, 1);
+  THTensor_(transpose)(_gradBias, NULL, 0, 1);
+  THTensor_(resize4d)(_gradWeight, nInputPlane, nOutputPlane, kH, kW);
+  THTensor_(resize2d)(_gradBias, nInputPlane, nOutputPlane);
+
+  THTensor_(copy)(_gradWeight, gradWeight);
+  THTensor_(copy)(_gradBias, gradBias);
+  THTensor_(transpose)(_gradWeight, NULL, 0, 1);
+  THTensor_(transpose)(_gradBias, NULL, 0, 1);
+
+  if (batch == 0) {
+    THTensor_(select)(gradOutput, NULL, 0, 0);
+    THTensor_(select)(input, NULL, 0, 0);
+    THTensor_(select)(finput, NULL, 0, 0);
+  }
+
+  THTensor_(free)(input);
+  THTensor_(free)(gradOutput);
+  THTensor_(free)(gradWeight);
+  THTensor_(free)(gradBias);
+}
+
+#endif
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
index 8abc261..fcc7f51 100644
--- a/lib/THNN/generic/THNN.h
+++ b/lib/THNN/generic/THNN.h
@@ -810,6 +810,41 @@ TH_API void THNN_(SpatialConvolutionMM_accGradParameters)(
           int padW, int padH,
           accreal scale);
 
+TH_API void THNN_(SpatialDepthWiseConvolution_updateOutput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *output,
+          THTensor *weight,
+          THTensor *bias,         // [OPTIONAL]
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW, int kH,
+          int dW, int dH,
+          int padW, int padH);
+TH_API void THNN_(SpatialDepthWiseConvolution_updateGradInput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradInput,
+          THTensor *weight,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW, int kH,
+          int dW, int dH,
+          int padW, int padH);
+TH_API void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradWeight,
+          THTensor *gradBias,     // [OPTIONAL]
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW, int kH,
+          int dW, int dH,
+          int padW, int padH,
+          accreal scale);
+
 TH_API void THNN_(SpatialConvolutionLocal_updateOutput)(
           THNNState *state,
           THTensor *input,
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
index 9b90b20..6c64015 100644
--- a/lib/THNN/init.c
+++ b/lib/THNN/init.c
@@ -191,6 +191,9 @@
 #include "generic/SpatialConvolutionMM.c"
 #include "THGenerateFloatTypes.h"
 
+#include "generic/SpatialDepthWiseConvolution.c"
+#include "THGenerateFloatTypes.h"
+
 #include "generic/SpatialConvolutionLocal.c"
 #include "THGenerateFloatTypes.h"
author	stooloveu <stooloveu@gmail.com>	2017-01-06 05:27:05 +0300
committer	Nicholas Leonard <nleonard@twitter.com>	2017-05-12 18:34:27 +0300
commit	5f1f7a267405d86dc3c392dc0d8e5442a5e7908c (patch)
tree	3ab05990a09dd1f14a67816f7020ce7259af0535 /lib
parent	44a15a1acbe1965364585b21371a795bfe593955 (diff)