diff options
author | Eli Stevens <elis@doselab.com> | 2017-02-15 14:28:15 +0300 |
---|---|---|
committer | soumith <soumith@fb.com> | 2017-02-15 15:18:17 +0300 |
commit | 251fcbfaf9a0acbf5ef8799c7f22e8b302fd6c76 (patch) | |
tree | 7ca877887f347fa81fab0365045a074842d5360c | |
parent | e56cea1472fc17c0e100a0b424a42d938ae2f174 (diff) |
implementation of bias=False for VolConv.cu
-rw-r--r-- | lib/THCUNN/generic/SpatialFullConvolution.cu | 1 | ||||
-rw-r--r-- | lib/THCUNN/generic/THCUNN.h | 12 | ||||
-rw-r--r-- | lib/THCUNN/generic/VolumetricConvolution.cu | 96 | ||||
-rw-r--r-- | lib/THCUNN/generic/VolumetricFullConvolution.cu | 239 |
4 files changed, 178 insertions, 170 deletions
diff --git a/lib/THCUNN/generic/SpatialFullConvolution.cu b/lib/THCUNN/generic/SpatialFullConvolution.cu index 7a5d7ea..ec7eb2f 100644 --- a/lib/THCUNN/generic/SpatialFullConvolution.cu +++ b/lib/THCUNN/generic/SpatialFullConvolution.cu @@ -182,7 +182,6 @@ void THNN_(SpatialFullConvolution_updateOutput)( THCTensor_(data)(state, output_n), n_ ); } - } // Free diff --git a/lib/THCUNN/generic/THCUNN.h b/lib/THCUNN/generic/THCUNN.h index 0bc502f..3cfbd84 100644 --- a/lib/THCUNN/generic/THCUNN.h +++ b/lib/THCUNN/generic/THCUNN.h @@ -1023,7 +1023,7 @@ TH_API void THNN_(VolumetricConvolution_updateOutput)( THCTensor *input, THCTensor *output, THCTensor *weight, - THCTensor *bias, + THCTensor *bias, // [OPTIONAL] THCTensor *finput, THCTensor *fgradInput, int dT, int dW, int dH, @@ -1044,7 +1044,7 @@ TH_API void THNN_(VolumetricConvolution_accGradParameters)( THCTensor *input, THCTensor *gradOutput, THCTensor *gradWeight, - THCTensor *gradBias, + THCTensor *gradBias, // [OPTIONAL] THCTensor *finput, THCTensor *fgradInput, int dT, int dW, int dH, @@ -1056,7 +1056,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)( THCTensor *input, THCTensor *output, THCTensor *weight, - THCTensor *bias, + THCTensor *bias, // [OPTIONAL] THCTensor *columns, THCTensor *ones, int kT, int kW, int kH, @@ -1081,7 +1081,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)( THCTensor *input, THCTensor *gradOutput, THCTensor *gradWeight, - THCTensor *gradBias, + THCTensor *gradBias, // [OPTIONAL] THCTensor *columns, THCTensor *ones, int kT, int kW, int kH, @@ -1118,7 +1118,7 @@ TH_API void THNN_(VolumetricFullConvolution_updateOutput)( THCTensor *input, THCTensor *output, THCTensor *weight, - THCTensor *bias, + THCTensor *bias, // [OPTIONAL] THCTensor *finput, THCTensor *fgradInput, int dT, int dW, int dH, @@ -1142,7 +1142,7 @@ TH_API void THNN_(VolumetricFullConvolution_accGradParameters)( THCTensor *input, THCTensor *gradOutput, THCTensor *gradWeight, - THCTensor *gradBias, + THCTensor *gradBias, // [OPTIONAL] THCTensor *finput, THCTensor *fgradInput, int dT, int dW, int dH, diff --git a/lib/THCUNN/generic/VolumetricConvolution.cu b/lib/THCUNN/generic/VolumetricConvolution.cu index d6da545..3343f27 100644 --- a/lib/THCUNN/generic/VolumetricConvolution.cu +++ b/lib/THCUNN/generic/VolumetricConvolution.cu @@ -178,22 +178,26 @@ void THNN_(VolumetricConvolution_updateOutput)( long k_ = 1; // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) - #ifdef THC_REAL_IS_FLOAT - THCudaBlas_Sgemm( - #elif defined(THC_REAL_IS_HALF) - THCudaBlas_Hgemm( - #elif defined(THC_REAL_IS_DOUBLE) - THCudaBlas_Dgemm( - #endif - state, - 't', 'n', - n_, m_, k_, - ScalarConvert<int, real>::to(1), - THCTensor_(data)(state, ones), k_, - THCTensor_(data)(state, bias), k_, - ScalarConvert<int, real>::to(0), - THCTensor_(data)(state, output_n), n_ - ); + if (bias) { + #ifdef THC_REAL_IS_FLOAT + THCudaBlas_Sgemm( + #elif defined(THC_REAL_IS_HALF) + THCudaBlas_Hgemm( + #elif defined(THC_REAL_IS_DOUBLE) + THCudaBlas_Dgemm( + #endif + state, + 't', 'n', + n_, m_, k_, + ScalarConvert<int, real>::to(1), + THCTensor_(data)(state, ones), k_, + THCTensor_(data)(state, bias), k_, + ScalarConvert<int, real>::to(0), + THCTensor_(data)(state, output_n), n_ + ); + } else { + THCTensor_(zero)(state, output_n); + } // Extract columns: im3d2col( @@ -460,36 +464,38 @@ void THNN_(VolumetricConvolution_accGradParameters)( long k_ = outputDepth * outputHeight * outputWidth; // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices) - #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) - #ifdef THC_REAL_IS_FLOAT - THCudaBlas_Sgemv( - #elif defined(THC_REAL_IS_DOUBLE) - THCudaBlas_Dgemv( - #endif - state, - 't', - k_, m_, - scale, - THCTensor_(data)(state, gradOutput_n), k_, - THCTensor_(data)(state, ones), 1, - ScalarConvert<int, real>::to(1), - THCTensor_(data)(state, gradBias), 1 - ); - #endif - #ifdef THC_REAL_IS_HALF - THCudaBlas_Hgemm( - state, - 't', 'n', - m_, 1, k_, - scale, - THCTensor_(data)(state, gradOutput_n), k_, - THCTensor_(data)(state, ones), k_, - ScalarConvert<int, real>::to(1), - THCTensor_(data)(state, gradBias), m_ - ); - #endif + if (gradBias) { + #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) + #ifdef THC_REAL_IS_FLOAT + THCudaBlas_Sgemv( + #elif defined(THC_REAL_IS_DOUBLE) + THCudaBlas_Dgemv( + #endif + state, + 't', + k_, m_, + scale, + THCTensor_(data)(state, gradOutput_n), k_, + THCTensor_(data)(state, ones), 1, + ScalarConvert<int, real>::to(1), + THCTensor_(data)(state, gradBias), 1 + ); + #endif + #ifdef THC_REAL_IS_HALF + THCudaBlas_Hgemm( + state, + 't', 'n', + m_, 1, k_, + scale, + THCTensor_(data)(state, gradOutput_n), k_, + THCTensor_(data)(state, ones), k_, + ScalarConvert<int, real>::to(1), + THCTensor_(data)(state, gradBias), m_ + ); + #endif + } } - + // Free THCTensor_(free)(state, input_n); THCTensor_(free)(state, gradOutput_n); diff --git a/lib/THCUNN/generic/VolumetricFullConvolution.cu b/lib/THCUNN/generic/VolumetricFullConvolution.cu index 334c7da..883874a 100644 --- a/lib/THCUNN/generic/VolumetricFullConvolution.cu +++ b/lib/THCUNN/generic/VolumetricFullConvolution.cu @@ -3,37 +3,37 @@ #else static inline void THNN_(VolumetricFullConvolution_shapeCheck)( - THCState *state, - THCTensor *input, - THCTensor *gradOutput, - THCTensor *weight, - THCTensor *bias, - int dT, int dW, int dH, - int padT, int padW, int padH, - int adjT, int adjW, int adjH) { + THCState *state, + THCTensor *input, + THCTensor *gradOutput, + THCTensor *weight, + THCTensor *bias, + int dT, int dW, int dH, + int padT, int padW, int padH, + int adjT, int adjW, int adjH) { THCUNN_argCheck(state, input->nDimension == 4 || input->nDimension == 5, 2, input, - "4D or 5D (batch mode) tensor expected for input, but got: %s"); + "4D or 5D (batch mode) tensor expected for input, but got: %s"); // number of input & output planes and kernel size is indirectly defined by the weight tensor THCUNN_argCheck(state, weight->nDimension == 5, 4, weight, - "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor " - "expected for weight, but got: %s"); + "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor " + "expected for weight, but got: %s"); THArgCheck(THCTensor_(isContiguous)(state, weight), 4, - "weight tensor has to be contiguous"); + "weight tensor has to be contiguous"); THArgCheck(!bias || THCTensor_(isContiguous)(state, bias), 5, - "bias tensor has to be contiguous"); + "bias tensor has to be contiguous"); THArgCheck(dT > 0 && dW > 0 && dH > 0, 8, - "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); THArgCheck(adjT < dT && adjW < dW && adjH < dH, 14, - "output adjustment must be smaller than stride, but got " - "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d", - adjT, adjH, adjW, dT, dH, dW); + "output adjustment must be smaller than stride, but got " + "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d", + adjT, adjH, adjW, dT, dH, dW); int ndim = input->nDimension; int nInputPlane = THCTensor_(size)(state, weight, 0); int nOutputPlane = THCTensor_(size)(state, weight, 1); - const int kT = (int)weight->size[2]; - const int kH = (int)weight->size[3]; - const int kW = (int)weight->size[4]; + const int kT = (int)weight->size[2]; + const int kH = (int)weight->size[3]; + const int kW = (int)weight->size[4]; if (bias != NULL) { THCUNN_check_dim_size(state, bias, 1, 0, weight->size[1]); @@ -60,7 +60,7 @@ static inline void THNN_(VolumetricFullConvolution_shapeCheck)( if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small", - nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth); + nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth); THCUNN_check_dim_size(state, input, ndim, dimf, nInputPlane); if (gradOutput != NULL) { @@ -72,16 +72,16 @@ static inline void THNN_(VolumetricFullConvolution_shapeCheck)( } void THNN_(VolumetricFullConvolution_updateOutput)( - THCState *state, - THCTensor *input, - THCTensor *output, - THCTensor *weight, - THCTensor *bias, - THCTensor *finput, - THCTensor *fgradInput, - int dT, int dW, int dH, - int padT, int padW, int padH, - int adjT, int adjW, int adjH) + THCState *state, + THCTensor *input, + THCTensor *output, + THCTensor *weight, + THCTensor *bias, + THCTensor *finput, + THCTensor *fgradInput, + int dT, int dW, int dH, + int padT, int padW, int padH, + int adjT, int adjW, int adjH) { THCTensor *columns = finput; @@ -89,16 +89,16 @@ void THNN_(VolumetricFullConvolution_updateOutput)( int nInputPlane = THCTensor_(size)(state, weight, 0); int nOutputPlane = THCTensor_(size)(state, weight, 1); - const int kT = (int)weight->size[2]; - const int kH = (int)weight->size[3]; - const int kW = (int)weight->size[4]; + const int kT = (int)weight->size[2]; + const int kH = (int)weight->size[3]; + const int kW = (int)weight->size[4]; THCUNN_assertSameGPU(state, 6, input, output, weight, - bias, columns, ones); + bias, columns, ones); THNN_(VolumetricFullConvolution_shapeCheck)( - state, input, NULL, weight, bias, - dT, dW, dH, padT, padW, padH, - adjT, adjW, adjH); + state, input, NULL, weight, bias, + dT, dW, dH, padT, padW, padH, + adjT, adjW, adjH); input = THCTensor_(newContiguous)(state, input); @@ -158,14 +158,14 @@ void THNN_(VolumetricFullConvolution_updateOutput)( #elif defined(THC_REAL_IS_DOUBLE) THCudaBlas_Dgemm( #endif - state, - 'n', 't', - n, m, k, - ScalarConvert<int, real>::to(1), - THCTensor_(data)(state, input_n), n, - THCTensor_(data)(state, weight), m, - ScalarConvert<int, real>::to(0), - THCTensor_(data)(state, columns), n + state, + 'n', 't', + n, m, k, + ScalarConvert<int, real>::to(1), + THCTensor_(data)(state, input_n), n, + THCTensor_(data)(state, weight), m, + ScalarConvert<int, real>::to(0), + THCTensor_(data)(state, columns), n ); // Unpack columns back into input: @@ -185,13 +185,14 @@ void THNN_(VolumetricFullConvolution_updateOutput)( long k_ = 1; // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) - #ifdef THC_REAL_IS_FLOAT - THCudaBlas_Sgemm( - #elif defined(THC_REAL_IS_HALF) - THCudaBlas_Hgemm( - #elif defined(THC_REAL_IS_DOUBLE) - THCudaBlas_Dgemm( - #endif + if (bias) { + #ifdef THC_REAL_IS_FLOAT + THCudaBlas_Sgemm( + #elif defined(THC_REAL_IS_HALF) + THCudaBlas_Hgemm( + #elif defined(THC_REAL_IS_DOUBLE) + THCudaBlas_Dgemm( + #endif state, 't', 'n', n_, m_, k_, @@ -200,8 +201,8 @@ void THNN_(VolumetricFullConvolution_updateOutput)( THCTensor_(data)(state, bias), k_, ScalarConvert<int, real>::to(1), THCTensor_(data)(state, output_n), n_ - ); - + ); + } } // Free @@ -218,31 +219,31 @@ void THNN_(VolumetricFullConvolution_updateOutput)( } void THNN_(VolumetricFullConvolution_updateGradInput)( - THCState *state, - THCTensor *input, - THCTensor *gradOutput, - THCTensor *gradInput, - THCTensor *weight, - THCTensor *finput, - THCTensor *fgradInput, - int dT, int dW, int dH, - int padT, int padW, int padH, - int adjT, int adjW, int adjH) + THCState *state, + THCTensor *input, + THCTensor *gradOutput, + THCTensor *gradInput, + THCTensor *weight, + THCTensor *finput, + THCTensor *fgradInput, + int dT, int dW, int dH, + int padT, int padW, int padH, + int adjT, int adjW, int adjH) { THCTensor *gradColumns = finput; int nInputPlane = THCTensor_(size)(state, weight, 0); int nOutputPlane = THCTensor_(size)(state, weight, 1); - const int kT = (int)weight->size[2]; - const int kH = (int)weight->size[3]; - const int kW = (int)weight->size[4]; + const int kT = (int)weight->size[2]; + const int kH = (int)weight->size[3]; + const int kW = (int)weight->size[4]; THCUNN_assertSameGPU(state, 5, input, gradOutput, weight, - gradColumns, gradInput); + gradColumns, gradInput); THNN_(VolumetricFullConvolution_shapeCheck)( - state, input, gradOutput, weight, NULL, - dT, dW, dH, padT, padW, padH, - adjT, adjW, adjH); + state, input, gradOutput, weight, NULL, + dT, dW, dH, padT, padW, padH, + adjT, adjW, adjH); input = THCTensor_(newContiguous)(state, input); gradOutput = THCTensor_(newContiguous)(state, gradOutput); @@ -305,14 +306,14 @@ void THNN_(VolumetricFullConvolution_updateGradInput)( #elif defined(THC_REAL_IS_DOUBLE) THCudaBlas_Dgemm( #endif - state, - 'n', 'n', - n, m, k, - ScalarConvert<int, real>::to(1), - THCTensor_(data)(state, gradColumns), n, - THCTensor_(data)(state, weight), k, - ScalarConvert<int, real>::to(0), - THCTensor_(data)(state, gradInput_n), n + state, + 'n', 'n', + n, m, k, + ScalarConvert<int, real>::to(1), + THCTensor_(data)(state, gradColumns), n, + THCTensor_(data)(state, weight), k, + ScalarConvert<int, real>::to(0), + THCTensor_(data)(state, gradInput_n), n ); } @@ -334,33 +335,33 @@ void THNN_(VolumetricFullConvolution_updateGradInput)( void THNN_(VolumetricFullConvolution_accGradParameters)( - THCState *state, - THCTensor *input, - THCTensor *gradOutput, - THCTensor *gradWeight, - THCTensor *gradBias, - THCTensor *finput, - THCTensor *fgradInput, - int dT, int dW, int dH, - int padT, int padW, int padH, - int adjT, int adjW, int adjH, - real scale) + THCState *state, + THCTensor *input, + THCTensor *gradOutput, + THCTensor *gradWeight, + THCTensor *gradBias, + THCTensor *finput, + THCTensor *fgradInput, + int dT, int dW, int dH, + int padT, int padW, int padH, + int adjT, int adjW, int adjH, + real scale) { THCTensor *columns = finput; THCTensor *ones = fgradInput; int nInputPlane = THCTensor_(size)(state, gradWeight, 0); int nOutputPlane = THCTensor_(size)(state, gradWeight, 1); - const int kT = (int)gradWeight->size[2]; - const int kH = (int)gradWeight->size[3]; - const int kW = (int)gradWeight->size[4]; + const int kT = (int)gradWeight->size[2]; + const int kH = (int)gradWeight->size[3]; + const int kW = (int)gradWeight->size[4]; THCUNN_assertSameGPU(state, 6, input, gradOutput, gradWeight, - gradBias, columns, ones); + gradBias, columns, ones); THNN_(VolumetricFullConvolution_shapeCheck)( - state, input, gradOutput, gradWeight, - gradBias, dT, dW, dH, padT, padW, padH, - adjT, adjW, adjH); + state, input, gradOutput, gradWeight, + gradBias, dT, dW, dH, padT, padW, padH, + adjT, adjW, adjH); input = THCTensor_(newContiguous)(state, input); gradOutput = THCTensor_(newContiguous)(state, gradOutput); @@ -426,14 +427,14 @@ void THNN_(VolumetricFullConvolution_accGradParameters)( #elif defined(THC_REAL_IS_DOUBLE) THCudaBlas_Dgemm( #endif - state, - 't', 'n', - n, m, k, - scale, - THCTensor_(data)(state, columns), k, - THCTensor_(data)(state, input_n), k, - ScalarConvert<int, real>::to(1), - THCTensor_(data)(state, gradWeight), n + state, + 't', 'n', + n, m, k, + scale, + THCTensor_(data)(state, columns), k, + THCTensor_(data)(state, input_n), k, + ScalarConvert<int, real>::to(1), + THCTensor_(data)(state, gradWeight), n ); // Do Bias: @@ -443,12 +444,13 @@ void THNN_(VolumetricFullConvolution_accGradParameters)( long k_ = outputDepth * outputHeight * outputWidth; // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices) - #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) - #ifdef THC_REAL_IS_FLOAT - THCudaBlas_Sgemv( - #elif defined(THC_REAL_IS_DOUBLE) - THCudaBlas_Dgemv( - #endif + if (gradBias) { + #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) + #ifdef THC_REAL_IS_FLOAT + THCudaBlas_Sgemv( + #elif defined(THC_REAL_IS_DOUBLE) + THCudaBlas_Dgemv( + #endif state, 't', k_, m_, @@ -457,10 +459,10 @@ void THNN_(VolumetricFullConvolution_accGradParameters)( THCTensor_(data)(state, ones), 1, ScalarConvert<int, real>::to(1), THCTensor_(data)(state, gradBias), 1 - ); - #endif - #ifdef THC_REAL_IS_HALF - THCudaBlas_Hgemm( + ); + #endif + #ifdef THC_REAL_IS_HALF + THCudaBlas_Hgemm( state, 't', 'n', m_, 1, k_, @@ -469,8 +471,9 @@ void THNN_(VolumetricFullConvolution_accGradParameters)( THCTensor_(data)(state, ones), k_, ScalarConvert<int, real>::to(1), THCTensor_(data)(state, gradBias), m_ - ); - #endif + ); + #endif + } } // Free |