diff options
Diffstat (limited to 'lib/THCUNN/BatchNormalization.cu')
-rw-r--r-- | lib/THCUNN/BatchNormalization.cu | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/lib/THCUNN/BatchNormalization.cu b/lib/THCUNN/BatchNormalization.cu
index 125e3ff..e6717c7 100644
--- a/lib/THCUNN/BatchNormalization.cu
+++ b/lib/THCUNN/BatchNormalization.cu
@@ -5,7 +5,7 @@
 #include "THCDeviceTensor.cuh"
 #include "THCDeviceTensorUtils.cuh"
-
+#include "THCDeviceUtils.cuh"
 const int WARP_SIZE = 32;
 // The maximum number of threads in a block
@@ -80,7 +80,7 @@ template <typename T>
 static __device__ __forceinline__ T warpSum(T val) {
 #if __CUDA_ARCH__ >= 300
   for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
-    val += __shfl_xor(val, 1 << i, WARP_SIZE);
+    val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
   }
 #else
   __shared__ T values[MAX_BLOCK_SIZE];