diff options
author | soumith <soumith@fb.com> | 2016-10-14 02:52:40 +0300 |
---|---|---|
committer | soumith <soumith@fb.com> | 2016-10-14 02:52:40 +0300 |
commit | 065ce5aca2afbd86ea2632ecf122cb536dfe0106 (patch) | |
tree | 32b55a875e6fabfcd979745e30e320583016cb75 | |
parent | 1fcbb03fbf8df1caf545c7383ad5e44cd01ff459 (diff) |
removing CUDA_HALF_INSTRUCTIONS and enabling hgemm only for P100 (p100fix)
-rw-r--r-- | lib/THC/THCBlas.cu | 2 | ||||
-rw-r--r-- | lib/THC/THCHalf.cu | 8 | ||||
-rw-r--r-- | lib/THC/THCHalf.h | 8 |
3 files changed, 12 insertions, 6 deletions
diff --git a/lib/THC/THCBlas.cu b/lib/THC/THCBlas.cu
index 2ea3157..e346202 100644
--- a/lib/THC/THCBlas.cu
+++ b/lib/THC/THCBlas.cu
@@ -245,7 +245,7 @@ void THCudaBlas_Hgemm(THCState *state, char transa, char transb, long m, long n,
   cublasSetStream(handle, THCState_getCurrentStream(state));
 
   // Check for native Hgemm support
-  if (THC_nativeHalfInstructions(state)) {
+  if (THC_fastHalfInstructions(state)) {
     THCublasCheck(cublasHgemm(handle, opa, opb,
                               i_m, i_n, i_k, &alpha, a, i_lda, b, i_ldb,
                               &beta, c, i_ldc));
diff --git a/lib/THC/THCHalf.cu b/lib/THC/THCHalf.cu
index 5a77293..96397ff 100644
--- a/lib/THC/THCHalf.cu
+++ b/lib/THC/THCHalf.cu
@@ -128,3 +128,11 @@ THC_EXTERNC int THC_nativeHalfInstructions(THCState *state) {
   return (prop->major > 5 ||
           (prop->major == 5 && prop->minor == 3));
 }
+
+THC_EXTERNC int THC_fastHalfInstructions(THCState *state) {
+  cudaDeviceProp* prop =
+    THCState_getCurrentDeviceProperties(state);
+
+  // Check for CC 6.0 only (corresponds to P100)
+  return (prop->major == 6 && prop->minor == 0);
+}
diff --git a/lib/THC/THCHalf.h b/lib/THC/THCHalf.h
index 795874e..3359064 100644
--- a/lib/THC/THCHalf.h
+++ b/lib/THC/THCHalf.h
@@ -8,11 +8,6 @@
 #define CUDA_HALF_TENSOR 1
 #endif
 
-/* Kernel side: Native fp16 ALU instructions are available if we have this: */
-#if defined(CUDA_HALF_TENSOR) && (CUDA_VERSION >= 8000) && (__CUDA_ARCH__ >= 530)
-#define CUDA_HALF_INSTRUCTIONS 1
-#endif
-
 #ifdef CUDA_HALF_TENSOR
 
 #include <cuda_fp16.h>
@@ -26,6 +21,9 @@ THC_API float THC_half2float(half a);
 /* Check for native fp16 support on the current device (CC 5.3+) */
 THC_EXTERNC int THC_nativeHalfInstructions(THCState *state);
 
+/* Check for performant native fp16 support on the current device */
+THC_EXTERNC int THC_fastHalfInstructions(THCState *state);
+
 #endif /* CUDA_HALF_TENSOR */
 
 #endif