Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/cutorch.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: soumith <soumith@fb.com> 2016-10-14 02:52:40 +0300
committer: soumith <soumith@fb.com> 2016-10-14 02:52:40 +0300
commit: 065ce5aca2afbd86ea2632ecf122cb536dfe0106 (patch)
tree: 32b55a875e6fabfcd979745e30e320583016cb75
parent: 1fcbb03fbf8df1caf545c7383ad5e44cd01ff459 (diff)
removing CUDA_HALF_INSTRUCTIONS and enabling hgemm only for P100 [p100fix]
-rw-r--r-- lib/THC/THCBlas.cu 2
-rw-r--r-- lib/THC/THCHalf.cu 8
-rw-r--r-- lib/THC/THCHalf.h 8
3 files changed, 12 insertions, 6 deletions
diff --git a/lib/THC/THCBlas.cu b/lib/THC/THCBlas.cu
index 2ea3157..e346202 100644
--- a/lib/THC/THCBlas.cu
+++ b/lib/THC/THCBlas.cu
@@ -245,7 +245,7 @@ void THCudaBlas_Hgemm(THCState *state, char transa, char transb, long m, long n,
cublasSetStream(handle, THCState_getCurrentStream(state));
// Check for native Hgemm support
- if (THC_nativeHalfInstructions(state)) {
+ if (THC_fastHalfInstructions(state)) {
THCublasCheck(cublasHgemm(handle, opa, opb,
i_m, i_n, i_k, &alpha, a, i_lda, b, i_ldb,
&beta, c, i_ldc));
diff --git a/lib/THC/THCHalf.cu b/lib/THC/THCHalf.cu
index 5a77293..96397ff 100644
--- a/lib/THC/THCHalf.cu
+++ b/lib/THC/THCHalf.cu
@@ -128,3 +128,11 @@ THC_EXTERNC int THC_nativeHalfInstructions(THCState *state) {
return (prop->major > 5 ||
(prop->major == 5 && prop->minor == 3));
}
+
+THC_EXTERNC int THC_fastHalfInstructions(THCState *state) {
+ cudaDeviceProp* prop =
+ THCState_getCurrentDeviceProperties(state);
+
+ // Check for CC 6.0 only (corresponds to P100)
+ return (prop->major == 6 && prop->minor == 0);
+}
diff --git a/lib/THC/THCHalf.h b/lib/THC/THCHalf.h
index 795874e..3359064 100644
--- a/lib/THC/THCHalf.h
+++ b/lib/THC/THCHalf.h
@@ -8,11 +8,6 @@
#define CUDA_HALF_TENSOR 1
#endif
-/* Kernel side: Native fp16 ALU instructions are available if we have this: */
-#if defined(CUDA_HALF_TENSOR) && (CUDA_VERSION >= 8000) && (__CUDA_ARCH__ >= 530)
-#define CUDA_HALF_INSTRUCTIONS 1
-#endif
-
#ifdef CUDA_HALF_TENSOR
#include <cuda_fp16.h>
@@ -26,6 +21,9 @@ THC_API float THC_half2float(half a);
/* Check for native fp16 support on the current device (CC 5.3+) */
THC_EXTERNC int THC_nativeHalfInstructions(THCState *state);
+/* Check for performant native fp16 support on the current device */
+THC_EXTERNC int THC_fastHalfInstructions(THCState *state);
+
#endif /* CUDA_HALF_TENSOR */
#endif