From 299c554dccf923230321ad7495946543f3e9b457 Mon Sep 17 00:00:00 2001
From: Sylvain Jeaugey
Date: Tue, 19 Nov 2019 14:57:39 -0800
Subject: 2.5.6-1 (#255)

Add LL128 Protocol.

Rewrite the topology detection and tree/ring creation (#179). Improve
tree performance by sending/receiving from different GPUs. Add
model-based tuning to switch between the different algorithms and
protocols.

Rework P2P/SHM detection in containers (#155, #248).

Detect duplicated devices and return an error (#231).

Add tuning for GCP
---
 src/collectives/device/common_kernel.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/collectives/device/common_kernel.h')

diff --git a/src/collectives/device/common_kernel.h b/src/collectives/device/common_kernel.h
index 435a598..aa1e936 100644
--- a/src/collectives/device/common_kernel.h
+++ b/src/collectives/device/common_kernel.h
@@ -263,8 +263,6 @@ __device__ __forceinline__ void ReduceCopyMulti(const int tid, const int nthread
   }
 }
 
-#define WARP_SIZE 32
-
 template
 __device__ __forceinline__ void ReduceCopy128bMulti( const int w, const int nw, const int t,
     int nsrcs, const T* s[MAXSRCS], int ndsts, T* d[MAXDSTS],
-- 
cgit v1.2.3