diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2018-12-14 02:56:12 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2019-01-30 02:19:27 +0300 |
commit | 1450d42675be325cd3b7a684d4b231eedceb22fb (patch) | |
tree | dc1f88ad03d598c3bb03f20dd81d8ef671fc2bff /src/collectives/device/gen_rules.sh | |
parent | 4861e197fd83f0ac324ac0c21051820f8866e6ea (diff) |
2.4.2-1
Add tree algorithms for allreduce to improve performance at scale.
Add ncclCommAbort() and ncclCommGetAsyncError() to properly handle
network errors and permit recovery.
Detect initial CPU affinity and no longer escape it.
Diffstat (limited to 'src/collectives/device/gen_rules.sh')
-rwxr-xr-x | src/collectives/device/gen_rules.sh | 28 |
1 files changed, 28 insertions, 0 deletions
#!/bin/bash
#
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# See LICENSE.txt for license information
#
# Emits Makefile rules on stdout: one object-file rule per
# (collective, reduction op, data type) combination, followed by a GENOBJS
# variable listing every generated object.
#
# Usage: gen_rules.sh <objdir>

#######################################
# Print the per-object compile rules and the GENOBJS list.
# Arguments: $1 - directory prefix used for the .o and .dep paths
# Outputs:   Makefile fragment on stdout
# Returns:   0
#######################################
gen_rules() {
  local dir=$1
  local base op dt obj
  local opn dtn
  local -a objs=()

  for base in all_reduce all_gather broadcast reduce reduce_scatter; do
    # opn / dtn are the positional indices of the op and data type in the
    # lists below; they are passed to the compiler as -DNCCL_OP / -DNCCL_TYPE.
    opn=0
    for op in sum prod min max; do
      dtn=0
      for dt in i8 u8 i32 u32 i64 u64 f16 f32 f64; do
        obj="${dir}/${base}_${op}_${dt}.o"

        printf '%s : %s.cu %s/%s.dep\n' "$obj" "$base" "$dir" "$base"
        # Recipe lines MUST begin with a literal TAB for make; emit it via
        # printf's \t so it cannot be silently converted to spaces.
        # The emitted text keeps "\\n" so that the recipe's own shell/printf
        # turns it into a newline.
        printf '\t@printf "Compiling %%-35s > %%s\\\\n" %s.cu %s\n' \
          "$base" "$obj"
        printf '\tmkdir -p %s\n' "$dir"
        # ${NVCC} / ${NVCUFLAGS} are make variables and must stay unexpanded.
        # shellcheck disable=SC2016
        printf '\t${NVCC} -DNCCL_OP=%d -DNCCL_TYPE=%d ${NVCUFLAGS} -dc %s.cu -o %s\n' \
          "$opn" "$dtn" "$base" "$obj"
        printf '\n'

        objs+=("$obj")
        dtn=$((dtn + 1))
      done
      opn=$((opn + 1))
    done
  done

  # GENOBJS is a backslash-continued list, one object per line. The blank
  # line at the end terminates the final continuation.
  printf 'GENOBJS := \\\n'
  printf '\t%s \\\n' "${objs[@]}"
  printf '\n'
}

gen_rules "${1-}"