Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/openmp
diff options
context:
space:
mode:
author: Shilei Tian <i@tianshilei.me> 2022-02-10 22:13:18 +0300
committer: Shilei Tian <i@tianshilei.me> 2022-02-10 22:13:32 +0300
commit: f6685f774697c85d6a352dcea013f46a99f9fe31 (patch)
tree: 3ab9e7cfb3c65b3fc65e9d4e4608af84a0d2bf29 /openmp
parent: 547a667ceeb60dca5447e5bc09165a52b22925eb (diff)
[OpenMP][CUDA] Refine the logic to determine grid size
This patch refines the logic that determines the grid size, because the previous method could bypass the check of whether `CudaBlocksPerGrid` is greater than the actual hardware limit.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D119311
Diffstat (limited to 'openmp')
-rw-r--r-- openmp/libomptarget/plugins/cuda/src/rtl.cpp 10
1 file changed, 6 insertions, 4 deletions
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index e17593878b7c..0ca05f0ec3a0 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1170,15 +1170,17 @@ public:
DP("Using default number of teams %d\n", DeviceData[DeviceId].NumTeams);
CudaBlocksPerGrid = DeviceData[DeviceId].NumTeams;
}
- } else if (TeamNum > DeviceData[DeviceId].BlocksPerGrid) {
- DP("Capping number of teams to team limit %d\n",
- DeviceData[DeviceId].BlocksPerGrid);
- CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
} else {
DP("Using requested number of teams %d\n", TeamNum);
CudaBlocksPerGrid = TeamNum;
}
+ if (CudaBlocksPerGrid > DeviceData[DeviceId].BlocksPerGrid) {
+ DP("Capping number of teams to team limit %d\n",
+ DeviceData[DeviceId].BlocksPerGrid);
+ CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
+ }
+
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
"Launching kernel %s with %d blocks and %d threads in %s mode\n",
(getOffloadEntry(DeviceId, TgtEntryPtr))