Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2017-08-05 05:06:39 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2017-08-05 15:32:59 +0300
commit45dcd20ca9e1f60c51e7752560b0042128740d69 (patch)
tree7c5260eead01919a08f6afc4e2bc3323a5c401c1 /intern/cycles/device/device_cuda.cpp
parentfa05718f278d6810356e64fafcf32c6222d67aab (diff)
Cycles: CUDA split performance tweaks, still far from megakernel.
On Pabellon, 25.8s mega, 35.4s split before, 32.7s split after.
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r--intern/cycles/device/device_cuda.cpp10
1 files changed, 3 insertions, 7 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 3a29538aa13..dbf636e1405 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1898,17 +1898,13 @@ public:
int threads_per_block;
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
- int xthreads = (int)sqrt(threads_per_block);
- int ythreads = (int)sqrt(threads_per_block);
-
- int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads;
- int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads;
+ int xblocks = (dim.global_size[0]*dim.global_size[1] + threads_per_block - 1)/threads_per_block;
cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuLaunchKernel(func,
- xblocks , yblocks, 1, /* blocks */
- xthreads, ythreads, 1, /* threads */
+ xblocks, 1, 1, /* blocks */
+ threads_per_block, 1, 1, /* threads */
0, 0, args, 0));
device->cuda_pop_context();