diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-05-17 14:32:50 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-05-17 20:40:57 +0300 |
commit | 91a5dbbd1763c0a9e3ec111199f8be06b25b61e2 (patch) | |
tree | 60d90466c1d798a6f4792597fa7e9b314b24487e | |
parent | bb32ecadb5be0012fb0f06dcb83dce66fd5cd871 (diff) |
Fix OpenCL group size performance issue on Intel GPUs
Contributed by Intel. On some scenes, such as classroom, this speeds up
rendering by 1.97x on particular integrated GPUs. With other benchmarks and
GPUs, the speedup is between 0.99x and 1.14x.
-rw-r--r-- | intern/cycles/device/opencl/device_opencl_impl.cpp | 5 |
1 file changed, 5 insertions, 0 deletions
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index d378d32914c..b1d6284171a 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -569,6 +569,11 @@ class OpenCLSplitKernel : public DeviceSplitKernel { size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size); int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements)); + + if (device->info.description.find("Intel") != string::npos) { + global_size = make_int2(min(512, global_size.x), min(512, global_size.y)); + } + VLOG(1) << "Global size: " << global_size << "."; return global_size; } |