From 91a5dbbd1763c0a9e3ec111199f8be06b25b61e2 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Mon, 17 May 2021 13:32:50 +0200
Subject: Fix OpenCL group size performance issue on Intel GPUs

Contributed by Intel. On some scenes like classroom with particular integrated
GPUs this speeds up rendering 1.97x. With other benchmarks and GPUs it's
between 0.99-1.14x.
---
 intern/cycles/device/opencl/device_opencl_impl.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'intern/cycles/device')

diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index d378d32914c..b1d6284171a 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -569,6 +569,11 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
     size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
     int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64),
                                  (int)sqrt(num_elements));
+
+    if (device->info.description.find("Intel") != string::npos) {
+      global_size = make_int2(min(512, global_size.x), min(512, global_size.y));
+    }
+
     VLOG(1) << "Global size: " << global_size << ".";
     return global_size;
   }
-- 
cgit v1.2.3