From 91a5dbbd1763c0a9e3ec111199f8be06b25b61e2 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Mon, 17 May 2021 13:32:50 +0200
Subject: Fix OpenCL group size performance issue on Intel GPUs

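Contributed by Intel. When the device description contains "Intel", each
dimension of the split kernel global size is now clamped to at most 512.
On some scenes, such as classroom, with particular integrated GPUs, this
speeds up rendering by 1.97x. With other benchmarks and GPUs, the speedup
is between 0.99x and 1.14x.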
---
 intern/cycles/device/opencl/device_opencl_impl.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'intern/cycles/device')

diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index d378d32914c..b1d6284171a 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -569,6 +569,11 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
     size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
     int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64),
                                  (int)sqrt(num_elements));
+
+    if (device->info.description.find("Intel") != string::npos) {
+      global_size = make_int2(min(512, global_size.x), min(512, global_size.y));
+    }
+
     VLOG(1) << "Global size: " << global_size << ".";
     return global_size;
   }
-- 
cgit v1.2.3