From 394a1373a0cd20b7d0660df4bf80e1231e33cba9 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 24 Mar 2020 19:42:23 +0100 Subject: Cycles: use OpenCL C 2.0 if available, to improve performance for AMD Tested with AMD Radeon Pro WX 9100, where it brings performance back to 2.80 level, and combined with recent changes is about 2-15% faster than 2.80 in our benchmark scenes. This somehow appears to specifically address the issue where adding more shader nodes leads to slower runtime. I found no additional speedup by applying this change to 2.80 or removing the new shader node code. Ref T71479 Patch by Jeroen Bakker. Differential Revision: https://developer.blender.org/D6252 --- intern/cycles/device/opencl/device_opencl.h | 4 ++++ intern/cycles/device/opencl/device_opencl_impl.cpp | 11 +++++++++++ intern/cycles/device/opencl/opencl_util.cpp | 20 ++++++++++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h index cee29aefcfd..d6f4fb43061 100644 --- a/intern/cycles/device/opencl/device_opencl.h +++ b/intern/cycles/device/opencl/device_opencl.h @@ -88,6 +88,10 @@ class OpenCLInfo { static bool device_supported(const string &platform_name, const cl_device_id device_id); static bool platform_version_check(cl_platform_id platform, string *error = NULL); static bool device_version_check(cl_device_id device, string *error = NULL); + static bool get_device_version(cl_device_id device, + int *r_major, + int *r_minor, + string *error = NULL); static string get_hardware_id(const string &platform_name, cl_device_id device_id); static void get_usable_devices(vector *usable_devices); diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 6b1b0e4c369..b7a2be79804 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -1896,6 
+1896,17 @@ string OpenCLDevice::kernel_build_options(const string *debug_src) { string build_options = "-cl-no-signed-zeros -cl-mad-enable "; + /* Build with OpenCL 2.0 if available, this improves performance + * with AMD OpenCL drivers on Windows and Linux (legacy drivers). + * Note that OpenCL selects the highest 1.x version by default, + * only for 2.0 do we need the explicit compiler flag. */ + int version_major, version_minor; + if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) { + if (version_major >= 2) { + build_options += "-cl-std=CL2.0 "; + } + } + if (platform_name == "NVIDIA CUDA") { build_options += "-D__KERNEL_OPENCL_NVIDIA__ " diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index aca4ccdde26..978c75d2e2c 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -810,18 +810,30 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error) return true; } -bool OpenCLInfo::device_version_check(cl_device_id device, string *error) +bool OpenCLInfo::get_device_version(cl_device_id device, int *r_major, int *r_minor, string *error) { - const int req_major = 1, req_minor = 1; - int major, minor; char version[256]; clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL); - if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) { + if (sscanf(version, "OpenCL C %d.%d", r_major, r_minor) < 2) { if (error != NULL) { *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version); } return false; } + if (error != NULL) { + *error = ""; + } + return true; +} + +bool OpenCLInfo::device_version_check(cl_device_id device, string *error) +{ + const int req_major = 1, req_minor = 1; + int major, minor; + if (!get_device_version(device, &major, &minor, error)) { + return false; + } + if (!((major == req_major && minor >= req_minor) || (major > req_major))) { 
if (error != NULL) { *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor); -- cgit v1.2.3