From df30b50f2f5ee66e055fc795fea91c0f7f4954d5 Mon Sep 17 00:00:00 2001
From: Stefan Werner
Date: Fri, 6 Jul 2018 11:42:34 +0200
Subject: Cycles: Enabled half precision textures for OpenCL devices that support the cl_khr_fp16 extension.

---
Review notes (ignored by git am / git apply, not part of the commit message):

 * OpenCLInfo::get_device_extensions() now asks clGetDeviceInfo() for the
   size of CL_DEVICE_EXTENSIONS before reading it. The previous version read
   into a 1024-byte stack buffer; a longer extension list makes the query
   fail, which left device_extensions empty and has_half_images false.
 * NOTE(review): kernel_compat_opencl.h emits the cl_khr_fp16 pragma under
   __KERNEL_CL_KHR_FP16__, while kernel_opencl_image.h tests cl_khr_fp16.
   Nothing in this patch defines __KERNEL_CL_KHR_FP16__ -- confirm it is
   passed in the kernel build options, otherwise the pragma is never emitted.
 * NOTE(review): line breaks and indentation were reconstructed from a
   whitespace-collapsed copy of this patch; if context lines do not match
   the tree, apply with --ignore-whitespace. The post-image blob hash on the
   opencl_util.cpp index line predates the get_device_extensions() change.

 intern/cycles/device/device_opencl.cpp             |  4 +++
 intern/cycles/device/opencl/opencl.h               | 13 ++++++--
 intern/cycles/device/opencl/opencl_util.cpp        | 46 +++++++++++++++++++++-
 intern/cycles/kernel/kernel_compat_opencl.h        |  5 +++
 .../kernel/kernels/opencl/kernel_opencl_image.h    | 11 +++++++
 5 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 9d61bbdae5d..95eef8263d4 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -137,6 +137,10 @@ void device_opencl_info(vector& devices)
 		info.has_volume_decoupled = false;
 		info.bvh_layout_mask = BVH_LAYOUT_BVH2;
 		info.id = id;
+
+		/* Check OpenCL extensions */
+		info.has_half_images = platform_device.device_extensions.find("cl_khr_fp16") != string::npos;
+
 		devices.push_back(info);
 		num_devices++;
 	}
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 22e0503365c..d0571fc3c14 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -59,19 +59,22 @@ struct OpenCLPlatformDevice {
 	                     cl_device_id device_id,
 	                     cl_device_type device_type,
 	                     const string& device_name,
-	                     const string& hardware_id)
+	                     const string& hardware_id,
+	                     const string& device_extensions)
 	  : platform_id(platform_id),
 	    platform_name(platform_name),
 	    device_id(device_id),
 	    device_type(device_type),
 	    device_name(device_name),
-	    hardware_id(hardware_id) {}
+	    hardware_id(hardware_id),
+	    device_extensions(device_extensions) {}
 	cl_platform_id platform_id;
 	string platform_name;
 	cl_device_id device_id;
 	cl_device_type device_type;
 	string device_name;
 	string hardware_id;
+	string device_extensions;
 };
 
 /* Contains all static OpenCL helper functions. */
@@ -130,6 +133,12 @@ public:
 
 	static string get_device_name(cl_device_id device_id);
 
+	static bool get_device_extensions(cl_device_id device_id,
+	                                  string *device_extensions,
+	                                  cl_int* error = NULL);
+
+	static string get_device_extensions(cl_device_id device_id);
+
 	static bool get_device_type(cl_device_id device_id,
 	                            cl_device_type *device_type,
 	                            cl_int* error = NULL);
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index 78ed401bff5..9104f64bedd 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -831,13 +831,15 @@ void OpenCLInfo::get_usable_devices(vector *usable_devices
 				FIRST_VLOG(2) << "Adding new device "
 				              << readable_device_name << ".";
 				string hardware_id = get_hardware_id(platform_name, device_id);
+				string device_extensions = get_device_extensions(device_id);
 				usable_devices->push_back(OpenCLPlatformDevice(
 				        platform_id,
 				        platform_name,
 				        device_id,
 				        device_type,
 				        readable_device_name,
-				        hardware_id));
+				        hardware_id,
+				        device_extensions));
 			}
 			else {
 				FIRST_VLOG(2) << "Ignoring device " << device_name
@@ -1047,6 +1049,48 @@ string OpenCLInfo::get_device_name(cl_device_id device_id)
 	return device_name;
 }
 
+bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
+                                       string *device_extensions,
+                                       cl_int* error)
+{
+	/* The extension string has no fixed maximum length, so query the
+	 * required size first rather than reading into a fixed-size buffer,
+	 * which would make clGetDeviceInfo() fail on long extension lists. */
+	size_t buffer_size = 0;
+	cl_int err = clGetDeviceInfo(device_id,
+	                             CL_DEVICE_EXTENSIONS,
+	                             0,
+	                             NULL,
+	                             &buffer_size);
+	string buffer(buffer_size, '\0');
+	if(err == CL_SUCCESS && buffer_size > 0) {
+		err = clGetDeviceInfo(device_id,
+		                      CL_DEVICE_EXTENSIONS,
+		                      buffer_size,
+		                      &buffer[0],
+		                      NULL);
+	}
+	if(error != NULL) {
+		*error = err;
+	}
+	if(err != CL_SUCCESS) {
+		*device_extensions = "";
+		return false;
+	}
+	/* The returned size includes the terminating NUL; c_str() drops it. */
+	*device_extensions = buffer.c_str();
+	return true;
+}
+
+string OpenCLInfo::get_device_extensions(cl_device_id device_id)
+{
+	string device_extensions;
+	if(!get_device_extensions(device_id, &device_extensions)) {
+		return "";
+	}
+	return device_extensions;
+}
+
 bool OpenCLInfo::get_device_type(cl_device_id device_id,
                                  cl_device_type *device_type,
                                  cl_int* error)
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 438c819eec6..d1ae10a0384 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -150,6 +150,11 @@
 /* define NULL */
 #define NULL 0
 
+/* enable extensions */
+#ifdef __KERNEL_CL_KHR_FP16__
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
 #include "util/util_half.h"
 #include "util/util_types.h"
 
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 011623130eb..dd9d683e030 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -72,6 +72,17 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_glo
 		return make_float4(f, f, f, 1.0f);
 	}
 	/* Byte */
+#ifdef cl_khr_fp16
+	/* half and half4 are optional in OpenCL */
+	else if(texture_type == IMAGE_DATA_TYPE_HALF) {
+		float f = tex_fetch(half, info, offset);
+		return make_float4(f, f, f, 1.0f);
+	}
+	else if(texture_type == IMAGE_DATA_TYPE_HALF4) {
+		half4 r = tex_fetch(half4, info, offset);
+		return make_float4(r.x, r.y, r.z, r.w);
+	}
+#endif
 	else {
 		uchar r = tex_fetch(uchar, info, offset);
 		float f = r * (1.0f/255.0f);
-- 
cgit v1.2.3