diff options
author | Peter Klimenko <peterklimk@outlook.com> | 2020-07-31 11:45:15 +0300 |
---|---|---|
committer | Peter Klimenko <peterklimk@outlook.com> | 2020-07-31 11:45:15 +0300 |
commit | 97a4a8d0fb7fd9ac34f9f5d4d5a0689c01235e14 (patch) | |
tree | fc9746d2210eda08be9d44ae67d5e58d64b48b40 /intern/cycles/device/opencl | |
parent | 4a7c203e9ecc7c5b0370afc0fdd6bcc183dc00df (diff) | |
parent | f3e8326453ae856d7914e45e832a2ed80aa9a9b9 (diff) |
merge
Diffstat (limited to 'intern/cycles/device/opencl')
-rw-r--r-- | intern/cycles/device/opencl/device_opencl.h | 18 | ||||
-rw-r--r-- | intern/cycles/device/opencl/device_opencl_impl.cpp | 105 | ||||
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.cpp | 5 |
3 files changed, 79 insertions, 49 deletions
diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h index 389268e1c2a..e0140996cf0 100644 --- a/intern/cycles/device/opencl/device_opencl.h +++ b/intern/cycles/device/opencl/device_opencl.h @@ -23,6 +23,7 @@ # include "util/util_map.h" # include "util/util_param.h" # include "util/util_string.h" +# include "util/util_task.h" # include "clew.h" @@ -258,6 +259,8 @@ class OpenCLDevice : public Device { TaskPool load_required_kernel_task_pool; /* Task pool for optional kernels (feature kernels during foreground rendering) */ TaskPool load_kernel_task_pool; + std::atomic<int> load_kernel_num_compiling; + cl_context cxContext; cl_command_queue cqCommandQueue; cl_platform_id cpPlatform; @@ -455,14 +458,6 @@ class OpenCLDevice : public Device { void denoise(RenderTile &tile, DenoisingTask &denoising); - class OpenCLDeviceTask : public DeviceTask { - public: - OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task) - { - run = function_bind(&OpenCLDevice::thread_run, device, this); - } - }; - int get_split_task_count(DeviceTask & /*task*/) { return 1; @@ -470,7 +465,10 @@ class OpenCLDevice : public Device { void task_add(DeviceTask &task) { - task_pool.push(new OpenCLDeviceTask(this, task)); + task_pool.push([=] { + DeviceTask task_copy = task; + thread_run(task_copy); + }); } void task_wait() @@ -483,7 +481,7 @@ class OpenCLDevice : public Device { task_pool.cancel(); } - void thread_run(DeviceTask *task); + void thread_run(DeviceTask &task); virtual BVHLayoutMask get_bvh_layout_mask() const { diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index beb3174b111..e851749949d 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -542,7 +542,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel { virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, - DeviceTask * /*task*/) + DeviceTask & /*task*/) { cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice); /* Use small global size on CPU devices as it seems to be much faster. */ @@ -610,6 +610,7 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char *where) OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) : Device(info, stats, profiler, background), + load_kernel_num_compiling(0), kernel_programs(this), preview_programs(this), memory_manager(this), @@ -684,9 +685,9 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b OpenCLDevice::~OpenCLDevice() { - task_pool.stop(); - load_required_kernel_task_pool.stop(); - load_kernel_task_pool.stop(); + task_pool.cancel(); + load_required_kernel_task_pool.cancel(); + load_kernel_task_pool.cancel(); memory_manager.free(); @@ -798,7 +799,11 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature * internally within a single process. */ foreach (OpenCLProgram *program, programs) { if (!program->load()) { - load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program)); + load_kernel_num_compiling++; + load_kernel_task_pool.push([=] { + program->compile(); + load_kernel_num_compiling--; + }); } } return true; @@ -868,7 +873,7 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste * Better to check on device level than per kernel as mixing preview and * non-preview kernels does not work due to different data types */ if (use_preview_kernels) { - use_preview_kernels = !load_kernel_task_pool.finished(); + use_preview_kernels = load_kernel_num_compiling.load() > 0; } } return split_kernel->load_kernels(requested_features); @@ -895,7 +900,7 @@ DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state() return DEVICE_KERNEL_USING_FEATURE_KERNEL; } - bool other_kernels_finished = load_kernel_task_pool.finished(); + bool other_kernels_finished = load_kernel_num_compiling.load() == 0; if (use_preview_kernels) { if (other_kernels_finished) { return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE; @@ -1336,20 +1341,20 @@ void OpenCLDevice::flush_texture_buffers() memory_manager.alloc("texture_info", texture_info); } -void OpenCLDevice::thread_run(DeviceTask *task) +void OpenCLDevice::thread_run(DeviceTask &task) { flush_texture_buffers(); - if (task->type == DeviceTask::RENDER) { + if (task.type == DeviceTask::RENDER) { RenderTile tile; - DenoisingTask denoising(this, *task); + DenoisingTask denoising(this, task); /* Allocate buffer for kernel globals */ device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals"); kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ - while (task->acquire_tile(this, tile, task->tile_types)) { + while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { assert(tile.task == RenderTile::PATH_TRACE); scoped_timer timer(&tile.buffers->render_time); @@ -1368,42 +1373,42 @@ void OpenCLDevice::thread_run(DeviceTask *task) clFinish(cqCommandQueue); } else if (tile.task == RenderTile::BAKE) { - bake(*task, tile); + bake(task, tile); } else if (tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; denoise(tile, denoising); - task->update_progress(&tile, tile.w * tile.h); + task.update_progress(&tile, tile.w * tile.h); } - task->release_tile(tile); + task.release_tile(tile); } kgbuffer.free(); } - else if (task->type == DeviceTask::SHADER) { - shader(*task); + else if (task.type == DeviceTask::SHADER) { + shader(task); } - else if (task->type == DeviceTask::FILM_CONVERT) { - film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); + else if (task.type == DeviceTask::FILM_CONVERT) { + film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); } - else if (task->type == DeviceTask::DENOISE_BUFFER) { + else if (task.type == DeviceTask::DENOISE_BUFFER) { RenderTile tile; - tile.x = task->x; - tile.y = task->y; - tile.w = task->w; - tile.h = task->h; - tile.buffer = task->buffer; - tile.sample = task->sample + task->num_samples; - tile.num_samples = task->num_samples; - tile.start_sample = task->sample; - tile.offset = task->offset; - tile.stride = task->stride; - tile.buffers = task->buffers; - - DenoisingTask denoising(this, *task); + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.sample = task.sample + task.num_samples; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + DenoisingTask denoising(this, task); denoise(tile, denoising); - task->update_progress(&tile, tile.w * tile.h); + task.update_progress(&tile, tile.w * tile.h); } } @@ -1845,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising) denoising.render_buffer.samples = rtile.sample; denoising.buffer.gpu_temporary_mem = true; - denoising.run_denoising(&rtile); + denoising.run_denoising(rtile); } void OpenCLDevice::shader(DeviceTask &task) @@ -1937,10 +1942,8 @@ void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile) clFinish(cqCommandQueue); } -string OpenCLDevice::kernel_build_options(const string *debug_src) +static bool kernel_build_opencl_2(cl_device_id cdDevice) { - string build_options = "-cl-no-signed-zeros -cl-mad-enable "; - /* Build with OpenCL 2.0 if available, this improves performance * with AMD OpenCL drivers on Windows and Linux (legacy drivers). * Note that OpenCL selects the highest 1.x version by default, @@ -1948,10 +1951,36 @@ string OpenCLDevice::kernel_build_options(const string *debug_src) int version_major, version_minor; if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) { if (version_major >= 2) { - build_options += "-cl-std=CL2.0 "; + /* This appears to trigger a driver bug in Radeon RX cards with certain + * driver version, so don't use OpenCL 2.0 for those. */ + string device_name = OpenCLInfo::get_readable_device_name(cdDevice); + if (string_startswith(device_name, "Radeon RX 4") || + string_startswith(device_name, "Radeon (TM) RX 4") || + string_startswith(device_name, "Radeon RX 5") || + string_startswith(device_name, "Radeon (TM) RX 5")) { + char version[256] = ""; + int driver_major, driver_minor; + clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL); + if (sscanf(version, "OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) { + return !(driver_major == 3075 && driver_minor <= 12); + } + } + + return true; } } + return false; +} + +string OpenCLDevice::kernel_build_options(const string *debug_src) +{ + string build_options = "-cl-no-signed-zeros -cl-mad-enable "; + + if (kernel_build_opencl_2(cdDevice)) { + build_options += "-cl-std=CL2.0 "; + } + if (platform_name == "NVIDIA CUDA") { build_options += "-D__KERNEL_OPENCL_NVIDIA__ " diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index fedb3ea8c6a..0285dc969ec 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -64,6 +64,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device) total_size += alloc_size; } + /* Always allocate non-empty buffer, NULL pointers cause problems with some drivers. */ + total_size = max(total_size, 16); + if (need_realloc) { cl_ulong max_buffer_size; clGetDeviceInfo( @@ -251,7 +254,7 @@ void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg) device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer); } else { - device->kernel_set_args(kernel, (*narg)++, 0); + device->kernel_set_args(kernel, (*narg)++); } } } |