diff options
Diffstat (limited to 'intern/cycles/device/opencl/device_opencl_impl.cpp')
-rw-r--r-- | intern/cycles/device/opencl/device_opencl_impl.cpp | 105 |
1 files changed, 67 insertions, 38 deletions
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index beb3174b111..e851749949d 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -542,7 +542,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel { virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, - DeviceTask * /*task*/) + DeviceTask & /*task*/) { cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice); /* Use small global size on CPU devices as it seems to be much faster. */ @@ -610,6 +610,7 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char *where) OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) : Device(info, stats, profiler, background), + load_kernel_num_compiling(0), kernel_programs(this), preview_programs(this), memory_manager(this), @@ -684,9 +685,9 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b OpenCLDevice::~OpenCLDevice() { - task_pool.stop(); - load_required_kernel_task_pool.stop(); - load_kernel_task_pool.stop(); + task_pool.cancel(); + load_required_kernel_task_pool.cancel(); + load_kernel_task_pool.cancel(); memory_manager.free(); @@ -798,7 +799,11 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature * internally within a single process. */ foreach (OpenCLProgram *program, programs) { if (!program->load()) { - load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program)); + load_kernel_num_compiling++; + load_kernel_task_pool.push([=] { + program->compile(); + load_kernel_num_compiling--; + }); } } return true; @@ -868,7 +873,7 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste * Better to check on device level than per kernel as mixing preview and * non-preview kernels does not work due to different data types */ if (use_preview_kernels) { - use_preview_kernels = !load_kernel_task_pool.finished(); + use_preview_kernels = load_kernel_num_compiling.load() > 0; } } return split_kernel->load_kernels(requested_features); @@ -895,7 +900,7 @@ DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state() return DEVICE_KERNEL_USING_FEATURE_KERNEL; } - bool other_kernels_finished = load_kernel_task_pool.finished(); + bool other_kernels_finished = load_kernel_num_compiling.load() == 0; if (use_preview_kernels) { if (other_kernels_finished) { return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE; @@ -1336,20 +1341,20 @@ void OpenCLDevice::flush_texture_buffers() memory_manager.alloc("texture_info", texture_info); } -void OpenCLDevice::thread_run(DeviceTask *task) +void OpenCLDevice::thread_run(DeviceTask &task) { flush_texture_buffers(); - if (task->type == DeviceTask::RENDER) { + if (task.type == DeviceTask::RENDER) { RenderTile tile; - DenoisingTask denoising(this, *task); + DenoisingTask denoising(this, task); /* Allocate buffer for kernel globals */ device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals"); kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ - while (task->acquire_tile(this, tile, task->tile_types)) { + while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { assert(tile.task == RenderTile::PATH_TRACE); scoped_timer timer(&tile.buffers->render_time); @@ -1368,42 +1373,42 @@ void OpenCLDevice::thread_run(DeviceTask *task) clFinish(cqCommandQueue); } else if (tile.task == RenderTile::BAKE) { - bake(*task, tile); + bake(task, tile); } else if (tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; denoise(tile, denoising); - task->update_progress(&tile, tile.w * tile.h); + task.update_progress(&tile, tile.w * tile.h); } - task->release_tile(tile); + task.release_tile(tile); } kgbuffer.free(); } - else if (task->type == DeviceTask::SHADER) { - shader(*task); + else if (task.type == DeviceTask::SHADER) { + shader(task); } - else if (task->type == DeviceTask::FILM_CONVERT) { - film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); + else if (task.type == DeviceTask::FILM_CONVERT) { + film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); } - else if (task->type == DeviceTask::DENOISE_BUFFER) { + else if (task.type == DeviceTask::DENOISE_BUFFER) { RenderTile tile; - tile.x = task->x; - tile.y = task->y; - tile.w = task->w; - tile.h = task->h; - tile.buffer = task->buffer; - tile.sample = task->sample + task->num_samples; - tile.num_samples = task->num_samples; - tile.start_sample = task->sample; - tile.offset = task->offset; - tile.stride = task->stride; - tile.buffers = task->buffers; - - DenoisingTask denoising(this, *task); + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.sample = task.sample + task.num_samples; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + DenoisingTask denoising(this, task); denoise(tile, denoising); - task->update_progress(&tile, tile.w * tile.h); + task.update_progress(&tile, tile.w * tile.h); } } @@ -1845,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising) denoising.render_buffer.samples = rtile.sample; denoising.buffer.gpu_temporary_mem = true; - denoising.run_denoising(&rtile); + denoising.run_denoising(rtile); } void OpenCLDevice::shader(DeviceTask &task) @@ -1937,10 +1942,8 @@ void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile) clFinish(cqCommandQueue); } -string OpenCLDevice::kernel_build_options(const string *debug_src) +static bool kernel_build_opencl_2(cl_device_id cdDevice) { - string build_options = "-cl-no-signed-zeros -cl-mad-enable "; - /* Build with OpenCL 2.0 if available, this improves performance * with AMD OpenCL drivers on Windows and Linux (legacy drivers). * Note that OpenCL selects the highest 1.x version by default, @@ -1948,10 +1951,36 @@ string OpenCLDevice::kernel_build_options(const string *debug_src) int version_major, version_minor; if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) { if (version_major >= 2) { - build_options += "-cl-std=CL2.0 "; + /* This appears to trigger a driver bug in Radeon RX cards with certain + * driver version, so don't use OpenCL 2.0 for those. */ + string device_name = OpenCLInfo::get_readable_device_name(cdDevice); + if (string_startswith(device_name, "Radeon RX 4") || + string_startswith(device_name, "Radeon (TM) RX 4") || + string_startswith(device_name, "Radeon RX 5") || + string_startswith(device_name, "Radeon (TM) RX 5")) { + char version[256] = ""; + int driver_major, driver_minor; + clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL); + if (sscanf(version, "OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) { + return !(driver_major == 3075 && driver_minor <= 12); + } + } + + return true; } } + return false; +} + +string OpenCLDevice::kernel_build_options(const string *debug_src) +{ + string build_options = "-cl-no-signed-zeros -cl-mad-enable "; + + if (kernel_build_opencl_2(cdDevice)) { + build_options += "-cl-std=CL2.0 "; + } + if (platform_name == "NVIDIA CUDA") { build_options += "-D__KERNEL_OPENCL_NVIDIA__ " |