Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/opencl/device_opencl_impl.cpp')
-rw-r--r--intern/cycles/device/opencl/device_opencl_impl.cpp105
1 files changed, 67 insertions, 38 deletions
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index beb3174b111..e851749949d 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -542,7 +542,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
/* Use small global size on CPU devices as it seems to be much faster. */
@@ -610,6 +610,7 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char *where)
OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
: Device(info, stats, profiler, background),
+ load_kernel_num_compiling(0),
kernel_programs(this),
preview_programs(this),
memory_manager(this),
@@ -684,9 +685,9 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
OpenCLDevice::~OpenCLDevice()
{
- task_pool.stop();
- load_required_kernel_task_pool.stop();
- load_kernel_task_pool.stop();
+ task_pool.cancel();
+ load_required_kernel_task_pool.cancel();
+ load_kernel_task_pool.cancel();
memory_manager.free();
@@ -798,7 +799,11 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature
* internally within a single process. */
foreach (OpenCLProgram *program, programs) {
if (!program->load()) {
- load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+ load_kernel_num_compiling++;
+ load_kernel_task_pool.push([=] {
+ program->compile();
+ load_kernel_num_compiling--;
+ });
}
}
return true;
@@ -868,7 +873,7 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste
* Better to check on device level than per kernel as mixing preview and
* non-preview kernels does not work due to different data types */
if (use_preview_kernels) {
- use_preview_kernels = !load_kernel_task_pool.finished();
+ use_preview_kernels = load_kernel_num_compiling.load() > 0;
}
}
return split_kernel->load_kernels(requested_features);
@@ -895,7 +900,7 @@ DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
- bool other_kernels_finished = load_kernel_task_pool.finished();
+ bool other_kernels_finished = load_kernel_num_compiling.load() == 0;
if (use_preview_kernels) {
if (other_kernels_finished) {
return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
@@ -1336,20 +1341,20 @@ void OpenCLDevice::flush_texture_buffers()
memory_manager.alloc("texture_info", texture_info);
}
-void OpenCLDevice::thread_run(DeviceTask *task)
+void OpenCLDevice::thread_run(DeviceTask &task)
{
flush_texture_buffers();
- if (task->type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER) {
RenderTile tile;
- DenoisingTask denoising(this, *task);
+ DenoisingTask denoising(this, task);
/* Allocate buffer for kernel globals */
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
- while (task->acquire_tile(this, tile, task->tile_types)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);
@@ -1368,42 +1373,42 @@ void OpenCLDevice::thread_run(DeviceTask *task)
clFinish(cqCommandQueue);
}
else if (tile.task == RenderTile::BAKE) {
- bake(*task, tile);
+ bake(task, tile);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
- task->release_tile(tile);
+ task.release_tile(tile);
}
kgbuffer.free();
}
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
}
- else if (task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ else if (task.type == DeviceTask::FILM_CONVERT) {
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
}
- else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
- tile.x = task->x;
- tile.y = task->y;
- tile.w = task->w;
- tile.h = task->h;
- tile.buffer = task->buffer;
- tile.sample = task->sample + task->num_samples;
- tile.num_samples = task->num_samples;
- tile.start_sample = task->sample;
- tile.offset = task->offset;
- tile.stride = task->stride;
- tile.buffers = task->buffers;
-
- DenoisingTask denoising(this, *task);
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
}
@@ -1845,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
- denoising.run_denoising(&rtile);
+ denoising.run_denoising(rtile);
}
void OpenCLDevice::shader(DeviceTask &task)
@@ -1937,10 +1942,8 @@ void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
clFinish(cqCommandQueue);
}
-string OpenCLDevice::kernel_build_options(const string *debug_src)
+static bool kernel_build_opencl_2(cl_device_id cdDevice)
{
- string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
-
/* Build with OpenCL 2.0 if available, this improves performance
* with AMD OpenCL drivers on Windows and Linux (legacy drivers).
* Note that OpenCL selects the highest 1.x version by default,
@@ -1948,10 +1951,36 @@ string OpenCLDevice::kernel_build_options(const string *debug_src)
int version_major, version_minor;
if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
if (version_major >= 2) {
- build_options += "-cl-std=CL2.0 ";
+ /* This appears to trigger a driver bug in Radeon RX cards with certain
+ * driver version, so don't use OpenCL 2.0 for those. */
+ string device_name = OpenCLInfo::get_readable_device_name(cdDevice);
+ if (string_startswith(device_name, "Radeon RX 4") ||
+ string_startswith(device_name, "Radeon (TM) RX 4") ||
+ string_startswith(device_name, "Radeon RX 5") ||
+ string_startswith(device_name, "Radeon (TM) RX 5")) {
+ char version[256] = "";
+ int driver_major, driver_minor;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
+ if (sscanf(version, "OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) {
+ return !(driver_major == 3075 && driver_minor <= 12);
+ }
+ }
+
+ return true;
}
}
+ return false;
+}
+
+string OpenCLDevice::kernel_build_options(const string *debug_src)
+{
+ string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
+
+ if (kernel_build_opencl_2(cdDevice)) {
+ build_options += "-cl-std=CL2.0 ";
+ }
+
if (platform_name == "NVIDIA CUDA") {
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "