Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Klimenko <peterklimk@outlook.com>2020-07-31 11:45:15 +0300
committerPeter Klimenko <peterklimk@outlook.com>2020-07-31 11:45:15 +0300
commit97a4a8d0fb7fd9ac34f9f5d4d5a0689c01235e14 (patch)
treefc9746d2210eda08be9d44ae67d5e58d64b48b40 /intern/cycles/device/opencl
parent4a7c203e9ecc7c5b0370afc0fdd6bcc183dc00df (diff)
parentf3e8326453ae856d7914e45e832a2ed80aa9a9b9 (diff)
merge
Diffstat (limited to 'intern/cycles/device/opencl')
-rw-r--r--intern/cycles/device/opencl/device_opencl.h18
-rw-r--r--intern/cycles/device/opencl/device_opencl_impl.cpp105
-rw-r--r--intern/cycles/device/opencl/memory_manager.cpp5
3 files changed, 79 insertions, 49 deletions
diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h
index 389268e1c2a..e0140996cf0 100644
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -23,6 +23,7 @@
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_string.h"
+# include "util/util_task.h"
# include "clew.h"
@@ -258,6 +259,8 @@ class OpenCLDevice : public Device {
TaskPool load_required_kernel_task_pool;
/* Task pool for optional kernels (feature kernels during foreground rendering) */
TaskPool load_kernel_task_pool;
+ std::atomic<int> load_kernel_num_compiling;
+
cl_context cxContext;
cl_command_queue cqCommandQueue;
cl_platform_id cpPlatform;
@@ -455,14 +458,6 @@ class OpenCLDevice : public Device {
void denoise(RenderTile &tile, DenoisingTask &denoising);
- class OpenCLDeviceTask : public DeviceTask {
- public:
- OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&OpenCLDevice::thread_run, device, this);
- }
- };
-
int get_split_task_count(DeviceTask & /*task*/)
{
return 1;
@@ -470,7 +465,10 @@ class OpenCLDevice : public Device {
void task_add(DeviceTask &task)
{
- task_pool.push(new OpenCLDeviceTask(this, task));
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
}
void task_wait()
@@ -483,7 +481,7 @@ class OpenCLDevice : public Device {
task_pool.cancel();
}
- void thread_run(DeviceTask *task);
+ void thread_run(DeviceTask &task);
virtual BVHLayoutMask get_bvh_layout_mask() const
{
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index beb3174b111..e851749949d 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -542,7 +542,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
/* Use small global size on CPU devices as it seems to be much faster. */
@@ -610,6 +610,7 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char *where)
OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
: Device(info, stats, profiler, background),
+ load_kernel_num_compiling(0),
kernel_programs(this),
preview_programs(this),
memory_manager(this),
@@ -684,9 +685,9 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
OpenCLDevice::~OpenCLDevice()
{
- task_pool.stop();
- load_required_kernel_task_pool.stop();
- load_kernel_task_pool.stop();
+ task_pool.cancel();
+ load_required_kernel_task_pool.cancel();
+ load_kernel_task_pool.cancel();
memory_manager.free();
@@ -798,7 +799,11 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature
* internally within a single process. */
foreach (OpenCLProgram *program, programs) {
if (!program->load()) {
- load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+ load_kernel_num_compiling++;
+ load_kernel_task_pool.push([=] {
+ program->compile();
+ load_kernel_num_compiling--;
+ });
}
}
return true;
@@ -868,7 +873,7 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste
* Better to check on device level than per kernel as mixing preview and
* non-preview kernels does not work due to different data types */
if (use_preview_kernels) {
- use_preview_kernels = !load_kernel_task_pool.finished();
+ use_preview_kernels = load_kernel_num_compiling.load() > 0;
}
}
return split_kernel->load_kernels(requested_features);
@@ -895,7 +900,7 @@ DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
- bool other_kernels_finished = load_kernel_task_pool.finished();
+ bool other_kernels_finished = load_kernel_num_compiling.load() == 0;
if (use_preview_kernels) {
if (other_kernels_finished) {
return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
@@ -1336,20 +1341,20 @@ void OpenCLDevice::flush_texture_buffers()
memory_manager.alloc("texture_info", texture_info);
}
-void OpenCLDevice::thread_run(DeviceTask *task)
+void OpenCLDevice::thread_run(DeviceTask &task)
{
flush_texture_buffers();
- if (task->type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER) {
RenderTile tile;
- DenoisingTask denoising(this, *task);
+ DenoisingTask denoising(this, task);
/* Allocate buffer for kernel globals */
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
- while (task->acquire_tile(this, tile, task->tile_types)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);
@@ -1368,42 +1373,42 @@ void OpenCLDevice::thread_run(DeviceTask *task)
clFinish(cqCommandQueue);
}
else if (tile.task == RenderTile::BAKE) {
- bake(*task, tile);
+ bake(task, tile);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
- task->release_tile(tile);
+ task.release_tile(tile);
}
kgbuffer.free();
}
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
}
- else if (task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ else if (task.type == DeviceTask::FILM_CONVERT) {
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
}
- else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
- tile.x = task->x;
- tile.y = task->y;
- tile.w = task->w;
- tile.h = task->h;
- tile.buffer = task->buffer;
- tile.sample = task->sample + task->num_samples;
- tile.num_samples = task->num_samples;
- tile.start_sample = task->sample;
- tile.offset = task->offset;
- tile.stride = task->stride;
- tile.buffers = task->buffers;
-
- DenoisingTask denoising(this, *task);
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
}
@@ -1845,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
- denoising.run_denoising(&rtile);
+ denoising.run_denoising(rtile);
}
void OpenCLDevice::shader(DeviceTask &task)
@@ -1937,10 +1942,8 @@ void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
clFinish(cqCommandQueue);
}
-string OpenCLDevice::kernel_build_options(const string *debug_src)
+static bool kernel_build_opencl_2(cl_device_id cdDevice)
{
- string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
-
/* Build with OpenCL 2.0 if available, this improves performance
* with AMD OpenCL drivers on Windows and Linux (legacy drivers).
* Note that OpenCL selects the highest 1.x version by default,
@@ -1948,10 +1951,36 @@ string OpenCLDevice::kernel_build_options(const string *debug_src)
int version_major, version_minor;
if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
if (version_major >= 2) {
- build_options += "-cl-std=CL2.0 ";
+ /* This appears to trigger a driver bug in Radeon RX cards with certain
+ * driver version, so don't use OpenCL 2.0 for those. */
+ string device_name = OpenCLInfo::get_readable_device_name(cdDevice);
+ if (string_startswith(device_name, "Radeon RX 4") ||
+ string_startswith(device_name, "Radeon (TM) RX 4") ||
+ string_startswith(device_name, "Radeon RX 5") ||
+ string_startswith(device_name, "Radeon (TM) RX 5")) {
+ char version[256] = "";
+ int driver_major, driver_minor;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
+ if (sscanf(version, "OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) {
+ return !(driver_major == 3075 && driver_minor <= 12);
+ }
+ }
+
+ return true;
}
}
+ return false;
+}
+
+string OpenCLDevice::kernel_build_options(const string *debug_src)
+{
+ string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
+
+ if (kernel_build_opencl_2(cdDevice)) {
+ build_options += "-cl-std=CL2.0 ";
+ }
+
if (platform_name == "NVIDIA CUDA") {
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp
index fedb3ea8c6a..0285dc969ec 100644
--- a/intern/cycles/device/opencl/memory_manager.cpp
+++ b/intern/cycles/device/opencl/memory_manager.cpp
@@ -64,6 +64,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
total_size += alloc_size;
}
+ /* Always allocate non-empty buffer, NULL pointers cause problems with some drivers. */
+ total_size = max(total_size, 16);
+
if (need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(
@@ -251,7 +254,7 @@ void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer);
}
else {
- device->kernel_set_args(kernel, (*narg)++, 0);
+ device->kernel_set_args(kernel, (*narg)++);
}
}
}