Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/opencl/opencl_split.cpp')
-rw-r--r-- intern/cycles/device/opencl/opencl_split.cpp 74
1 file changed, 68 insertions, 6 deletions
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 76dcbd6fc9a..76d9983e9a2 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -25,6 +25,7 @@
#include "device/device_split_kernel.h"
+#include "util/util_algorithm.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_path.h"
@@ -176,17 +177,62 @@ protected:
friend class OpenCLSplitKernelFunction;
};
+struct CachedSplitMemory { /* Pointers to the memory that OpenCLSplitKernelFunction::enqueue() binds as kernel arguments; one instance is referenced by every kernel function OpenCLSplitKernel creates. */
+ int id; /* Bumped each time the pointers below are reassigned; a kernel function re-sets its arguments when its cached_id differs from this. NOTE(review): no initializer is visible in this diff although the value is read and incremented -- confirm it is initialized. */
+ device_memory *split_data; /* Bound right after the kg and data arguments. */
+ device_memory *ray_state; /* Bound after split_data. */
+ device_ptr *rng_state; /* Assigned the address of rtile.rng_state; bound after ray_state. */
+ device_memory *queue_index; /* Bound after the KERNEL_TEX texture arguments. */
+ device_memory *use_queues_flag; /* Bound after queue_index. */
+ device_memory *work_pools; /* Assigned the address of work_pool_wgs; bound after use_queues_flag. */
+ device_ptr *buffer; /* Assigned the address of rtile.buffer; bound last. */
+};
+
class OpenCLSplitKernelFunction : public SplitKernelFunction {
public:
OpenCLDeviceSplitKernel* device;
OpenCLDeviceBase::OpenCLProgram program;
+ CachedSplitMemory& cached_memory;
+ int cached_id;
+
+ OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device, CachedSplitMemory& cached_memory) : /* Stores a reference (not a copy) to cached_memory, so the referenced object has to stay alive while enqueue() can be called. */
+ device(device), cached_memory(cached_memory), cached_id(cached_memory.id-1) /* Seeded with id-1 so the cached_id != cached_memory.id test in enqueue() is true until arguments have been set once. */
+ {
+ }
- OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device) : device(device) {}
- ~OpenCLSplitKernelFunction() { program.release(); }
+ ~OpenCLSplitKernelFunction() /* Releases the OpenCLProgram member owned by this kernel function. */
+ {
+ program.release();
+ }
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
{
- device->kernel_set_args(program(), 0, kg, data);
+ if(cached_id != cached_memory.id) {
+ cl_uint start_arg_index =
+ device->kernel_set_args(program(),
+ 0,
+ kg,
+ data,
+ *cached_memory.split_data,
+ *cached_memory.ray_state,
+ *cached_memory.rng_state);
+
+/* TODO(sergey): Avoid map lookup here. */
+#define KERNEL_TEX(type, ttype, name) \
+ device->set_kernel_arg_mem(program(), &start_arg_index, #name);
+#include "kernel/kernel_textures.h"
+#undef KERNEL_TEX
+
+ start_arg_index +=
+ device->kernel_set_args(program(),
+ start_arg_index,
+ *cached_memory.queue_index,
+ *cached_memory.use_queues_flag,
+ *cached_memory.work_pools,
+ *cached_memory.buffer);
+
+ cached_id = cached_memory.id;
+ }
device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
program(),
@@ -213,14 +259,15 @@ public:
class OpenCLSplitKernel : public DeviceSplitKernel {
OpenCLDeviceSplitKernel *device;
+ CachedSplitMemory cached_memory;
public:
explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) {
}
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name,
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
const DeviceRequestedFeatures& requested_features)
{
- OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device);
+ OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory);
bool single_program = OpenCLInfo::use_single_program();
kernel->program =
@@ -349,6 +396,15 @@ public:
return false;
}
+ cached_memory.split_data = &split_data;
+ cached_memory.ray_state = &ray_state;
+ cached_memory.rng_state = &rtile.rng_state;
+ cached_memory.queue_index = &queue_index;
+ cached_memory.use_queues_flag = &use_queues_flag;
+ cached_memory.work_pools = &work_pool_wgs;
+ cached_memory.buffer = &rtile.buffer;
+ cached_memory.id++;
+
return true;
}
@@ -368,12 +424,18 @@ public:
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
+
+ if(DebugFlags().opencl.mem_limit) {
+ max_buffer_size = min(max_buffer_size,
+ cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used));
+ }
+
VLOG(1) << "Maximum device allocation size: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";
size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2);
- int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements));
+ int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements));
VLOG(1) << "Global size: " << global_size << ".";
return global_size;
}