Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-01-26 09:41:48 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-03-08 08:52:41 +0300
commit520b53364c73c75c4ff400d639dad13630f0e6fc (patch)
tree8ecdc8d47cb3d604b42d955840b9a3d38738946b /intern
parentdfd6055eb0798c8cba245d47cd4a3fe51270a52a (diff)
Cycles: Add OpenCL kernel for zeroing memory buffers
Transferring memory to the device was very slow and there's really no need when only zeroing a buffer.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp54
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl16
2 files changed, 69 insertions, 1 deletions
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index c3f099b795e..7fa14eee70c 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -206,6 +206,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
base_program.add_kernel(ustring("convert_to_half_float"));
base_program.add_kernel(ustring("shader"));
base_program.add_kernel(ustring("bake"));
+ base_program.add_kernel(ustring("zero_buffer"));
vector<OpenCLProgram*> programs;
programs.push_back(&base_program);
@@ -311,10 +312,61 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
void OpenCLDeviceBase::mem_zero(device_memory& mem)
{
if(mem.device_pointer) {
+ if(base_program.is_loaded()) {
+ cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
+
+ size_t global_size[] = {1024, 1024};
+ size_t num_threads = global_size[0] * global_size[1];
+
+ cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
+ unsigned long long d_offset = 0;
+ unsigned long long d_size = 0;
+
+ while(d_offset < mem.memory_size()) {
+ d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
+
+ kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
+
+ ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
+ ckZeroBuffer,
+ 2,
+ NULL,
+ global_size,
+ NULL,
+ 0,
+ NULL,
+ NULL);
+ opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
+
+ d_offset += d_size;
+ }
+ }
+
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
}
- mem_copy_to(mem);
+
+ if(!base_program.is_loaded()) {
+ void* zero = (void*)mem.data_pointer;
+
+ if(!mem.data_pointer) {
+ zero = util_aligned_malloc(mem.memory_size(), 16);
+ memset(zero, 0, mem.memory_size());
+ }
+
+ opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
+ CL_MEM_PTR(mem.device_pointer),
+ CL_TRUE,
+ 0,
+ mem.memory_size(),
+ zero,
+ 0,
+ NULL, NULL));
+
+ if(!mem.data_pointer) {
+ util_aligned_free(zero);
+ }
+ }
}
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index a68f97857b6..e501fd4f015 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -193,4 +193,20 @@ __kernel void kernel_ocl_convert_to_half_float(
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
+__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset)
+{
+ size_t i = get_global_id(0) + get_global_id(1) * get_global_size(0);
+
+ if(i < size / sizeof(float4)) {
+ buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ else if(i == size / sizeof(float4)) {
+ ccl_global uchar *b = (ccl_global uchar*)&buffer[i+offset/sizeof(float4)];
+
+ for(i = 0; i < size % sizeof(float4); i++) {
+ *(b++) = 0;
+ }
+ }
+}
+
#endif /* __COMPILE_ONLY_MEGAKERNEL__ */