diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-12-30 17:04:01 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-12-30 17:04:04 +0300 |
commit | 3918c8b9a52ae9dcdb0488df92d7d3ca615be8c7 (patch) | |
tree | 3740e477610ab3ed020d505cd98308d29f663f5b /intern/cycles/device | |
parent | c8a551bf13edf711b93ea89cd3fcd244e4206cee (diff) |
Cycles: Optionally output luminance from the shader evaluation kernel
This makes it possible to move some parts of evaluation from host to the device
and hopefully reduce memory usage by avoiding having a full RGBA buffer on the host.
Reviewers: juicyfruit, lukasstockner97, brecht
Reviewed By: lukasstockner97, brecht
Differential Revision: https://developer.blender.org/D1702
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 12 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 20 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/device/device_network.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/device/device_network.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/device/device_task.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_task.h | 2 |
8 files changed, 37 insertions, 14 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index f06963c146e..832f4d1c1fd 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -343,7 +343,7 @@ public: #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif - void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int); + void(*shader_kernel)(KernelGlobals*, uint4*, float4*, float*, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 if(system_cpu_support_avx2()) @@ -374,8 +374,14 @@ public: for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); + shader_kernel(&kg, + (uint4*)task.shader_input, + (float4*)task.shader_output, + (float*)task.shader_output_luma, + task.shader_eval_type, + x, + task.offset, + sample); if(task.get_cancel() || task_pool.canceled()) break; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index d9d6fd77ecb..5c9ca3454c6 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -726,6 +726,7 @@ public: CUfunction cuShader; CUdeviceptr d_input = cuda_device_ptr(task.shader_input); CUdeviceptr d_output = cuda_device_ptr(task.shader_output); + CUdeviceptr d_output_luma = cuda_device_ptr(task.shader_output_luma); /* get kernel function */ if(task.shader_eval_type >= SHADER_EVAL_BAKE) { @@ -747,13 +748,18 @@ public: int shader_w = min(shader_chunk_size, end - shader_x); /* pass in parameters */ - void *args[] = {&d_input, - &d_output, - &task.shader_eval_type, - &shader_x, - &shader_w, - &offset, - &sample}; + void *args[8]; + int arg = 0; + args[arg++] = &d_input; + args[arg++] = &d_output; + if(task.shader_eval_type < SHADER_EVAL_BAKE) { + args[arg++] = &d_output_luma; + } + args[arg++] = 
&task.shader_eval_type; + args[arg++] = &shader_x; + args[arg++] = &shader_w; + args[arg++] = &offset; + args[arg++] = &sample; /* launch kernel */ int threads_per_block; diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 8fb841b2b0d..069305e8a29 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -316,6 +316,7 @@ public: if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half]; if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input]; if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output]; + if(task.shader_output_luma) subtask.shader_output_luma = sub.ptr_map[task.shader_output_luma]; sub.device->task_add(subtask); } diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index afa35224aba..23faa61e8e5 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -648,6 +648,9 @@ protected: if(task.shader_output) task.shader_output = device_ptr_from_client_pointer(task.shader_output); + if(task.shader_output)luma) + task.shader_output_luma = device_ptr_from_client_pointer(task.shader_output_luma); + task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2); task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1); diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index 2e751f6697f..60ecc1d0a86 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -132,7 +132,7 @@ public: archive & type & task.x & task.y & task.w & task.h; archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples; archive & task.offset & task.stride; - archive & task.shader_input & task.shader_output & task.shader_eval_type; + archive & task.shader_input & task.shader_output & task.shader_output_luma & 
task.shader_eval_type; archive & task.shader_x & task.shader_w; archive & task.need_finish_queue; } @@ -291,7 +291,7 @@ public: *archive & type & task.x & task.y & task.w & task.h; *archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples; *archive & task.offset & task.stride; - *archive & task.shader_input & task.shader_output & task.shader_eval_type; + *archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type; *archive & task.shader_x & task.shader_w; *archive & task.need_finish_queue; diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index e0c602461ed..a1743f53831 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -1304,6 +1304,7 @@ public: cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_input = CL_MEM_PTR(task.shader_input); cl_mem d_output = CL_MEM_PTR(task.shader_output); + cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma); cl_int d_shader_eval_type = task.shader_eval_type; cl_int d_shader_x = task.shader_x; cl_int d_shader_w = task.shader_w; @@ -1330,6 +1331,12 @@ public: d_input, d_output); + if(task.shader_eval_type < SHADER_EVAL_BAKE) { + start_arg_index += kernel_set_args(kernel, + start_arg_index, + d_output_luma); + } + #define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_mem(kernel, &start_arg_index, #name); #include "kernel_textures.h" diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp index d527540f300..0cae118a692 100644 --- a/intern/cycles/device/device_task.cpp +++ b/intern/cycles/device/device_task.cpp @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN DeviceTask::DeviceTask(Type type_) : type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0), sample(0), num_samples(1), - shader_input(0), shader_output(0), + shader_input(0), shader_output(0), shader_output_luma(0), shader_eval_type(0), shader_x(0), 
shader_w(0) { last_update_time = time_dt(); diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 834ea60988a..7654508d4a5 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -46,7 +46,7 @@ public: int offset, stride; device_ptr shader_input; - device_ptr shader_output; + device_ptr shader_output, shader_output_luma; int shader_eval_type; int shader_x, shader_w; |