From 2ba7c3aa650c3c795d903a24998204f67c75b017 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 13 Oct 2021 19:13:35 +0200 Subject: Cleanup: refactor to make number of channels for shader evaluation variable --- intern/cycles/integrator/shader_eval.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'intern/cycles/integrator/shader_eval.cpp') diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index a14e41ec5be..53546c03872 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -34,9 +34,10 @@ ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), pr } bool ShaderEval::eval(const ShaderEvalType type, - const int max_num_points, + const int max_num_inputs, + const int num_channels, const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input, - const function<void(device_vector<float4> &)> &read_output) + const function<void(device_vector<float> &)> &read_output) { bool first_device = true; bool success = true; @@ -50,26 +51,27 @@ bool ShaderEval::eval(const ShaderEvalType type, first_device = false; device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY); - device_vector<float4> output(device, "ShaderEval output", MEM_READ_WRITE); + device_vector<float> output(device, "ShaderEval output", MEM_READ_WRITE); /* Allocate and copy device buffers. */ DCHECK_EQ(input.device, device); DCHECK_EQ(output.device, device); DCHECK_LE(output.size(), input.size()); - input.alloc(max_num_points); + input.alloc(max_num_inputs); int num_points = fill_input(input); if (num_points == 0) { return; } input.copy_to_device(); - output.alloc(num_points); + output.alloc(num_points * num_channels); output.zero_to_device(); /* Evaluate on CPU or GPU. */ - success = (device->info.type == DEVICE_CPU) ? eval_cpu(device, type, input, output) : - eval_gpu(device, type, input, output); + success = (device->info.type == DEVICE_CPU) ? 
+ eval_cpu(device, type, input, output, num_points) : + eval_gpu(device, type, input, output, num_points); /* Copy data back from device if not canceled. */ if (success) { @@ -87,7 +89,8 @@ bool ShaderEval::eval(const ShaderEvalType type, bool ShaderEval::eval_cpu(Device *device, const ShaderEvalType type, device_vector<KernelShaderEvalInput> &input, - device_vector<float4> &output) + device_vector<float> &output, + const int64_t work_size) { vector<CPUKernelThreadGlobals> kernel_thread_globals; device->get_cpu_kernel_thread_globals(kernel_thread_globals); @@ -96,9 +99,8 @@ bool ShaderEval::eval_cpu(Device *device, const CPUKernels &kernels = *(device->get_cpu_kernels()); /* Simple parallel_for over all work items. */ - const int64_t work_size = output.size(); KernelShaderEvalInput *input_data = input.data(); - float4 *output_data = output.data(); + float *output_data = output.data(); bool success = true; tbb::task_arena local_arena(device->info.cpu_threads); @@ -130,7 +132,8 @@ bool ShaderEval::eval_cpu(Device *device, bool ShaderEval::eval_gpu(Device *device, const ShaderEvalType type, device_vector<KernelShaderEvalInput> &input, - device_vector<float4> &output) + device_vector<float> &output, + const int64_t work_size) { /* Find required kernel function. */ DeviceKernel kernel; @@ -151,7 +154,6 @@ bool ShaderEval::eval_gpu(Device *device, * TODO : query appropriate size from device.*/ const int64_t chunk_size = 65536; - const int64_t work_size = output.size(); void *d_input = (void *)input.device_pointer; void *d_output = (void *)output.device_pointer; -- cgit v1.2.3