diff options
Diffstat (limited to 'intern/cycles/integrator/shader_eval.cpp')
-rw-r--r-- | intern/cycles/integrator/shader_eval.cpp | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp new file mode 100644 index 00000000000..465b4a8d4da --- /dev/null +++ b/intern/cycles/integrator/shader_eval.cpp @@ -0,0 +1,173 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "integrator/shader_eval.h" + +#include "device/device.h" +#include "device/device_queue.h" + +#include "device/cpu/kernel.h" +#include "device/cpu/kernel_thread_globals.h" + +#include "util/util_logging.h" +#include "util/util_progress.h" +#include "util/util_tbb.h" + +CCL_NAMESPACE_BEGIN + +ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), progress_(progress) +{ + DCHECK_NE(device_, nullptr); +} + +bool ShaderEval::eval(const ShaderEvalType type, + const int max_num_points, + const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input, + const function<void(device_vector<float4> &)> &read_output) +{ + bool first_device = true; + bool success = true; + + device_->foreach_device([&](Device *device) { + if (!first_device) { + LOG(ERROR) << "Multi-devices are not yet fully implemented, will evaluate shader on a " + "single device."; + return; + } + first_device = false; + + device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY); + device_vector<float4> output(device, "ShaderEval output", MEM_READ_WRITE); + + /* Allocate and copy device buffers. */ + DCHECK_EQ(input.device, device); + DCHECK_EQ(output.device, device); + DCHECK_LE(output.size(), input.size()); + + input.alloc(max_num_points); + int num_points = fill_input(input); + if (num_points == 0) { + return; + } + + input.copy_to_device(); + output.alloc(num_points); + output.zero_to_device(); + + /* Evaluate on CPU or GPU. */ + success = (device->info.type == DEVICE_CPU) ? eval_cpu(device, type, input, output) : + eval_gpu(device, type, input, output); + + /* Copy data back from device if not cancelled. */ + if (success) { + output.copy_from_device(0, 1, output.size()); + read_output(output); + } + + input.free(); + output.free(); + }); + + return success; +} + +bool ShaderEval::eval_cpu(Device *device, + const ShaderEvalType type, + device_vector<KernelShaderEvalInput> &input, + device_vector<float4> &output) +{ + vector<CPUKernelThreadGlobals> kernel_thread_globals; + device->get_cpu_kernel_thread_globals(kernel_thread_globals); + + /* Find required kernel function. */ + const CPUKernels &kernels = *(device->get_cpu_kernels()); + + /* Simple parallel_for over all work items. */ + const int64_t work_size = output.size(); + KernelShaderEvalInput *input_data = input.data(); + float4 *output_data = output.data(); + bool success = true; + + tbb::task_arena local_arena(device->info.cpu_threads); + local_arena.execute([&]() { + tbb::parallel_for(int64_t(0), work_size, [&](int64_t work_index) { + /* TODO: is this fast enough? */ + if (progress_.get_cancel()) { + success = false; + return; + } + + const int thread_index = tbb::this_task_arena::current_thread_index(); + KernelGlobals *kg = &kernel_thread_globals[thread_index]; + + switch (type) { + case SHADER_EVAL_DISPLACE: + kernels.shader_eval_displace(kg, input_data, output_data, work_index); + break; + case SHADER_EVAL_BACKGROUND: + kernels.shader_eval_background(kg, input_data, output_data, work_index); + break; + } + }); + }); + + return success; +} + +bool ShaderEval::eval_gpu(Device *device, + const ShaderEvalType type, + device_vector<KernelShaderEvalInput> &input, + device_vector<float4> &output) +{ + /* Find required kernel function. */ + DeviceKernel kernel; + switch (type) { + case SHADER_EVAL_DISPLACE: + kernel = DEVICE_KERNEL_SHADER_EVAL_DISPLACE; + break; + case SHADER_EVAL_BACKGROUND: + kernel = DEVICE_KERNEL_SHADER_EVAL_BACKGROUND; + break; + }; + + /* Create device queue. */ + unique_ptr<DeviceQueue> queue = device->gpu_queue_create(); + queue->init_execution(); + + /* Execute work on GPU in chunk, so we can cancel. + * TODO : query appropriate size from device.*/ + const int chunk_size = 65536; + + const int work_size = output.size(); + void *d_input = (void *)input.device_pointer; + void *d_output = (void *)output.device_pointer; + + for (int d_offset = 0; d_offset < work_size; d_offset += chunk_size) { + int d_work_size = min(chunk_size, work_size - d_offset); + void *args[] = {&d_input, &d_output, &d_offset, &d_work_size}; + + queue->enqueue(kernel, d_work_size, args); + queue->synchronize(); + + if (progress_.get_cancel()) { + return false; + } + } + + return true; +} + +CCL_NAMESPACE_END |