diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-10-13 20:13:35 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-15 16:42:44 +0300 |
commit | 2ba7c3aa650c3c795d903a24998204f67c75b017 (patch) | |
tree | ef80c7cadbe59d1062dd75818baad4d8ad594bcb /intern | |
parent | 70376154a0b09dc05fcc5bd79c33fdf7c6acbd9a (diff) |
Cleanup: refactor to make number of channels for shader evaluation variable
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/cpu/kernel.h | 2 |
-rw-r--r-- | intern/cycles/integrator/shader_eval.cpp | 26 |
-rw-r--r-- | intern/cycles/integrator/shader_eval.h | 11 |
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel_arch.h | 4 |
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel_arch_impl.h | 4 |
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 4 |
-rw-r--r-- | intern/cycles/kernel/integrator/integrator_intersect_shadow.h | 3 |
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 12 |
-rw-r--r-- | intern/cycles/render/light.cpp | 12 |
-rw-r--r-- | intern/cycles/render/mesh_displace.cpp | 11 |
10 files changed, 53 insertions, 36 deletions
diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index 54b18308544..b5f0d873f30 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -54,7 +54,7 @@ class CPUKernels { /* Shader evaluation. */ using ShaderEvalFunction = CPUKernelFunction<void (*)( - const KernelGlobals *kg, const KernelShaderEvalInput *, float4 *, const int)>; + const KernelGlobals *kg, const KernelShaderEvalInput *, float *, const int)>; ShaderEvalFunction shader_eval_displace; ShaderEvalFunction shader_eval_background; diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index a14e41ec5be..53546c03872 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -34,9 +34,10 @@ ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), pr } bool ShaderEval::eval(const ShaderEvalType type, - const int max_num_points, + const int max_num_inputs, + const int num_channels, const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input, - const function<void(device_vector<float4> &)> &read_output) + const function<void(device_vector<float> &)> &read_output) { bool first_device = true; bool success = true; @@ -50,26 +51,27 @@ bool ShaderEval::eval(const ShaderEvalType type, first_device = false; device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY); - device_vector<float4> output(device, "ShaderEval output", MEM_READ_WRITE); + device_vector<float> output(device, "ShaderEval output", MEM_READ_WRITE); /* Allocate and copy device buffers. 
*/ DCHECK_EQ(input.device, device); DCHECK_EQ(output.device, device); DCHECK_LE(output.size(), input.size()); - input.alloc(max_num_points); + input.alloc(max_num_inputs); int num_points = fill_input(input); if (num_points == 0) { return; } input.copy_to_device(); - output.alloc(num_points); + output.alloc(num_points * num_channels); output.zero_to_device(); /* Evaluate on CPU or GPU. */ - success = (device->info.type == DEVICE_CPU) ? eval_cpu(device, type, input, output) : - eval_gpu(device, type, input, output); + success = (device->info.type == DEVICE_CPU) ? + eval_cpu(device, type, input, output, num_points) : + eval_gpu(device, type, input, output, num_points); /* Copy data back from device if not canceled. */ if (success) { @@ -87,7 +89,8 @@ bool ShaderEval::eval(const ShaderEvalType type, bool ShaderEval::eval_cpu(Device *device, const ShaderEvalType type, device_vector<KernelShaderEvalInput> &input, - device_vector<float4> &output) + device_vector<float> &output, + const int64_t work_size) { vector<CPUKernelThreadGlobals> kernel_thread_globals; device->get_cpu_kernel_thread_globals(kernel_thread_globals); @@ -96,9 +99,8 @@ bool ShaderEval::eval_cpu(Device *device, const CPUKernels &kernels = *(device->get_cpu_kernels()); /* Simple parallel_for over all work items. */ - const int64_t work_size = output.size(); KernelShaderEvalInput *input_data = input.data(); - float4 *output_data = output.data(); + float *output_data = output.data(); bool success = true; tbb::task_arena local_arena(device->info.cpu_threads); @@ -130,7 +132,8 @@ bool ShaderEval::eval_cpu(Device *device, bool ShaderEval::eval_gpu(Device *device, const ShaderEvalType type, device_vector<KernelShaderEvalInput> &input, - device_vector<float4> &output) + device_vector<float> &output, + const int64_t work_size) { /* Find required kernel function. 
*/ DeviceKernel kernel; @@ -151,7 +154,6 @@ bool ShaderEval::eval_gpu(Device *device, * TODO : query appropriate size from device.*/ const int64_t chunk_size = 65536; - const int64_t work_size = output.size(); void *d_input = (void *)input.device_pointer; void *d_output = (void *)output.device_pointer; diff --git a/intern/cycles/integrator/shader_eval.h b/intern/cycles/integrator/shader_eval.h index 7dbf334b8d7..013fad17d4f 100644 --- a/intern/cycles/integrator/shader_eval.h +++ b/intern/cycles/integrator/shader_eval.h @@ -40,19 +40,22 @@ class ShaderEval { /* Evaluate shader at points specified by KernelShaderEvalInput and write out * RGBA colors to output. */ bool eval(const ShaderEvalType type, - const int max_num_points, + const int max_num_inputs, + const int num_channels, const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input, - const function<void(device_vector<float4> &)> &read_output); + const function<void(device_vector<float> &)> &read_output); protected: bool eval_cpu(Device *device, const ShaderEvalType type, device_vector<KernelShaderEvalInput> &input, - device_vector<float4> &output); + device_vector<float> &output, + const int64_t work_size); bool eval_gpu(Device *device, const ShaderEvalType type, device_vector<KernelShaderEvalInput> &input, - device_vector<float4> &output); + device_vector<float> &output, + const int64_t work_size); Device *device_; Progress &progress_; diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index 81f328c710b..8b7b0ec0548 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -58,11 +58,11 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel); void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg, const KernelShaderEvalInput *input, - float4 *output, + float *output, const int offset); void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, const 
KernelShaderEvalInput *input, - float4 *output, + float *output, const int offset); /* -------------------------------------------------------------------- diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 1432abfd330..23e371f165f 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -114,7 +114,7 @@ DEFINE_INTEGRATOR_SHADE_KERNEL(megakernel) void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, const KernelShaderEvalInput *input, - float4 *output, + float *output, const int offset) { #ifdef KERNEL_STUB @@ -126,7 +126,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg, const KernelShaderEvalInput *input, - float4 *output, + float *output, const int offset) { #ifdef KERNEL_STUB diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 3379114fc62..21901215757 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -615,7 +615,7 @@ KERNEL_FILM_CONVERT_DEFINE(float4, rgba) ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) kernel_gpu_shader_eval_displace(KernelShaderEvalInput *input, - float4 *output, + float *output, const int offset, const int work_size) { @@ -629,7 +629,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) kernel_gpu_shader_eval_background(KernelShaderEvalInput *input, - float4 *output, + float *output, const int offset, const int work_size) { diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h index 00d44f0e5ed..3ebd21e4651 100644 --- 
a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h @@ -85,7 +85,8 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS, if (num_recorded_hits > 0) { sort_intersections(isect, num_recorded_hits); - /* Write intersection result into global integrator state memory. */ + /* Write intersection result into global integrator state memory. + * More efficient may be to do this directly from the intersection kernel. */ for (int hit = 0; hit < num_recorded_hits; hit++) { integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit); } diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index cfff727d007..6cbb8dcc291 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -26,7 +26,7 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg, ccl_global const KernelShaderEvalInput *input, - ccl_global float4 *output, + ccl_global float *output, const int offset) { /* Setup shader data. */ @@ -53,12 +53,14 @@ ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg, D = ensure_finite3(D); /* Write output. */ - output[offset] += make_float4(D.x, D.y, D.z, 0.0f); + output[offset * 3 + 0] += D.x; + output[offset * 3 + 1] += D.y; + output[offset * 3 + 2] += D.z; } ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg, ccl_global const KernelShaderEvalInput *input, - ccl_global float4 *output, + ccl_global float *output, const int offset) { /* Setup ray */ @@ -88,7 +90,9 @@ ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg, color = ensure_finite3(color); /* Write output. 
*/ - output[offset] += make_float4(color.x, color.y, color.z, 0.0f); + output[offset * 3 + 0] += color.x; + output[offset * 3 + 1] += color.y; + output[offset * 3 + 2] += color.z; } CCL_NAMESPACE_END diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index ae1150fc07b..400ed0802a6 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -50,6 +50,7 @@ static void shade_background_pixels(Device *device, device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); const int size = width * height; + const int num_channels = 3; pixels.resize(size); /* Evaluate shader on device. */ @@ -57,6 +58,7 @@ static void shade_background_pixels(Device *device, shader_eval.eval( SHADER_EVAL_BACKGROUND, size, + num_channels, [&](device_vector<KernelShaderEvalInput> &d_input) { /* Fill coordinates for shading. */ KernelShaderEvalInput *d_input_data = d_input.data(); @@ -77,15 +79,15 @@ static void shade_background_pixels(Device *device, return size; }, - [&](device_vector<float4> &d_output) { + [&](device_vector<float> &d_output) { /* Copy output to pixel buffer. 
*/ - float4 *d_output_data = d_output.data(); + float *d_output_data = d_output.data(); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - pixels[y * width + x].x = d_output_data[y * width + x].x; - pixels[y * width + x].y = d_output_data[y * width + x].y; - pixels[y * width + x].z = d_output_data[y * width + x].z; + pixels[y * width + x].x = d_output_data[(y * width + x) * num_channels + 0]; + pixels[y * width + x].y = d_output_data[(y * width + x) * num_channels + 1]; + pixels[y * width + x].z = d_output_data[(y * width + x) * num_channels + 2]; } } }); diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index c00c4c24211..bf8a4585907 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -115,7 +115,7 @@ static int fill_shader_input(const Scene *scene, /* Read back mesh displacement shader output. */ static void read_shader_output(const Scene *scene, Mesh *mesh, - const device_vector<float4> &d_output) + const device_vector<float> &d_output) { const array<int> &mesh_shaders = mesh->get_shader(); const array<Node *> &mesh_used_shaders = mesh->get_used_shaders(); @@ -125,7 +125,7 @@ static void read_shader_output(const Scene *scene, const int num_motion_steps = mesh->get_motion_steps(); vector<bool> done(num_verts, false); - const float4 *d_output_data = d_output.data(); + const float *d_output_data = d_output.data(); int d_output_index = 0; Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); @@ -144,7 +144,11 @@ static void read_shader_output(const Scene *scene, for (int j = 0; j < 3; j++) { if (!done[t.v[j]]) { done[t.v[j]] = true; - float3 off = float4_to_float3(d_output_data[d_output_index++]); + float3 off = make_float3(d_output_data[d_output_index + 0], + d_output_data[d_output_index + 1], + d_output_data[d_output_index + 2]); + d_output_index += 3; + /* Avoid illegal vertex coordinates. 
*/ off = ensure_finite3(off); mesh_verts[t.v[j]] += off; @@ -194,6 +198,7 @@ bool GeometryManager::displace( ShaderEval shader_eval(device, progress); if (!shader_eval.eval(SHADER_EVAL_DISPLACE, num_verts, + 3, function_bind(&fill_shader_input, scene, mesh, object_index, _1), function_bind(&read_shader_output, scene, mesh, _1))) { return false; |