diff options
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/device/cuda/compat.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/hip/compat.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/compat.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/film/accumulate.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_surface.h | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_volume.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shadow_state_template.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/state_template.h | 18 |
9 files changed, 31 insertions, 25 deletions
diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h index 2feebad074f..ba3aefa43bf 100644 --- a/intern/cycles/kernel/device/cuda/compat.h +++ b/intern/cycles/kernel/device/cuda/compat.h @@ -52,6 +52,7 @@ typedef unsigned long long uint64_t; #endif #define ccl_device_noinline __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device +#define ccl_device_inline_method ccl_device #define ccl_global #define ccl_static_constant __constant__ #define ccl_device_constant __constant__ __device__ diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h index fb07602539b..b58179e12ff 100644 --- a/intern/cycles/kernel/device/hip/compat.h +++ b/intern/cycles/kernel/device/hip/compat.h @@ -45,6 +45,7 @@ typedef unsigned long long uint64_t; #define ccl_device_forceinline __device__ __forceinline__ #define ccl_device_noinline __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device +#define ccl_device_inline_method ccl_device #define ccl_global #define ccl_static_constant __constant__ #define ccl_device_constant __constant__ __device__ diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 4a2c39d90fd..19358e063d8 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -42,6 +42,7 @@ using namespace metal; #define ccl_device_forceinline ccl_device #define ccl_device_noinline ccl_device __attribute__((noinline)) #define ccl_device_noinline_cpu ccl_device +#define ccl_device_inline_method ccl_device #define ccl_global device #define ccl_static_constant static constant constexpr #define ccl_device_constant constant diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index 482b921a1a8..c7a7be7309a 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -49,6 +49,7 @@ typedef unsigned long long uint64_t; __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything #define ccl_device_inline ccl_device #define ccl_device_forceinline ccl_device +#define ccl_device_inline_method ccl_device #define ccl_device_noinline __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device #define ccl_global diff --git a/intern/cycles/kernel/film/accumulate.h b/intern/cycles/kernel/film/accumulate.h index 6bdf1f2f3a1..c9303088e3f 100644 --- a/intern/cycles/kernel/film/accumulate.h +++ b/intern/cycles/kernel/film/accumulate.h @@ -552,7 +552,7 @@ ccl_device_inline void kernel_accum_background(KernelGlobals kg, const bool is_transparent_background_ray, ccl_global float *ccl_restrict render_buffer) { - float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L; + float3 contribution = float3(INTEGRATOR_STATE(state, path, throughput)) * L; kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 2793dd3e218..2c478784bc9 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -195,12 +195,13 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - const float3 pass_diffuse_weight = (bounce == 0) ? - bsdf_eval_pass_diffuse_weight(&bsdf_eval) : - INTEGRATOR_STATE(state, path, pass_diffuse_weight); - const float3 pass_glossy_weight = (bounce == 0) ? - bsdf_eval_pass_glossy_weight(&bsdf_eval) : - INTEGRATOR_STATE(state, path, pass_glossy_weight); + const packed_float3 pass_diffuse_weight = + (bounce == 0) ? packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval)) : + INTEGRATOR_STATE(state, path, pass_diffuse_weight); + const packed_float3 pass_glossy_weight = (bounce == 0) ? + packed_float3( + bsdf_eval_pass_glossy_weight(&bsdf_eval)) : + INTEGRATOR_STATE(state, path, pass_glossy_weight); INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = pass_glossy_weight; } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index c5a80eb336f..141433c37a8 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -792,9 +792,10 @@ ccl_device_forceinline void integrate_volume_direct_light( const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - const float3 pass_diffuse_weight = (bounce == 0) ? - one_float3() : - INTEGRATOR_STATE(state, path, pass_diffuse_weight); + const packed_float3 pass_diffuse_weight = (bounce == 0) ? + packed_float3(one_float3()) : + INTEGRATOR_STATE( + state, path, pass_diffuse_weight); INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = zero_float3(); } diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h index 667ab88c8c4..625a429d3db 100644 --- a/intern/cycles/kernel/integrator/shadow_state_template.h +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -40,15 +40,15 @@ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_T /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) /* Throughput for shadow pass. */ KERNEL_STRUCT_MEMBER(shadow_path, - float3, + packed_float3, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(shadow_path, float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Number of intersections found by ray-tracing. */ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_END(shadow_path) @@ -56,8 +56,8 @@ KERNEL_STRUCT_END(shadow_path) /********************************** Shadow Ray *******************************/ KERNEL_STRUCT_BEGIN(shadow_ray) -KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index 3299f973713..bd18a7498a3 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -59,12 +59,12 @@ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) /* Continuation probability for path termination. */ KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(path, float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(path, float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Denoising. */ -KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) +KERNEL_STRUCT_MEMBER(path, packed_float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) /* Shader sorting. */ /* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */ KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) @@ -73,8 +73,8 @@ KERNEL_STRUCT_END(path) /************************************** Ray ***********************************/ KERNEL_STRUCT_BEGIN(ray) -KERNEL_STRUCT_MEMBER(ray, float3, P, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) @@ -96,10 +96,10 @@ KERNEL_STRUCT_END(isect) /*************** Subsurface closure state for subsurface kernel ***************/ KERNEL_STRUCT_BEGIN(subsurface) -KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, packed_float3, albedo, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, packed_float3, radius, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, packed_float3, Ng, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_END(subsurface) /********************************** Volume Stack ******************************/ |