diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-07-13 15:22:42 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-07-14 17:36:38 +0300 |
commit | 28c3739a9bc055b3a3e5ba2f5cfc219f70d64dfa (patch) | |
tree | 4bb752ff0df62426e8d0404fccb6c5d4e59b6ff0 | |
parent | 5539fb3121313f91b6e46d982ef62ff97763d2c2 (diff) |
Cleanup: replace state flow macros in the kernel with functions
-rw-r--r-- | intern/cycles/kernel/integrator/init_from_bake.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/init_from_camera.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/intersect_closest.h | 71 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/intersect_shadow.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/intersect_subsurface.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/intersect_volume_stack.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_background.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_light.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_shadow.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_surface.h | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_volume.h | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shadow_catcher.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/state_flow.h | 267 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/subsurface.h | 6 |
14 files changed, 239 insertions, 165 deletions
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index c63684d58e6..4d75dcea190 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -181,7 +181,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, integrator_state_write_ray(kg, state, &ray); /* Setup next kernel to execute. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } else { /* Surface baking. */ @@ -247,13 +247,13 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); } } diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h index 9fe27cdda9a..73db13be697 100644 --- a/intern/cycles/kernel/integrator/init_from_camera.h +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -100,10 +100,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Continue with intersect_closest kernel, optionally initializing volume * stack before that if the camera may be inside a volume. */ if (kernel_data.cam.is_inside_volume) { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); } else { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } return true; diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 621aa05f46b..923dab9591a 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -109,14 +109,14 @@ ccl_device_forceinline void integrator_split_shadow_catcher( /* If using background pass, schedule background shading kernel so that we have a background * to alpha-over on. The background kernel will then continue the path afterwards. */ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } @@ -128,18 +128,19 @@ ccl_device_forceinline void integrator_split_shadow_catcher( const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after updating volume stack for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume( KernelGlobals kg, IntegratorState state) { @@ -156,20 +157,21 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after executing background shader for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background( KernelGlobals kg, IntegratorState state) { @@ -177,7 +179,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } @@ -190,7 +193,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche * * Note that current_kernel is a template value since making this a variable * leads to poor performance with CUDA atomics. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel( KernelGlobals kg, IntegratorState state, @@ -206,10 +209,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel( const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0; if (!integrator_intersect_terminate(kg, state, flags)) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } return; } @@ -218,7 +221,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( if (hit) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); } else { /* Hit a surface, continue with surface kernel unless terminated. */ @@ -231,16 +234,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel( (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -249,13 +252,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel( #endif } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } } @@ -263,7 +266,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( * * The logic here matches integrator_intersect_next_kernel, except that * volume shading and termination testing have already been done. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( KernelGlobals kg, IntegratorState state, @@ -273,7 +276,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( if (isect->prim != PRIM_NONE) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { @@ -286,16 +289,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -307,7 +310,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } } diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h index 3e746998225..adcbfa19afe 100644 --- a/intern/cycles/kernel/integrator/intersect_shadow.h +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt if (opaque_hit) { /* Hit an opaque surface, shadow path ends here. */ - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { @@ -171,7 +171,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt * * TODO: could also write to render buffer directly if no transparent shadows? * Could save a kernel execution for the common case. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + integrator_shadow_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h index 0a2c4ad680d..f439d6905a0 100644 --- a/intern/cycles/kernel/integrator/intersect_subsurface.h +++ b/intern/cycles/kernel/integrator/intersect_subsurface.h @@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 49ef01dc870..68c7fdd5909 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -222,7 +222,7 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt } else { /* Volume stack init for camera rays, continue with intersection of camera ray. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + integrator_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 4791a963ae6..47233634463 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -213,7 +213,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg, } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index be926c78439..7b3e9e0ee7e 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -99,11 +99,11 @@ ccl_device void integrator_shade_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + integrator_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index 2b929b7b62e..afe77590e9a 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -158,20 +158,20 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg, /* Evaluate transparent shadows. */ const bool opaque = integrate_transparent_shadow(kg, state, num_hits); if (opaque) { - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } #endif if (shadow_intersections_has_remaining(num_hits)) { /* More intersections to find, continue shadow ray. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + integrator_shadow_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { kernel_accum_light(kg, state, render_buffer); - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 57b88b806a4..91e34968148 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -190,8 +190,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const bool is_light = light_sample_is_light(&ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); @@ -442,7 +442,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, ray.dD = differential_zero_compact(); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); @@ -604,22 +605,23 @@ ccl_device bool integrate_surface(KernelGlobals kg, } template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE, - int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> + DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) { if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) { if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } else { kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 6cf80f4ddc5..683c031f0d9 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -774,8 +774,8 @@ ccl_device_forceinline void integrate_volume_direct_light( const bool is_light = light_sample_is_light(ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Write shadow ray and associated state to global memory. */ integrator_state_write_shadow_ray(kg, shadow_state, &ray); @@ -1032,13 +1032,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, if (event == VOLUME_PATH_SCATTERED) { /* Queue intersect_closest kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } else if (event == VOLUME_PATH_MISSED) { /* End path. */ - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); return; } else { diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index 42d44580f80..ff63625aceb 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -50,7 +50,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, ConstIntegratorState state) { - if (INTEGRATOR_PATH_IS_TERMINATED) { + if (integrator_path_is_terminated(state)) { return false; } diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h index d397ef385e7..1ae746022d0 100644 --- a/intern/cycles/kernel/integrator/state_flow.h +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -10,62 +10,94 @@ CCL_NAMESPACE_BEGIN /* Control Flow * - * Utilities for control flow between kernels. The implementation may differ per device - * or even be handled on the host side. To abstract such differences, experiment with - * different implementations and for debugging, this is abstracted using macros. + * Utilities for control flow between kernels. The implementation is different between CPU and + * GPU devices. For the latter part of the logic is handled on the host side with wavefronts. * * There is a main path for regular path tracing camera for path tracing. Shadows for next * event estimation branch off from this into their own path, that may be computed in - * parallel while the main path continues. + * parallel while the main path continues. Additionally, shading kernels are sorted using + * a key for coherence. * * Each kernel on the main path must call one of these functions. These may not be called * multiple times from the same kernel. * - * INTEGRATOR_PATH_INIT(next_kernel) - * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) - * INTEGRATOR_PATH_TERMINATE(current_kernel) + * integrator_path_init(kg, state, next_kernel) + * integrator_path_next(kg, state, current_kernel, next_kernel) + * integrator_path_terminate(kg, state, current_kernel) * * For the shadow path similar functions are used, and again each shadow kernel must call * one of them, and only once. */ -#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) -#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) +ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state) +{ + return INTEGRATOR_STATE(state, path, queued_kernel) == 0; +} + +ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state) +{ + return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0; +} #ifdef __KERNEL_GPU__ -# define INTEGRATOR_PATH_INIT(next_kernel) \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.next_shadow_path_index[0], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( + &kernel_integrator_state.next_shadow_path_index[0], 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +} # ifdef __KERNEL_SORT_PARTITIONING__ /* Sort first by truncated state index (for good locality), then by key (for good coherence). */ @@ -75,68 +107,103 @@ CCL_NAMESPACE_BEGIN # define INTEGRATOR_SORT_KEY(key, state) (key) # endif -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - const int key_ = INTEGRATOR_SORT_KEY(key, state); \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - const int key_ = INTEGRATOR_SORT_KEY(key, state); \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} #else -# define INTEGRATOR_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - } -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - (void)current_kernel; \ - } - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = &state->shadow_type; \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ - (void)current_kernel; \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; + (void)current_kernel; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; + (void)current_kernel; +} #endif diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 1e6fcf4aff0..09e59919c86 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -177,17 +177,17 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } |