diff options
Diffstat (limited to 'intern/cycles/kernel/integrator/integrator_intersect_shadow.h')
-rw-r--r-- | intern/cycles/kernel/integrator/integrator_intersect_shadow.h | 108 |
1 files changed, 77 insertions, 31 deletions
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h index 00d44f0e5ed..90422445fad 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h @@ -19,19 +19,21 @@ CCL_NAMESPACE_BEGIN /* Visibility for the shadow ray. */ -ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg, + ConstIntegratorShadowState state) { uint visibility = PATH_RAY_SHADOW; #ifdef __SHADOW_CATCHER__ - const uint32_t path_flag = INTEGRATOR_STATE(shadow_path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); #endif return visibility; } -ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS, +ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg, + IntegratorShadowState state, ccl_private const Ray *ray, const uint visibility) { @@ -46,82 +48,126 @@ ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS, const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect); if (!opaque_hit) { - INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; } return opaque_hit; } -ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg, + ConstIntegratorShadowState state) { const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - const int transparent_bounce = INTEGRATOR_STATE(shadow_path, transparent_bounce); + const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce); return max(transparent_max_bounce - transparent_bounce - 1, 0); } #ifdef __TRANSPARENT_SHADOWS__ -ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS, +# if defined(__KERNEL_CPU__) +ccl_device int shadow_intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection *)a; + const Intersection *isect_b = (const Intersection *)b; + + if (isect_a->t < isect_b->t) + return -1; + else if (isect_a->t > isect_b->t) + return 1; + else + return 0; +} +# endif + +ccl_device_inline void sort_shadow_intersections(IntegratorShadowState state, uint num_hits) +{ + kernel_assert(num_hits > 0); + +# ifdef __KERNEL_GPU__ + /* Use bubble sort which has more friendly memory pattern on GPU. */ + bool swapped; + do { + swapped = false; + for (int j = 0; j < num_hits - 1; ++j) { + if (INTEGRATOR_STATE_ARRAY(state, shadow_isect, j, t) > + INTEGRATOR_STATE_ARRAY(state, shadow_isect, j + 1, t)) { + struct Intersection tmp_j ccl_optional_struct_init; + struct Intersection tmp_j_1 ccl_optional_struct_init; + integrator_state_read_shadow_isect(state, &tmp_j, j); + integrator_state_read_shadow_isect(state, &tmp_j_1, j + 1); + integrator_state_write_shadow_isect(state, &tmp_j_1, j); + integrator_state_write_shadow_isect(state, &tmp_j, j + 1); + swapped = true; + } + } + --num_hits; + } while (swapped); +# else + Intersection *isect_array = (Intersection *)state->shadow_isect; + qsort(isect_array, num_hits, sizeof(Intersection), shadow_intersections_compare); +# endif +} + +ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg, + IntegratorShadowState state, ccl_private const Ray *ray, const uint visibility) { - Intersection isect[INTEGRATOR_SHADOW_ISECT_SIZE]; - /* Limit the number hits to the max transparent bounces allowed and the size that we * have available in the integrator state. */ - const uint max_transparent_hits = integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_PASS); - const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE); + const uint max_hits = integrate_shadow_max_transparent_hits(kg, state); uint num_hits = 0; - bool opaque_hit = scene_intersect_shadow_all(kg, ray, isect, visibility, max_hits, &num_hits); + float throughput = 1.0f; + bool opaque_hit = scene_intersect_shadow_all( + kg, state, ray, visibility, max_hits, &num_hits, &throughput); + + /* Computed throughput from baked shadow transparency, where we can bypass recording + * intersections and shader evaluation. */ + if (throughput != 1.0f) { + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) *= throughput; + } /* If number of hits exceed the transparent bounces limit, make opaque. */ - if (num_hits > max_transparent_hits) { + if (num_hits > max_hits) { opaque_hit = true; } if (!opaque_hit) { - uint num_recorded_hits = min(num_hits, max_hits); + const uint num_recorded_hits = min(num_hits, min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE)); if (num_recorded_hits > 0) { - sort_intersections(isect, num_recorded_hits); - - /* Write intersection result into global integrator state memory. */ - for (int hit = 0; hit < num_recorded_hits; hit++) { - integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit); - } + sort_shadow_intersections(state, num_recorded_hits); } - INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = num_hits; + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = num_hits; } else { - INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; } return opaque_hit; } #endif -ccl_device void integrator_intersect_shadow(INTEGRATOR_STATE_ARGS) +ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW); /* Read ray from integrator state into local memory. */ Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_shadow_ray(kg, state, &ray); /* Compute visibility. */ - const uint visibility = integrate_intersect_shadow_visibility(INTEGRATOR_STATE_PASS); + const uint visibility = integrate_intersect_shadow_visibility(kg, state); #ifdef __TRANSPARENT_SHADOWS__ /* TODO: compile different kernels depending on this? Especially for OptiX * conditional trace calls are bad. */ - const bool opaque_hit = - (kernel_data.integrator.transparent_shadows) ? - integrate_intersect_shadow_transparent(INTEGRATOR_STATE_PASS, &ray, visibility) : - integrate_intersect_shadow_opaque(INTEGRATOR_STATE_PASS, &ray, visibility); + const bool opaque_hit = (kernel_data.integrator.transparent_shadows) ? + integrate_intersect_shadow_transparent(kg, state, &ray, visibility) : + integrate_intersect_shadow_opaque(kg, state, &ray, visibility); #else - const bool opaque_hit = integrate_intersect_shadow_opaque( - INTEGRATOR_STATE_PASS, &ray, visibility); + const bool opaque_hit = integrate_intersect_shadow_opaque(kg, state, &ray, visibility); #endif if (opaque_hit) { |