Diffstat (limited to 'intern/cycles/kernel/integrator')
15 files changed, 1425 insertions, 44 deletions
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index de916be24e7..5790cfd3f22 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -16,11 +16,14 @@
 #pragma once
-#include "kernel/kernel_accumulate.h"
-#include "kernel/kernel_adaptive_sampling.h"
-#include "kernel/kernel_camera.h"
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_random.h"
+#include "kernel/camera/camera.h"
+
+#include "kernel/film/film_accumulate.h"
+#include "kernel/film/film_adaptive_sampling.h"
+
+#include "kernel/integrator/integrator_path_state.h"
+
+#include "kernel/sample/sample_pattern.h"
 #include "kernel/geom/geom.h"
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
index 5bab6b2e2fd..499a72ffbc4 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -16,12 +16,15 @@
 #pragma once
-#include "kernel/kernel_accumulate.h"
-#include "kernel/kernel_adaptive_sampling.h"
-#include "kernel/kernel_camera.h"
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_random.h"
-#include "kernel/kernel_shadow_catcher.h"
+#include "kernel/camera/camera.h"
+
+#include "kernel/film/film_accumulate.h"
+#include "kernel/film/film_adaptive_sampling.h"
+
+#include "kernel/integrator/integrator_path_state.h"
+#include "kernel/integrator/integrator_shadow_catcher.h"
+
+#include "kernel/sample/sample_pattern.h"
 CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index c1315d48694..41d3dfde41a 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -16,11 +16,14 @@
 #pragma once
-#include "kernel/kernel_differential.h"
-#include "kernel/kernel_light.h"
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_projection.h"
-#include "kernel/kernel_shadow_catcher.h"
+#include "kernel/camera/camera_projection.h"
+
+#include "kernel/integrator/integrator_path_state.h"
+#include "kernel/integrator/integrator_shadow_catcher.h"
+
+#include "kernel/light/light.h"
+
+#include "kernel/util/util_differential.h"
 #include "kernel/geom/geom.h"
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
index 7def3e2f3f3..505d9687948 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -18,8 +18,8 @@
 #include "kernel/bvh/bvh.h"
 #include "kernel/geom/geom.h"
+#include "kernel/integrator/integrator_shader_eval.h"
 #include "kernel/integrator/integrator_volume_stack.h"
-#include "kernel/kernel_shader.h"
 CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/integrator/integrator_path_state.h b/intern/cycles/kernel/integrator/integrator_path_state.h
new file mode 100644
index 00000000000..73062b26682
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_path_state.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/sample/sample_pattern.h" + +CCL_NAMESPACE_BEGIN + +/* Initialize queues, so that the this path is considered terminated. + * Used for early outputs in the camera ray initialization, as well as initialization of split + * states for shadow catcher. */ +ccl_device_inline void path_state_init_queues(IntegratorState state) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +#ifdef __KERNEL_CPU__ + INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; +#endif +} + +/* Minimalistic initialization of the path state, which is needed for early outputs in the + * integrator initialization to work. */ +ccl_device_inline void path_state_init(IntegratorState state, + ccl_global const KernelWorkTile *ccl_restrict tile, + const int x, + const int y) +{ + const uint render_pixel_index = (uint)tile->offset + x + y * tile->stride; + + INTEGRATOR_STATE_WRITE(state, path, render_pixel_index) = render_pixel_index; + + path_state_init_queues(state); +} + +/* Initialize the rest of the path state needed to continue the path integration. */ +ccl_device_inline void path_state_init_integrator(KernelGlobals kg, + IntegratorState state, + const int sample, + const uint rng_hash) +{ + INTEGRATOR_STATE_WRITE(state, path, sample) = sample; + INTEGRATOR_STATE_WRITE(state, path, bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; + INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | + PATH_RAY_TRANSPARENT_BACKGROUND; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); + + if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE( + state, volume_stack, 0, shader) = kernel_data.background.volume_shader; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; + } + +#ifdef __DENOISING_FEATURES__ + if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); + } +#endif +} + +ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state, int label) +{ + uint32_t flag = INTEGRATOR_STATE(state, path, flag); + + 
/* ray through transparent keeps same flags from previous ray and is + * not counted as a regular bounce, transparent has separate max */ + if (label & LABEL_TRANSPARENT) { + uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; + + flag |= PATH_RAY_TRANSPARENT; + if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { + flag |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; + } + + if (!kernel_data.integrator.transparent_shadows) + flag |= PATH_RAY_MIS_SKIP; + + INTEGRATOR_STATE_WRITE(state, path, flag) = flag; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; + /* Random number generator next bounce. */ + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; + return; + } + + uint32_t bounce = INTEGRATOR_STATE(state, path, bounce) + 1; + if (bounce >= kernel_data.integrator.max_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + + flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP); + +#ifdef __VOLUME__ + if (label & LABEL_VOLUME_SCATTER) { + /* volume scatter */ + flag |= PATH_RAY_VOLUME_SCATTER; + flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + if (bounce == 1) { + flag |= PATH_RAY_VOLUME_PASS; + } + + const int volume_bounce = INTEGRATOR_STATE(state, path, volume_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = volume_bounce; + if (volume_bounce >= kernel_data.integrator.max_volume_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + else +#endif + { + /* surface reflection/transmission */ + if (label & LABEL_REFLECT) { + flag |= PATH_RAY_REFLECT; + flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + + if (label & LABEL_DIFFUSE) { + const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce; + if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + else { + const int glossy_bounce = INTEGRATOR_STATE(state, path, glossy_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = glossy_bounce; + if (glossy_bounce >= kernel_data.integrator.max_glossy_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + } + else { + kernel_assert(label & LABEL_TRANSMIT); + + flag |= PATH_RAY_TRANSMIT; + + if (!(label & LABEL_TRANSMIT_TRANSPARENT)) { + flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + } + + const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce; + if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + + /* diffuse/glossy/singular */ + if (label & LABEL_DIFFUSE) { + flag |= PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR; + } + else if (label & LABEL_GLOSSY) { + flag |= PATH_RAY_GLOSSY; + } + else { + kernel_assert(label & LABEL_SINGULAR); + flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP; + } + + /* Render pass categories. */ + if (bounce == 1) { + flag |= (label & LABEL_TRANSMIT) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; + } + } + + INTEGRATOR_STATE_WRITE(state, path, flag) = flag; + INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce; + + /* Random number generator next bounce. 
*/ + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; +} + +#ifdef __VOLUME__ +ccl_device_inline bool path_state_volume_next(IntegratorState state) +{ + /* For volume bounding meshes we pass through without counting transparent + * bounces, only sanity check in case self intersection gets us stuck. */ + uint32_t volume_bounds_bounce = INTEGRATOR_STATE(state, path, volume_bounds_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = volume_bounds_bounce; + if (volume_bounds_bounce > VOLUME_BOUNDS_MAX) { + return false; + } + + /* Random number generator next bounce. */ + if (volume_bounds_bounce > 1) { + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; + } + + return true; +} +#endif + +ccl_device_inline uint path_state_ray_visibility(ConstIntegratorState state) +{ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + uint32_t visibility = path_flag & PATH_RAY_ALL_VISIBILITY; + + /* For visibility, diffuse/glossy are for reflection only. */ + if (visibility & PATH_RAY_TRANSMIT) { + visibility &= ~(PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY); + } + + /* todo: this is not supported as its own ray visibility yet. */ + if (path_flag & PATH_RAY_VOLUME_SCATTER) { + visibility |= PATH_RAY_DIFFUSE; + } + + visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); + + return visibility; +} + +ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag) +{ + if (path_flag & PATH_RAY_TRANSPARENT) { + const uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + /* Do at least specified number of bounces without RR. */ + if (transparent_bounce <= kernel_data.integrator.transparent_min_bounce) { + return 1.0f; + } + } + else { + const uint32_t bounce = INTEGRATOR_STATE(state, path, bounce); + /* Do at least specified number of bounces without RR. */ + if (bounce <= kernel_data.integrator.min_bounce) { + return 1.0f; + } + } + + /* Probabilistic termination: use sqrt() to roughly match typical view + * transform and do path termination a bit later on average. */ + return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); +} + +ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) +{ + if (!kernel_data.integrator.ao_bounces) { + return false; + } + + const int bounce = INTEGRATOR_STATE(state, path, bounce) - + INTEGRATOR_STATE(state, path, transmission_bounce) - + (INTEGRATOR_STATE(state, path, glossy_bounce) > 0) + 1; + return (bounce > kernel_data.integrator.ao_bounces); +} + +/* Random Number Sampling Utility Functions + * + * For each random number in each step of the path we must have a unique + * dimension to avoid using the same sequence twice. + * + * For branches in the path we must be careful not to reuse the same number + * in a sequence and offset accordingly. + */ + +/* RNG State loaded onto stack. 
*/ +typedef struct RNGState { + uint rng_hash; + uint rng_offset; + int sample; +} RNGState; + +ccl_device_inline void path_state_rng_load(ConstIntegratorState state, + ccl_private RNGState *rng_state) +{ + rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset); + rng_state->sample = INTEGRATOR_STATE(state, path, sample); +} + +ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState state, + ccl_private RNGState *rng_state) +{ + rng_state->rng_hash = INTEGRATOR_STATE(state, shadow_path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset); + rng_state->sample = INTEGRATOR_STATE(state, shadow_path, sample); +} + +ccl_device_inline float path_state_rng_1D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int dimension) +{ + return path_rng_1D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); +} + +ccl_device_inline void path_state_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int dimension, + ccl_private float *fx, + ccl_private float *fy) +{ + path_rng_2D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); +} + +ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, + ccl_private const RNGState *rng_state, + uint hash) +{ + /* Use a hash instead of dimension, this is not great but avoids adding + * more dimensions to each bounce which reduces quality of dimensions we + * are already using. */ + return path_rng_1D( + kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); +} + +ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int branch, + int num_branches, + int dimension) +{ + return path_rng_1D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension); +} + +ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int branch, + int num_branches, + int dimension, + ccl_private float *fx, + ccl_private float *fy) +{ + path_rng_2D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension, + fx, + fy); +} + +/* Utility functions to get light termination value, + * since it might not be needed in many cases. 
+ */ +ccl_device_inline float path_state_rng_light_termination(KernelGlobals kg, + ccl_private const RNGState *state) +{ + if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { + return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); + } + return 0.0f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h index 287c54d7243..b3bef9a234e 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_background.h +++ b/intern/cycles/kernel/integrator/integrator_shade_background.h @@ -16,10 +16,11 @@ #pragma once -#include "kernel/kernel_accumulate.h" -#include "kernel/kernel_emission.h" -#include "kernel/kernel_light.h" -#include "kernel/kernel_shader.h" +#include "kernel/film/film_accumulate.h" +#include "kernel/integrator/integrator_shader_eval.h" +#include "kernel/light/light.h" +#include "kernel/light/light_sample.h" +#include "kernel/sample/sample_mis.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h index 4f0f5a39756..7d220006322 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_light.h +++ b/intern/cycles/kernel/integrator/integrator_shade_light.h @@ -16,10 +16,10 @@ #pragma once -#include "kernel/kernel_accumulate.h" -#include "kernel/kernel_emission.h" -#include "kernel/kernel_light.h" -#include "kernel/kernel_shader.h" +#include "kernel/film/film_accumulate.h" +#include "kernel/integrator/integrator_shader_eval.h" +#include "kernel/light/light.h" +#include "kernel/light/light_sample.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h index a82254e1dea..0c4eeb8d10d 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h @@ -18,8 +18,7 @@ #include "kernel/integrator/integrator_shade_volume.h" #include "kernel/integrator/integrator_volume_stack.h" - -#include "kernel/kernel_shader.h" +#include "kernel/integrator/integrator_shader_eval.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h index 2a0bf4a3046..70dce1c4913 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_surface.h +++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h @@ -16,16 +16,19 @@ #pragma once -#include "kernel/kernel_accumulate.h" -#include "kernel/kernel_emission.h" -#include "kernel/kernel_light.h" -#include "kernel/kernel_passes.h" -#include "kernel/kernel_path_state.h" -#include "kernel/kernel_shader.h" +#include "kernel/film/film_accumulate.h" +#include "kernel/film/film_passes.h" +#include "kernel/integrator/integrator_path_state.h" +#include "kernel/integrator/integrator_shader_eval.h" #include "kernel/integrator/integrator_subsurface.h" #include "kernel/integrator/integrator_volume_stack.h" +#include "kernel/light/light.h" +#include "kernel/light/light_sample.h" + +#include "kernel/sample/sample_mis.h" + CCL_NAMESPACE_BEGIN ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h index d0aabb550c0..44ef4803575 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_volume.h +++ 
b/intern/cycles/kernel/integrator/integrator_shade_volume.h @@ -16,16 +16,19 @@ #pragma once -#include "kernel/kernel_accumulate.h" -#include "kernel/kernel_emission.h" -#include "kernel/kernel_light.h" -#include "kernel/kernel_passes.h" -#include "kernel/kernel_path_state.h" -#include "kernel/kernel_shader.h" +#include "kernel/film/film_accumulate.h" +#include "kernel/film/film_passes.h" +#include "kernel/integrator/integrator_path_state.h" +#include "kernel/integrator/integrator_shader_eval.h" #include "kernel/integrator/integrator_intersect_closest.h" #include "kernel/integrator/integrator_volume_stack.h" +#include "kernel/light/light.h" +#include "kernel/light/light_sample.h" + +#include "kernel/sample/sample_mis.h" + CCL_NAMESPACE_BEGIN #ifdef __VOLUME__ diff --git a/intern/cycles/kernel/integrator/integrator_shader_eval.h b/intern/cycles/kernel/integrator/integrator_shader_eval.h new file mode 100644 index 00000000000..04a3a965fd3 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shader_eval.h @@ -0,0 +1,869 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Functions to evaluate shaders and use the resulting shader closures. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/emissive.h" + +#include "kernel/film/film_accumulate.h" + +#include "kernel/svm/svm.h" + +#ifdef __OSL__ +# include "kernel/osl/osl_shader.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Merging */ + +#if defined(__VOLUME__) +ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd) +{ + /* Merge identical closures to save closure space with stacked volumes. 
*/ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sci = &sd->closure[i]; + + if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + continue; + } + + for (int j = i + 1; j < sd->num_closure; j++) { + ccl_private ShaderClosure *scj = &sd->closure[j]; + if (sci->type != scj->type) { + continue; + } + + ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) + sci; + ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) + scj; + if (!(hgi->g == hgj->g)) { + continue; + } + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j + 1); + if (size > 0) { + for (int k = 0; k < size; k++) { + scj[k] = scj[k + 1]; + } + } + + sd->num_closure--; + kernel_assert(sd->num_closure >= 0); + j--; + } + } +} + +ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict + phases, + ccl_private const ShaderData *ccl_restrict sd) +{ + phases->num_closure = 0; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *from_sc = &sd->closure[i]; + ccl_private const HenyeyGreensteinVolume *from_hg = + (ccl_private const HenyeyGreensteinVolume *)from_sc; + + if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; + + to_sc->weight = from_sc->weight; + to_sc->sample_weight = from_sc->sample_weight; + to_sc->g = from_hg->g; + phases->num_closure++; + if (phases->num_closure >= MAX_VOLUME_CLOSURE) { + break; + } + } + } +} +#endif /* __VOLUME__ */ + +ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd) +{ + /* Defensive sampling. + * + * We can likely also do defensive sampling at deeper bounces, particularly + * for cases like a perfect mirror but possibly also others. This will need + * a good heuristic. */ + if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == + 0 && + sd->num_closure > 1) { + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sc->sample_weight = max(sc->sample_weight, 0.125f * sum); + } + } + } + + /* Filter glossy. 
+ * + * Blurring of bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy) */ + if (kernel_data.integrator.filter_glossy != FLT_MAX) { + float blur_pdf = kernel_data.integrator.filter_glossy * + INTEGRATOR_STATE(state, path, min_ray_pdf); + + if (blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF(sc->type)) { + bsdf_blur(kg, sc, blur_roughness); + } + } + } + } +} + +/* BSDF */ + +ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd, + const float3 omega_in) +{ + return dot(sd->N, omega_in) < 0.0f; +} + +ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags) +{ + if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { + return false; + } + if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { + if (CLOSURE_IS_BSDF_DIFFUSE(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { + if (CLOSURE_IS_BSDF_GLOSSY(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { + if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { + return true; + } + } + return false; +} + +ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private const ShaderClosure *skip_sc, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight, + const uint light_shader_flags) +{ + /* This is the veach one-sample model with balance heuristic, + * some PDF factors drop out when using balance heuristic weighting. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (sc == skip_sc) { + continue; + } + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) { + float bsdf_pdf = 0.0f; + float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); + + if (bsdf_pdf != 0.0f) { + const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type); + bsdf_eval_accum(result_eval, is_diffuse, eval * sc->weight, 1.0f); + sum_pdf += bsdf_pdf * sc->sample_weight; + } + } + + sum_sample_weight += sc->sample_weight; + } + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif + float + shader_bsdf_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private BsdfEval *bsdf_eval, + const uint light_shader_flags) +{ + bsdf_eval_init(bsdf_eval, false, zero_float3()); + + return _shader_bsdf_multi_eval( + kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); +} + +/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ +ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick( + ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu) +{ + int sampled = 0; + + if (sd->num_closure > 1) { + /* Pick a BSDF or based on sample weights. 
*/ + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + float r = (*randu) * sum; + float partial_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; + + if (r < next_sum) { + sampled = i; + + /* Rescale to reuse for direction sample, to better preserve stratification. */ + *randu = (r - partial_sum) / sc->sample_weight; + break; + } + + partial_sum = next_sum; + } + } + } + + return &sd->closure[sampled]; +} + +/* Return weight for picked BSSRDF. */ +ccl_device_inline float3 +shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, + ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) +{ + float3 weight = bssrdf_sc->weight; + + if (sd->num_closure > 1) { + float sum = 0.0f; + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + weight *= sum / bssrdf_sc->sample_weight; + } + + return weight; +} + +/* Sample direction for picked BSDF, and return evaluation and pdf for all + * BSDFs combined using MIS. */ +ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc, + float randu, + float randv, + ccl_private BsdfEval *bsdf_eval, + ccl_private float3 *omega_in, + ccl_private differential3 *domega_in, + ccl_private float *pdf) +{ + /* BSSRDF should already have been handled elsewhere. */ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); + + int label; + float3 eval = zero_float3(); + + *pdf = 0.0f; + label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + + if (*pdf != 0.0f) { + const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type); + bsdf_eval_init(bsdf_eval, is_diffuse, eval * sc->weight); + + if (sd->num_closure > 1) { + const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in); + float sweight = sc->sample_weight; + *pdf = _shader_bsdf_multi_eval( + kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); + } + } + + return label; +} + +ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) +{ + float roughness = 0.0f; + float sum_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + /* sqrt once to undo the squaring from multiplying roughness on the + * two axes, and once for the squared roughness convention. */ + float weight = fabsf(average(sc->weight)); + roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); + sum_weight += weight; + } + } + + return (sum_weight > 0.0f) ? 
roughness / sum_weight : 0.0f; +} + +ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return one_float3(); + } + else if (sd->flag & SD_TRANSPARENT) { + return sd->closure_transparent_extinction; + } + else { + return zero_float3(); + } +} + +ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) +{ + if (sd->flag & SD_TRANSPARENT) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = zero_float3(); + } + } + + sd->flag &= ~SD_TRANSPARENT; + } +} + +ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); + + alpha = max(alpha, zero_float3()); + alpha = min(alpha, one_float3()); + + return alpha; +} + +ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 eval = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 eval = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 eval = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + N += sc->N * fabsf(average(sc->weight)); + } + + return (is_zero(N)) ? sd->N : normalize(N); +} + +ccl_device float3 shader_bsdf_ao(KernelGlobals kg, + ccl_private const ShaderData *sd, + const float ao_factor, + ccl_private float3 *N_) +{ + float3 eval = zero_float3(); + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; + eval += sc->weight * ao_factor; + N += bsdf->N * fabsf(average(sc->weight)); + } + } + + *N_ = (is_zero(N)) ? sd->N : normalize(N); + return eval; +} + +#ifdef __SUBSURFACE__ +ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSSRDF(sc->type)) { + ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; + float avg_weight = fabsf(average(sc->weight)); + + N += bssrdf->N * avg_weight; + } + } + + return (is_zero(N)) ? 
sd->N : normalize(N); +} +#endif /* __SUBSURFACE__ */ + +/* Constant emission optimization */ + +ccl_device bool shader_constant_emission_eval(KernelGlobals kg, + int shader, + ccl_private float3 *eval) +{ + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; + + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0], + kernel_tex_fetch(__shaders, shader_index).constant_emission[1], + kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); + + return true; + } + + return false; +} + +/* Background */ + +ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return zero_float3(); + } +} + +/* Emission */ + +ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; + } + else { + return zero_float3(); + } +} + +/* Holdout */ + +ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) +{ + float3 weight = zero_float3(); + + /* For objects marked as holdout, preserve transparency and remove all other + * closures, replacing them with a holdout weight. */ + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { + weight = one_float3() - sd->closure_transparent_extinction; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + sc->type = NBUILTIN_CLOSURES; + } + } + + sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); + } + else { + weight = one_float3(); + } + } + else { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_HOLDOUT(sc->type)) { + weight += sc->weight; + } + } + } + + return weight; +} + +/* Surface Evaluation */ + +template<uint node_feature_mask, typename ConstIntegratorGenericState> +ccl_device void shader_eval_surface(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_global float *ccl_restrict buffer, + uint32_t path_flag) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. 
*/ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + sd->num_closure = 0; + sd->num_closure_left = max_closures; + +#ifdef __OSL__ + if (kg->osl) { + if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { + OSLShader::eval_background(kg, state, sd, path_flag); + } + else { + OSLShader::eval_surface(kg, state, sd, path_flag); + } + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); +#else + if (sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); + sd->flag |= SD_EMISSION; + } + else { + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); + if (bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } +#endif + } +} + +/* Volume */ + +#ifdef __VOLUME__ + +ccl_device_inline float _shader_volume_phase_multi_eval( + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + int skip_phase, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) +{ + for (int i = 0; i < phases->num_closure; i++) { + if (i == skip_phase) + continue; + + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + float phase_pdf = 0.0f; + float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); + + if (phase_pdf != 0.0f) { + bsdf_eval_accum(result_eval, false, eval, 1.0f); + sum_pdf += phase_pdf * svc->sample_weight; + } + + sum_sample_weight += svc->sample_weight; + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +ccl_device float shader_volume_phase_eval(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + ccl_private BsdfEval *phase_eval) +{ + bsdf_eval_init(phase_eval, false, zero_float3()); + + return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); +} + +ccl_device int shader_volume_phase_sample(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + float randu, + float randv, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private differential3 *domega_in, + ccl_private float *pdf) +{ + int sampled = 0; + + if (phases->num_closure > 1) { + /* pick a phase closure based on sample weights */ + float sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + sum += svc->sample_weight; + } + + float r = randu * sum; + float partial_sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + float next_sum = partial_sum + svc->sample_weight; + + if (r <= next_sum) { + /* Rescale to reuse for BSDF direction sample. 
*/ + randu = (r - partial_sum) / svc->sample_weight; + break; + } + + partial_sum = next_sum; + } + + if (sampled == phases->num_closure) { + *pdf = 0.0f; + return LABEL_NONE; + } + } + + /* todo: this isn't quite correct, we don't weight anisotropy properly + * depending on color channels, even if this is perhaps not a common case */ + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + int label; + float3 eval = zero_float3(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(phase_eval, false, eval); + } + + return label; +} + +ccl_device int shader_phase_sample_closure(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *sc, + float randu, + float randv, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private differential3 *domega_in, + ccl_private float *pdf) +{ + int label; + float3 eval = zero_float3(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + + if (*pdf != 0.0f) + bsdf_eval_init(phase_eval, false, eval); + + return label; +} + +/* Volume Evaluation */ + +template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> +ccl_device_inline void shader_eval_volume(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + const uint32_t path_flag, + StackReadOp stack_read) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + /* reset closures once at the start, we will be accumulating the closures + * for all volumes in the stack into a single array of closures */ + sd->num_closure = 0; + sd->num_closure_left = max_closures; + sd->flag = 0; + sd->object_flag = 0; + + for (int i = 0;; i++) { + const VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Setup shader-data from stack. it's mostly setup already in + * shader_setup_from_volume, this switching should be quick. */ + sd->object = entry.object; + sd->lamp = LAMP_NONE; + sd->shader = entry.shader; + + sd->flag &= ~SD_SHADER_FLAGS; + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag &= ~SD_OBJECT_FLAGS; + + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); + +# ifdef __OBJECT_MOTION__ + /* todo: this is inefficient for motion blur, we should be + * caching matrices instead of recomputing them each step */ + shader_setup_object_transforms(kg, sd, sd->time); +# endif + } + + /* evaluate shader */ +# ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_volume(kg, state, sd, path_flag); + } + else +# endif + { + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( + kg, state, sd, NULL, path_flag); + } +# endif + + /* Merge closures to avoid exceeding number of closures limit. 
*/ + if (!shadow) { + if (i > 0) { + shader_merge_volume_closures(sd); + } + } + } +} + +#endif /* __VOLUME__ */ + +/* Displacement Evaluation */ + +template<typename ConstIntegratorGenericState> +ccl_device void shader_eval_displacement(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd) +{ + sd->num_closure = 0; + sd->num_closure_left = 0; + + /* this will modify sd->P */ +#ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) + OSLShader::eval_displacement(kg, state, sd); + else +# endif + { + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( + kg, state, sd, NULL, 0); + } +#endif +} + +/* Cryptomatte */ + +ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) +{ + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shadow_catcher.h b/intern/cycles/kernel/integrator/integrator_shadow_catcher.h new file mode 100644 index 00000000000..24d03466393 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shadow_catcher.h @@ -0,0 +1,120 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/integrator/integrator_path_state.h" +#include "kernel/integrator/integrator_state_util.h" + +CCL_NAMESPACE_BEGIN + +/* Check whether current surface bounce is where path is to be split for the shadow catcher. */ +ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals kg, + IntegratorState state, + const int object_flag) +{ +#ifdef __SHADOW_CATCHER__ + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } + + /* Check the flag first, avoiding fetches form global memory. */ + if ((object_flag & SD_OBJECT_SHADOW_CATCHER) == 0) { + return false; + } + if (object_flag & SD_OBJECT_HOLDOUT_MASK) { + return false; + } + + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if ((path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) == 0) { + /* Split only on primary rays, secondary bounces are to treat shadow catcher as a regular + * object. */ + return false; + } + + if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) { + return false; + } + + return true; +#else + (void)object_flag; + return false; +#endif +} + +/* Check whether the current path can still split. */ +ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, + ConstIntegratorState state) +{ + if (INTEGRATOR_PATH_IS_TERMINATED) { + return false; + } + + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) { + /* Shadow catcher was already hit and the state was split. No further split is allowed. */ + return false; + } + + return (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) != 0; +} + +/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths + * after this function. 
+ */
+ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg,
+                                                   IntegratorState state,
+                                                   const int object_flags)
+{
+#ifdef __SHADOW_CATCHER__
+
+  if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) {
+    return false;
+  }
+
+  /* The split is to be done. Mark the current state as such, so that it stops contributing to the
+   * shadow catcher matte pass, but keeps contributing to the combined pass. */
+  INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
+
+  /* Split new state from the current one. This new state will only track contribution of shadow
+   * catcher objects ignoring non-catcher objects. */
+  integrator_state_shadow_catcher_split(kg, state);
+
+  return true;
+#else
+  (void)object_flags;
+  return false;
+#endif
+}
+
+#ifdef __SHADOW_CATCHER__
+
+ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(const uint32_t path_flag)
+{
+  return (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) == 0;
+}
+
+ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t path_flag)
+{
+  return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
+}
+
+#endif /* __SHADOW_CATCHER__ */
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index 6e6b7f8a40f..0b1f67daa92 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -17,7 +17,8 @@
 #pragma once
 #include "kernel/integrator/integrator_state.h"
-#include "kernel/kernel_differential.h"
+
+#include "kernel/util/util_differential.h"
 CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index e3bf9db80f7..9560641c460 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -16,9 +16,7 @@
 #pragma once
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_projection.h"
-#include "kernel/kernel_shader.h"
+#include "kernel/camera/camera_projection.h"
 #include "kernel/bvh/bvh.h"
@@ -29,6 +27,8 @@
 #include "kernel/closure/volume.h"
 #include "kernel/integrator/integrator_intersect_volume_stack.h"
+#include "kernel/integrator/integrator_path_state.h"
+#include "kernel/integrator/integrator_shader_eval.h"
 #include "kernel/integrator/integrator_subsurface_disk.h"
 #include "kernel/integrator/integrator_subsurface_random_walk.h"
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index 2ab6d0961e3..b98acda1f4d 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
-#include "kernel/kernel_projection.h"
+#include "kernel/camera/camera_projection.h"
 #include "kernel/bvh/bvh.h"