1 files changed, 519 insertions, 0 deletions
diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h
new file mode 100644
index 00000000000..0ff968723a1
--- /dev/null
+++ b/intern/cycles/kernel/integrator/volume_shader.h
@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Volume shader evaluation and sampling. */
+
+#pragma once
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/emissive.h"
+
+#ifdef __SVM__
+#  include "kernel/svm/svm.h"
+#endif
+#ifdef __OSL__
+#  include "kernel/osl/osl.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __VOLUME__
+
+/* Merging */
+
+ccl_device_inline void volume_shader_merge_closures(ccl_private ShaderData *sd)
+{
+  /* Merge identical closures to save closure space with stacked volumes. */
+  for (int i = 0; i < sd->num_closure; i++) {
+    ccl_private ShaderClosure *sci = &sd->closure[i];
+
+    if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+      continue;
+    }
+
+    for (int j = i + 1; j < sd->num_closure; j++) {
+      ccl_private ShaderClosure *scj = &sd->closure[j];
+      if (sci->type != scj->type) {
+        continue;
+      }
+
+      ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
+          sci;
+      ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
+          scj;
+      if (!(hgi->g == hgj->g)) {
+        continue;
+      }
+
+      sci->weight += scj->weight;
+      sci->sample_weight += scj->sample_weight;
+
+      int size = sd->num_closure - (j + 1);
+      if (size > 0) {
+        for (int k = 0; k < size; k++) {
+          scj[k] = scj[k + 1];
+        }
+      }
+
+      sd->num_closure--;
+      kernel_assert(sd->num_closure >= 0);
+      j--;
+    }
+  }
+}
+
+ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases *ccl_restrict
+                                                     phases,
+                                                 ccl_private const ShaderData *ccl_restrict sd)
+{
+  phases->num_closure = 0;
+
+  for (int i = 0; i < sd->num_closure; i++) {
+    ccl_private const ShaderClosure *from_sc = &sd->closure[i];
+    ccl_private const HenyeyGreensteinVolume *from_hg =
+        (ccl_private const HenyeyGreensteinVolume *)from_sc;
+
+    if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+      ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
+
+      to_sc->weight = from_sc->weight;
+      to_sc->sample_weight = from_sc->sample_weight;
+      to_sc->g = from_hg->g;
+      phases->num_closure++;
+      if (phases->num_closure >= MAX_VOLUME_CLOSURE) {
+        break;
+      }
+    }
+  }
+}
+
+/* Guiding */
+
+#  ifdef __PATH_GUIDING__
+ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
+                                                     IntegratorState state,
+                                                     ccl_private ShaderData *sd,
+                                                     ccl_private const RNGState *rng_state,
+                                                     const float3 P,
+                                                     const float3 D,
+                                                     ccl_private ShaderVolumePhases *phases,
+                                                     const VolumeSampleMethod direct_sample_method)
+{
+  /* Have any phase functions to guide? */
+  const int num_phases = phases->num_closure;
+  if (!kernel_data.integrator.use_volume_guiding || num_phases == 0) {
+    state->guiding.use_volume_guiding = false;
+    return;
+  }
+
+  const float volume_guiding_probability = kernel_data.integrator.volume_guiding_probability;
+  float rand_phase_guiding = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_GUIDING);
+
+  /* If we have more than one phase function we select one random based on its
+   * sample weight to calculate the product distribution for guiding. */
+  int phase_id = 0;
+  float phase_weight = 1.0f;
+
+  if (num_phases > 1) {
+    /* Pick a phase closure based on sample weights. */
+    float sum = 0.0f;
+
+    for (phase_id = 0; phase_id < num_phases; phase_id++) {
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id];
+      sum += svc->sample_weight;
+    }
+
+    float r = rand_phase_guiding * sum;
+    float partial_sum = 0.0f;
+
+    for (phase_id = 0; phase_id < num_phases; phase_id++) {
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id];
+      float next_sum = partial_sum + svc->sample_weight;
+
+      if (r <= next_sum) {
+        /* Rescale to reuse. */
+        rand_phase_guiding = (r - partial_sum) / svc->sample_weight;
+        phase_weight = svc->sample_weight / sum;
+        break;
+      }
+
+      partial_sum = next_sum;
+    }
+
+    /* Adjust the sample weight of the component used for guiding. */
+    phases->closure[phase_id].sample_weight *= volume_guiding_probability;
+  }
+
+  /* Init guiding for selected phase function. */
+  ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id];
+  if (!guiding_phase_init(kg, state, P, D, svc->g, rand_phase_guiding)) {
+    state->guiding.use_volume_guiding = false;
+    return;
+  }
+
+  state->guiding.use_volume_guiding = true;
+  state->guiding.sample_volume_guiding_rand = rand_phase_guiding;
+  state->guiding.volume_guiding_sampling_prob = volume_guiding_probability * phase_weight;
+
+  kernel_assert(state->guiding.volume_guiding_sampling_prob > 0.0f &&
+                state->guiding.volume_guiding_sampling_prob <= 1.0f);
+}
+#  endif
+
+/* Phase Evaluation & Sampling */
+
+/* Randomly sample a volume phase function proportional to ShaderClosure.sample_weight. */
+ccl_device_inline ccl_private const ShaderVolumeClosure *volume_shader_phase_pick(
+    ccl_private const ShaderVolumePhases *phases, ccl_private float2 *rand_phase)
+{
+  int sampled = 0;
+
+  if (phases->num_closure > 1) {
+    /* pick a phase closure based on sample weights */
+    float sum = 0.0f;
+
+    for (int i = 0; i < phases->num_closure; i++) {
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
+      sum += svc->sample_weight;
+    }
+
+    float r = (*rand_phase).x * sum;
+    float partial_sum = 0.0f;
+
+    for (int i = 0; i < phases->num_closure; i++) {
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
+      float next_sum = partial_sum + svc->sample_weight;
+
+      if (r <= next_sum) {
+        /* Rescale to reuse for volume phase direction sample. */
+        sampled = i;
+        (*rand_phase).x = (r - partial_sum) / svc->sample_weight;
+        break;
+      }
+
+      partial_sum = next_sum;
+    }
+  }
+
+  /* todo: this isn't quite correct, we don't weight anisotropy properly
+   * depending on color channels, even if this is perhaps not a common case */
+  return &phases->closure[sampled];
+}
+
+ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd,
+                                                      ccl_private const ShaderVolumePhases *phases,
+                                                      const float3 omega_in,
+                                                      int skip_phase,
+                                                      ccl_private BsdfEval *result_eval,
+                                                      float sum_pdf,
+                                                      float sum_sample_weight)
+{
+  for (int i = 0; i < phases->num_closure; i++) {
+    if (i == skip_phase)
+      continue;
+
+    ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
+    float phase_pdf = 0.0f;
+    Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
+
+    if (phase_pdf != 0.0f) {
+      bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+      sum_pdf += phase_pdf * svc->sample_weight;
+    }
+
+    sum_sample_weight += svc->sample_weight;
+  }
+
+  return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
+}
+
+ccl_device float volume_shader_phase_eval(KernelGlobals kg,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumeClosure *svc,
+                                          const float3 omega_in,
+                                          ccl_private BsdfEval *phase_eval)
+{
+  float phase_pdf = 0.0f;
+  Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
+
+  if (phase_pdf != 0.0f) {
+    bsdf_eval_accum(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+  }
+
+  return phase_pdf;
+}
+
+ccl_device float volume_shader_phase_eval(KernelGlobals kg,
+                                          IntegratorState state,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumePhases *phases,
+                                          const float3 omega_in,
+                                          ccl_private BsdfEval *phase_eval)
+{
+  bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+
+  float pdf = _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
+
+#  if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+  if (state->guiding.use_volume_guiding) {
+    const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob;
+    const float guide_pdf = guiding_phase_pdf(kg, state, omega_in);
+    pdf = (guiding_sampling_prob * guide_pdf) + (1.0f - guiding_sampling_prob) * pdf;
+  }
+#  endif
+
+  return pdf;
+}
+
+#  ifdef __PATH_GUIDING__
+ccl_device int volume_shader_phase_guided_sample(KernelGlobals kg,
+                                                 IntegratorState state,
+                                                 ccl_private const ShaderData *sd,
+                                                 ccl_private const ShaderVolumeClosure *svc,
+                                                 const float2 rand_phase,
+                                                 ccl_private BsdfEval *phase_eval,
+                                                 ccl_private float3 *omega_in,
+                                                 ccl_private float *phase_pdf,
+                                                 ccl_private float *unguided_phase_pdf,
+                                                 ccl_private float *sampled_roughness)
+{
+  const bool use_volume_guiding = state->guiding.use_volume_guiding;
+  const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob;
+
+  /* Decide between sampling guiding distribution and phase. */
+  float rand_phase_guiding = state->guiding.sample_volume_guiding_rand;
+  bool sample_guiding = false;
+  if (use_volume_guiding && rand_phase_guiding < guiding_sampling_prob) {
+    sample_guiding = true;
+    rand_phase_guiding /= guiding_sampling_prob;
+  }
+  else {
+    rand_phase_guiding -= guiding_sampling_prob;
+    rand_phase_guiding /= (1.0f - guiding_sampling_prob);
+  }
+
+  /* Initialize to zero. */
+  int label = LABEL_NONE;
+  Spectrum eval = zero_spectrum();
+
+  *unguided_phase_pdf = 0.0f;
+  float guide_pdf = 0.0f;
+  *sampled_roughness = 1.0f - fabsf(svc->g);
+
+  bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+
+  if (sample_guiding) {
+    /* Sample guiding distribution. */
+    guide_pdf = guiding_phase_sample(kg, state, rand_phase, omega_in);
+    *phase_pdf = 0.0f;
+
+    if (guide_pdf != 0.0f) {
+      *unguided_phase_pdf = volume_shader_phase_eval(kg, sd, svc, *omega_in, phase_eval);
+      *phase_pdf = (guiding_sampling_prob * guide_pdf) +
+                   ((1.0f - guiding_sampling_prob) * (*unguided_phase_pdf));
+      label = LABEL_VOLUME_SCATTER;
+    }
+  }
+  else {
+    /* Sample phase. */
+    *phase_pdf = 0.0f;
+    label = volume_phase_sample(
+        sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, unguided_phase_pdf);
+
+    if (*unguided_phase_pdf != 0.0f) {
+      bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+
+      *phase_pdf = *unguided_phase_pdf;
+      if (use_volume_guiding) {
+        guide_pdf = guiding_phase_pdf(kg, state, *omega_in);
+        *phase_pdf *= 1.0f - guiding_sampling_prob;
+        *phase_pdf += guiding_sampling_prob * guide_pdf;
+      }
+
+      kernel_assert(reduce_min(bsdf_eval_sum(phase_eval)) >= 0.0f);
+    }
+    else {
+      bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+    }
+
+    kernel_assert(reduce_min(bsdf_eval_sum(phase_eval)) >= 0.0f);
+  }
+
+  return label;
+}
+#  endif
+
+ccl_device int volume_shader_phase_sample(KernelGlobals kg,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumePhases *phases,
+                                          ccl_private const ShaderVolumeClosure *svc,
+                                          float2 rand_phase,
+                                          ccl_private BsdfEval *phase_eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private float *pdf,
+                                          ccl_private float *sampled_roughness)
+{
+  *sampled_roughness = 1.0f - fabsf(svc->g);
+  Spectrum eval = zero_spectrum();
+
+  *pdf = 0.0f;
+  int label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);
+
+  if (*pdf != 0.0f) {
+    bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+  }
+
+  return label;
+}
+
+/* Motion Blur */
+
+#  ifdef __OBJECT_MOTION__
+ccl_device_inline void volume_shader_motion_blur(KernelGlobals kg,
+                                                 ccl_private ShaderData *ccl_restrict sd)
+{
+  if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) == 0) {
+    return;
+  }
+
+  AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY);
+  kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND);
+
+  const float3 P = sd->P;
+  const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale;
+  const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? 0.5f :
+                                                                                        0.0f;
+  const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ?
+                         (1.0f - kernel_data.cam.shuttertime) + sd->time :
+                         sd->time;
+
+  /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity
+   * existed, or will exist, at the given time:
+   *
+   * `phi(x, T) = phi(x - (T - t) * u(x, T), t)`
+   *
+   * where
+   *
+   * x : position
+   * T : super-sampled time (or ray time)
+   * t : current time of the simulation (in rendering we assume this is center frame with
+   * relative time = 0)
+   * phi : the volume quantity
+   * u : the velocity field
+   *
+   * But first we need to determine the velocity field `u(x, T)`, which we can estimate also
+   * using semi-lagrangian advection.
+   *
+   * `u(x, T) = u(x - (T - t) * u(x, T), t)`
+   *
+   * This is the typical way to model self-advection in fluid dynamics, however, we do not
+   * account for other forces affecting the velocity during simulation (pressure, buoyancy,
+   * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better
+   * results, a higher order interpolation scheme can be used (at the cost of more lookups),
+   * or an interpolation of the velocity fields for the previous and next frames could also
+   * be used to estimate `u(x, T)` (which will cost more memory and lookups).
+   *
+   * References:
+   * "Eulerian Motion Blur", Kim and Ko, 2007
+   * "Production Volume Rendering", Wreninge et al., 2012
+   */
+
+  /* Find velocity. */
+  float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
+  object_dir_transform(kg, sd, &velocity);
+
+  /* Find advected P. */
+  sd->P = P - (time - time_offset) * velocity_scale * velocity;
+
+  /* Find advected velocity. */
+  velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
+  object_dir_transform(kg, sd, &velocity);
+
+  /* Find advected P. */
+  sd->P = P - (time - time_offset) * velocity_scale * velocity;
+}
+#  endif
+
+/* Volume Evaluation */
+
+template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
+ccl_device_inline void volume_shader_eval(KernelGlobals kg,
+                                          ConstIntegratorGenericState state,
+                                          ccl_private ShaderData *ccl_restrict sd,
+                                          const uint32_t path_flag,
+                                          StackReadOp stack_read)
+{
+  /* If path is being terminated, we are tracing a shadow ray or evaluating
+   * emission, then we don't need to store closures. The emission and shadow
+   * shader data also do not have a closure array to save GPU memory. */
+  int max_closures;
+  if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+    max_closures = 0;
+  }
+  else {
+    max_closures = kernel_data.max_closures;
+  }
+
+  /* reset closures once at the start, we will be accumulating the closures
+   * for all volumes in the stack into a single array of closures */
+  sd->num_closure = 0;
+  sd->num_closure_left = max_closures;
+  sd->flag = 0;
+  sd->object_flag = 0;
+
+  for (int i = 0;; i++) {
+    const VolumeStack entry = stack_read(i);
+    if (entry.shader == SHADER_NONE) {
+      break;
+    }
+
+    /* Setup shader-data from stack. it's mostly setup already in
+     * shader_setup_from_volume, this switching should be quick. */
+    sd->object = entry.object;
+    sd->lamp = LAMP_NONE;
+    sd->shader = entry.shader;
+
+    sd->flag &= ~SD_SHADER_FLAGS;
+    sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags;
+    sd->object_flag &= ~SD_OBJECT_FLAGS;
+
+    if (sd->object != OBJECT_NONE) {
+      sd->object_flag |= kernel_data_fetch(object_flag, sd->object);
+
+#  ifdef __OBJECT_MOTION__
+      /* todo: this is inefficient for motion blur, we should be
+       * caching matrices instead of recomputing them each step */
+      shader_setup_object_transforms(kg, sd, sd->time);
+
+      volume_shader_motion_blur(kg, sd);
+#  endif
+    }
+
+    /* evaluate shader */
+#  ifdef __OSL__
+    if (kg->osl) {
+      OSLShader::eval_volume(kg, state, sd, path_flag);
+    }
+    else
+#  endif
+    {
+#  ifdef __SVM__
+      svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>(
+          kg, state, sd, NULL, path_flag);
+#  endif
+    }
+
+    /* Merge closures to avoid exceeding number of closures limit. */
+    if (!shadow) {
+      if (i > 0) {
+        volume_shader_merge_closures(sd);
+      }
+    }
+  }
+}
+
+#endif /* __VOLUME__ */
+
+CCL_NAMESPACE_END