11 files changed, 33 insertions, 72 deletions
diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h
index ea86523c0f6..39c3ecd78c0 100644
--- a/intern/cycles/kernel/bvh/util.h
+++ b/intern/cycles/kernel/bvh/util.h
@@ -21,54 +21,22 @@ CCL_NAMESPACE_BEGIN
 /* Ray offset to avoid self intersection.
  *
  * This function should be used to compute a modified ray start position for
- * rays leaving from a surface. */
-
+ * rays leaving from a surface. This is from "A Fast and Robust Method for Avoiding
+ * Self-Intersection" see https://research.nvidia.com/publication/2019-03_A-Fast-and
+ */
 ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
 {
-#ifdef __INTERSECTION_REFINE__
-  const float epsilon_f = 1e-5f;
-  /* ideally this should match epsilon_f, but instancing and motion blur
-   * precision makes it problematic */
-  const float epsilon_test = 1.0f;
-  const int epsilon_i = 32;
-
-  float3 res;
-
-  /* x component */
-  if (fabsf(P.x) < epsilon_test) {
-    res.x = P.x + Ng.x * epsilon_f;
-  }
-  else {
-    uint ix = __float_as_uint(P.x);
-    ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i;
-    res.x = __uint_as_float(ix);
-  }
-
-  /* y component */
-  if (fabsf(P.y) < epsilon_test) {
-    res.y = P.y + Ng.y * epsilon_f;
-  }
-  else {
-    uint iy = __float_as_uint(P.y);
-    iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i;
-    res.y = __uint_as_float(iy);
-  }
-
-  /* z component */
-  if (fabsf(P.z) < epsilon_test) {
-    res.z = P.z + Ng.z * epsilon_f;
-  }
-  else {
-    uint iz = __float_as_uint(P.z);
-    iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i;
-    res.z = __uint_as_float(iz);
-  }
-
-  return res;
-#else
-  const float epsilon_f = 1e-4f;
-  return P + epsilon_f * Ng;
-#endif
+  const float int_scale = 256.0f;
+  int3 of_i = make_int3((int)(int_scale * Ng.x), (int)(int_scale * Ng.y), (int)(int_scale * Ng.z));
+
+  float3 p_i = make_float3(__int_as_float(__float_as_int(P.x) + ((P.x < 0) ? -of_i.x : of_i.x)),
+                           __int_as_float(__float_as_int(P.y) + ((P.y < 0) ? -of_i.y : of_i.y)),
+                           __int_as_float(__float_as_int(P.z) + ((P.z < 0) ? -of_i.z : of_i.z)));
+  const float origin = 1.0f / 32.0f;
+  const float float_scale = 1.0f / 65536.0f;
+  return make_float3(fabsf(P.x) < origin ? P.x + float_scale * Ng.x : p_i.x,
+                     fabsf(P.y) < origin ? P.y + float_scale * Ng.y : p_i.y,
+                     fabsf(P.z) < origin ? P.z + float_scale * Ng.z : p_i.z);
 }
 
 #if defined(__KERNEL_CPU__)
diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h
index aa7be879995..ee3d82ebacb 100644
--- a/intern/cycles/kernel/integrator/intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h
@@ -71,7 +71,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
     volume_stack_enter_exit(kg, state, stack_sd);
 
     /* Move ray forward. */
-    volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+    volume_ray.P = stack_sd->P;
     if (volume_ray.t != FLT_MAX) {
       volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t);
     }
@@ -210,7 +210,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
     }
 
     /* Move ray forward. */
-    volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+    volume_ray.P = stack_sd->P;
     ++step;
   }
 #endif
diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h
index 97ca430752c..0a82c9cadef 100644
--- a/intern/cycles/kernel/integrator/shade_light.h
+++ b/intern/cycles/kernel/integrator/shade_light.h
@@ -37,8 +37,9 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
 
   /* Advance ray beyond light. */
   /* TODO: can we make this more numerically robust to avoid reintersecting the
-   * same light in some cases? */
-  const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D);
+   * same light in some cases? Ray should not intersect surface anymore as the
+   * object and prim ids will prevent self intersection. */
+  const float3 new_ray_P = ray_P + ray_D * isect.t;
   INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
   INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
 
@@ -46,7 +47,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
   const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
   ray_P -= ray_D * mis_ray_t;
   isect.t += mis_ray_t;
-  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t;
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t;
 
   LightSample ls ccl_optional_struct_init;
   const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h
index 10ec48c8637..3e8eba29ef7 100644
--- a/intern/cycles/kernel/integrator/shade_shadow.h
+++ b/intern/cycles/kernel/integrator/shade_shadow.h
@@ -152,7 +152,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
     const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
     const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
     const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
-    INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D);
+    INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D;
     INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
   }
 
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index 3ca9e773591..329380fc464 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -271,13 +271,11 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
   }
 
   /* Setup ray. Note that clipping works through transparent bounces. */
-  INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P,
-                                                     (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+  INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
   INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
   INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
                                               INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
                                               FLT_MAX;
-
 #ifdef __RAY_DIFFERENTIALS__
   INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
   INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
@@ -321,7 +319,7 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState
   }
 
   /* Setup ray position, direction stays unchanged. */
-  INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng);
+  INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
 
   /* Clipping works through transparent. */
   INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
@@ -365,7 +363,7 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
   }
 
   Ray ray ccl_optional_struct_init;
-  ray.P = ray_offset(sd->P, sd->Ng);
+  ray.P = sd->P;
   ray.D = ao_D;
   ray.t = kernel_data.integrator.ao_bounces_distance;
   ray.time = sd->time;
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 107d5ec1795..e8d7bfa1374 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -877,7 +877,6 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
   INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
   INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
   INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
-
 #  ifdef __RAY_DIFFERENTIALS__
   INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
   INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h
index 5a09fc51d75..6c0f815afea 100644
--- a/intern/cycles/kernel/integrator/subsurface.h
+++ b/intern/cycles/kernel/integrator/subsurface.h
@@ -164,10 +164,8 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
 
     if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
       float3 P = INTEGRATOR_STATE(state, ray, P);
-      const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
-      const float3 offset_P = ray_offset(P, -Ng);
 
-      integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);
+      integrator_volume_stack_update_for_subsurface(kg, state, P, ray.P);
     }
   }
 #  endif /* __VOLUME__ */
diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h
index 43676fccfe5..993c54d9050 100644
--- a/intern/cycles/kernel/integrator/subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h
@@ -206,7 +206,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
   }
 
   /* Setup ray. */
-  ray.P = ray_offset(P, -Ng);
+  ray.P = P;
   ray.D = D;
   ray.t = FLT_MAX;
   ray.time = time;
@@ -421,13 +421,6 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
     if (hit) {
       t = ray.t;
     }
-    else if (bounce == 0) {
-      /* Restore original position if nothing was hit after the first bounce,
-       * without the ray_offset() that was added to avoid self-intersection.
-       * Otherwise if that offset is relatively large compared to the scattering
-       * radius, we never go back up high enough to exit the surface. */
-      ray.P = P;
-    }
 
     /* Advance to new scatter location. */
     ray.P += t * ray.D;
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index 65e87e77c36..521ad2f7066 100644
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -198,7 +198,7 @@ ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg,
   float NL = dot(sd->N, L);
   bool transmit = (NL < 0.0f);
   float3 Ng = (transmit ? -sd->Ng : sd->Ng);
-  float3 P = ray_offset(sd->P, Ng);
+  float3 P = sd->P;
 
   if ((sd->type & PRIMITIVE_TRIANGLE) && (sd->shader & SHADER_SMOOTH_NORMAL)) {
     const float offset_cutoff =
@@ -243,7 +243,7 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri
     }
     else {
       /* other lights, avoid self-intersection */
-      ray->D = ray_offset(ls->P, ls->Ng) - P;
+      ray->D = ls->P - P;
       ray->D = normalize_len(ray->D, &ray->t);
     }
   }
diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h
index e3abc9d69ff..dcb1a79717d 100644
--- a/intern/cycles/kernel/svm/ao.h
+++ b/intern/cycles/kernel/svm/ao.h
@@ -70,7 +70,7 @@ ccl_device float svm_ao(
 
     /* Create ray. */
     Ray ray;
-    ray.P = ray_offset(sd->P, N);
+    ray.P = sd->P;
     ray.D = D.x * T + D.y * B + D.z * N;
     ray.t = max_dist;
     ray.time = sd->time;
diff --git a/tests/python/cycles_render_tests.py b/tests/python/cycles_render_tests.py
index 98b2cd4a200..7272cd4660b 100644
--- a/tests/python/cycles_render_tests.py
+++ b/tests/python/cycles_render_tests.py
@@ -36,8 +36,12 @@ BLACKLIST_GPU = [
     'hair_instancer_uv.blend',
     'hair_length_info.blend',
     'hair_particle_random.blend',
+    "hair_transmission.blend",
     'principled_hair_.*.blend',
     'transparent_shadow_hair.*.blend',
+    # Inconsistent handling of overlapping objects.
+    "T41143.blend",
+    "visibility_particles.blend",
 ]