Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brecht@blender.org>, 2021-11-22 22:41:19 +0300
committer: Brecht Van Lommel <brecht@blender.org>, 2021-11-22 23:02:46 +0300
commit: 29681f186e1a6865da0b4936805df5a608b90ee9 (patch)
tree: a3a4e9dc61195261c3409259dc2d1e4ddf31b976 /intern/cycles/kernel
parent: 73b1ad1920e1bfe47613744e32b67f39108df2b1 (diff)
Fix T93283: Cycles render error with CUDA CPU + GPU after recent optimization
BVH2 triangle intersection was broken on the GPU since packed floats can't be loaded directly into SSE. The better long-term solution for performance would be to build a BVH2 for GPU and Embree for CPU, similar to what we do for OptiX.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- intern/cycles/kernel/geom/motion_triangle_intersect.h | 28
-rw-r--r-- intern/cycles/kernel/geom/triangle_intersect.h | 41
-rw-r--r-- intern/cycles/kernel/light/light.h | 14
3 files changed, 5 insertions, 78 deletions
diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h
index 256e7add21e..72ad237eeeb 100644
--- a/intern/cycles/kernel/geom/motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h
@@ -163,19 +163,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if (ray_triangle_intersect(P,
- dir,
- tmax,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- (ssef *)verts,
-#else
- verts[0],
- verts[1],
- verts[2],
-#endif
- &u,
- &v,
- &t)) {
+ if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
@@ -229,19 +217,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
motion_triangle_vertices(kg, local_object, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if (!ray_triangle_intersect(P,
- dir,
- tmax,
-# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- (ssef *)verts,
-# else
- verts[0],
- verts[1],
- verts[2],
-# endif
- &u,
- &v,
- &t)) {
+ if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
return false;
}
diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h
index 720eceec4ed..57a6ae7fe72 100644
--- a/intern/cycles/kernel/geom/triangle_intersect.h
+++ b/intern/cycles/kernel/geom/triangle_intersect.h
@@ -37,27 +37,11 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
{
const int prim = kernel_tex_fetch(__prim_index, prim_addr);
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex];
-#else
const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
-#endif
float t, u, v;
- if (ray_triangle_intersect(P,
- dir,
- tmax,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- ssef_verts,
-#else
- tri_a,
- tri_b,
- tri_c,
-#endif
- &u,
- &v,
- &t)) {
+ if (ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
@@ -106,27 +90,11 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
const int prim = kernel_tex_fetch(__prim_index, prim_addr);
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
-# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex];
-# else
const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
-# endif
float t, u, v;
- if (!ray_triangle_intersect(P,
- dir,
- tmax,
-# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- ssef_verts,
-# else
- tri_a,
- tri_b,
- tri_c,
-# endif
- &u,
- &v,
- &t)) {
+ if (!ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
return false;
}
@@ -178,11 +146,6 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
isect->t = t;
/* Record geometric normal. */
-# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
- tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
- tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
-# endif
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
return false;
diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h
index 2e7f862a715..3f7d0e0899e 100644
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -676,19 +676,7 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg,
ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
/* calculate intersection with the planar triangle */
- if (!ray_triangle_intersect(P,
- ls->D,
- FLT_MAX,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
- (ssef *)V,
-#else
- V[0],
- V[1],
- V[2],
-#endif
- &ls->u,
- &ls->v,
- &ls->t)) {
+ if (!ray_triangle_intersect(P, ls->D, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
ls->pdf = 0.0f;
return;
}