Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r--intern/cycles/kernel/geom/geom.h2
-rw-r--r--intern/cycles/kernel/geom/geom_attribute.h8
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h113
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h6
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h4
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h9
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume.h4
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume_all.h454
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h130
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h44
-rw-r--r--intern/cycles/kernel/geom/geom_object.h75
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h38
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_shadow.h10
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_subsurface.h4
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_traversal.h11
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume.h8
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume_all.h446
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h42
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h114
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h2
20 files changed, 1271 insertions, 253 deletions
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index bf0d86e6206..5ab900d47aa 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -22,7 +22,9 @@
#define BVH_STACK_SIZE 192
#define BVH_QSTACK_SIZE 384
#define BVH_NODE_SIZE 4
+#define BVH_NODE_LEAF_SIZE 1
#define BVH_QNODE_SIZE 7
+#define BVH_QNODE_LEAF_SIZE 1
#define TRI_NODE_SIZE 3
/* silly workaround for float extended precision that happens when compiling
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 9ac16e86085..c7364e9edac 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -29,13 +29,13 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem)
{
- if(sd->object == PRIM_NONE)
+ if(ccl_fetch(sd, object) == PRIM_NONE)
return (int)ATTR_STD_NOT_FOUND;
/* for SVM, find attribute by unique id */
- uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
+ uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
#ifdef __HAIR__
- attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
+ attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
#endif
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
@@ -49,7 +49,7 @@ ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, ui
*elem = (AttributeElement)attr_map.y;
- if(sd->prim == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
+ if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
return ATTR_STD_NOT_FOUND;
/* return result */
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index c0eefcd9c7f..3d0d406dd0b 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -115,7 +115,39 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_subsurface.h"
#endif
-/* Record all BVH intersection for shadows */
+/* Volume BVH traversal */
+
+#if defined(__VOLUME__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#include "geom_bvh_volume.h"
+#endif
+
+/* Record all intersections - Shadow BVH traversal */
#if defined(__SHADOW_RECORD_ALL__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all
@@ -147,36 +179,36 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_shadow.h"
#endif
-/* Camera inside Volume BVH intersection */
+/* Record all intersections - Volume BVH traversal */
-#if defined(__VOLUME__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume
+#if defined(__VOLUME_RECORD_ALL__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all
#define BVH_FUNCTION_FEATURES 0
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__INSTANCING__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__HAIR__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_hair
+#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
+#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
#undef BVH_FEATURE
@@ -330,6 +362,37 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
}
#endif
+#ifdef __VOLUME_RECORD_ALL__
+ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect,
+ const uint max_hits)
+{
+#ifdef __OBJECT_MOTION__
+ if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_volume_all_hair_motion(kg, ray, isect, max_hits);
+#endif /* __HAIR__ */
+
+ return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_volume_all_hair(kg, ray, isect, max_hits);
+#endif /* __HAIR__ */
+
+#ifdef __INSTANCING__
+ if(kernel_data.bvh.have_instancing)
+ return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits);
+#endif /* __INSTANCING__ */
+
+ return bvh_intersect_volume_all(kg, ray, isect, max_hits);
+}
+#endif
+
/* Ray offset to avoid self intersection.
*
@@ -384,5 +447,21 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
#endif
}
+#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
+/* ToDo: Move to another file? */
+ccl_device int intersections_compare(const void *a, const void *b)
+{
+ const Intersection *isect_a = (const Intersection*)a;
+ const Intersection *isect_b = (const Intersection*)b;
+
+ if(isect_a->t < isect_b->t)
+ return -1;
+ else if(isect_a->t > isect_b->t)
+ return 1;
+ else
+ return 0;
+}
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index 193f49074a3..e4cba99dc96 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -200,7 +200,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -226,7 +226,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -264,7 +264,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
- shader = kernel_tex_fetch(__tri_shader, prim);
+ shader = kernel_tex_fetch(__tri_shader, prim);
}
#ifdef __HAIR__
else {
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index 290297ef5c5..a73139f9c88 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -187,7 +187,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -210,7 +210,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 0298e687de2..73d79fd78ee 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -76,6 +76,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps = 0;
+ isect->num_traversed_instances = 0;
#endif
#if defined(__KERNEL_SSE2__)
@@ -248,7 +249,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -269,7 +270,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect->num_traversal_steps++;
#endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr)) {
+ if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
@@ -362,6 +363,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversed_instances++;
+#endif
}
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
index 0862812a170..41c784869f2 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -188,7 +188,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -213,7 +213,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
+ triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h
new file mode 100644
index 00000000000..b6db36f4b17
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_bvh_volume_all.h
@@ -0,0 +1,454 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __QBVH__
+#include "geom_qbvh_volume_all.h"
+#endif
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits)
+{
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversalStack[BVH_STACK_SIZE];
+ traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
+
+ const uint visibility = PATH_RAY_ALL_VISIBILITY;
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_tfm;
+#endif
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ int num_hits_in_instance = 0;
+#endif
+
+ uint num_hits = 0;
+ isect_array->t = tmax;
+
+#if defined(__KERNEL_SSE2__)
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
+ shuffle_swap_t shufflexyz[3];
+
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ bool traverseChild0, traverseChild1;
+ int nodeAddrChild1;
+
+#if !defined(__KERNEL_SSE2__)
+ /* Intersect two child bounding boxes, non-SSE version */
+ float t = isect_array->t;
+
+ /* fetch node data */
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
+
+ /* intersect ray against child nodes */
+ NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+ NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ /* decide which nodes to traverse next */
+ traverseChild0 = (c0max >= c0min);
+ traverseChild1 = (c1max >= c1min);
+
+#else // __KERNEL_SSE2__
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const float4 cnodes = ((float4*)bvh_nodes)[3];
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ /* decide which nodes to traverse next */
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
+#endif // __KERNEL_SSE2__
+
+ nodeAddr = __float_as_int(cnodes.x);
+ nodeAddrChild1 = __float_as_int(cnodes.y);
+
+ if(traverseChild0 && traverseChild1) {
+ /* both children were intersected, push the farther one */
+#if !defined(__KERNEL_SSE2__)
+ bool closestChild1 = (c1min < c0min);
+#else
+ bool closestChild1 = tminmax[1] < tminmax[0];
+#endif
+
+ if(closestChild1) {
+ int tmp = nodeAddr;
+ nodeAddr = nodeAddrChild1;
+ nodeAddrChild1 = tmp;
+ }
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_STACK_SIZE);
+ traversalStack[stackPtr] = nodeAddrChild1;
+ }
+ else {
+ /* one child was intersected */
+ if(traverseChild1) {
+ nodeAddr = nodeAddrChild1;
+ }
+ else if(!traverseChild0) {
+ /* neither child was intersected */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ const int primAddr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ bool hit;
+
+ /* pop */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+
+ /* primitive intersection */
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* intersect ray against primitive */
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+#if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* intersect ray against primitive */
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ /* intersect ray against primitive */
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ else
+ hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ default: {
+ break;
+ }
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+ triangle_intersect_precalc(dir, &isect_precalc);
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_STACK_SIZE);
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* pop */
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ if(num_hits_in_instance) {
+ float t_fac;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ /* Scale isect->t to adjust for instancing. */
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+ }
+ else {
+ float ignore_t = FLT_MAX;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ }
+
+ isect_t = tmax;
+ isect_array->t = isect_t;
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_MOTION) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return num_hits;
+}
+
+ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits)
+{
+#ifdef __QBVH__
+ if(kernel_data.bvh.use_qbvh) {
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect_array,
+ max_hits);
+ }
+ else
+#endif
+ {
+ kernel_assert(kernel_data.bvh.use_qbvh == false);
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect_array,
+ max_hits);
+ }
+}
+
+#undef BVH_FUNCTION_NAME
+#undef BVH_FUNCTION_FEATURES
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index ac6c6ec4929..9653ad8f1bb 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd,
if(dy) *dy = 0.0f;
#endif
- return kernel_tex_fetch(__attributes_float, offset + sd->prim);
+ return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
}
else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
+ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
if(dy) *dy = 0.0f;
#endif
- return (1.0f - sd->u)*f0 + sd->u*f1;
+ return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
}
else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
+ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return (1.0f - sd->u)*f0 + sd->u*f1;
+ return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
{
float r = 0.0f;
- if(sd->type & PRIMITIVE_ALL_CURVE) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float4 P_curve[2];
- if(sd->type & PRIMITIVE_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
}
else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
}
- r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
+ r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w;
}
return r*2.0f;
@@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
{
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float4 P_curve[2];
@@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
- return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
+ return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u));
}
/* Curve tangent normal */
@@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
{
float3 tgN = make_float3(0.0f,0.0f,0.0f);
- if(sd->type & PRIMITIVE_ALL_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
- tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
+ tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu))));
tgN = normalize(tgN);
/* need to find suitable scaled gd for corrected normal */
#if 0
- tgN = normalize(tgN - gd * sd->dPdu);
+ tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu));
#endif
}
@@ -442,12 +442,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float r_ext = mw_extension + r_curr;
float coverage = 1.0f;
- if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
+ if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
/* the bounding box does not overlap the square centered at O */
tree += level;
level = tree & -tree;
}
- else if (level == 1) {
+ else if(level == 1) {
/* the maximum recursion depth is reached.
* check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
@@ -459,13 +459,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
if(flags & CURVE_KN_RIBBONS) {
float3 tg = (p_en - p_st);
float w = tg.x * tg.x + tg.y * tg.y;
- if (w == 0) {
+ if(w == 0) {
tree++;
level = tree & -tree;
continue;
}
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
- w = clamp((float)w, 0.0f, 1.0f);
+ w = saturate(w);
/* compute u on the curve segment */
u = i_st * (1 - w) + i_en * w;
@@ -474,17 +474,17 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0];
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st)< 0)
+ if(dot(tg, dp_st)< 0)
dp_st *= -1;
- if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
+ if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
tree++;
level = tree & -tree;
continue;
}
float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
+ if(dot(tg, dp_en) < 0)
dp_en *= -1;
- if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
+ if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
tree++;
level = tree & -tree;
continue;
@@ -500,13 +500,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
- if (d0 >= 0)
+ if(d0 >= 0)
coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
else // inside
coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
}
- if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
+ if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
tree++;
level = tree & -tree;
continue;
@@ -548,7 +548,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1)));
float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd;
float td = tb*tb - 4*cyla*tc;
- if (td < 0.0f) {
+ if(td < 0.0f) {
tree++;
level = tree & -tree;
continue;
@@ -559,10 +559,10 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
t = tcentre + correction;
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st)< 0)
+ if(dot(tg, dp_st)< 0)
dp_st *= -1;
float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
+ if(dot(tg, dp_en) < 0)
dp_en *= -1;
if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
@@ -570,14 +570,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
t = tcentre + correction;
}
- if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
+ if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
tree++;
level = tree & -tree;
continue;
}
float w = (zcentre + (tg.z * correction)) * invl;
- w = clamp((float)w, 0.0f, 1.0f);
+ w = saturate(w);
/* compute u on the curve segment */
u = i_st * (1 - w) + i_en * w;
@@ -777,7 +777,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma;
float td = tb*tb - 4*a*tc;
- if (td < 0.0f)
+ if(td < 0.0f)
return false;
float rootd = 0.0f;
@@ -818,7 +818,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
if(t > 0.0f && t < isect->t && z >= 0 && z <= l) {
- if (flags & CURVE_KN_ENCLOSEFILTER) {
+ if(flags & CURVE_KN_ENCLOSEFILTER) {
float enc_ratio = 1.01f;
if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio));
@@ -890,7 +890,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -903,7 +903,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
int prim = kernel_tex_fetch(__prim_index, isect->prim);
float4 v00 = kernel_tex_fetch(__curves, prim);
- int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float3 tg;
@@ -914,14 +914,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float4 P_curve[4];
- if(sd->type & PRIMITIVE_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
}
else {
- motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+ motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve);
}
float3 p[4];
@@ -933,43 +933,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
P = P + D*t;
#ifdef __UV__
- sd->u = isect->u;
- sd->v = 0.0f;
+ ccl_fetch(sd, u) = isect->u;
+ ccl_fetch(sd, v) = 0.0f;
#endif
tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- sd->Ng = normalize(-(D - tg * (dot(tg, D))));
+ ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D))));
}
else {
/* direction from inside to surface of curve */
float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
+ ccl_fetch(sd, Ng) = normalize(P - p_curr);
/* adjustment for changing radius */
float gd = isect->v;
if(gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
+ ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
+ ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
}
/* todo: sometimes the normal is still so that this is detected as
* backfacing even if cull backfaces is enabled */
- sd->N = sd->Ng;
+ ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
}
else {
float4 P_curve[2];
- if(sd->type & PRIMITIVE_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
}
else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
}
float l = 1.0f;
@@ -980,39 +980,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float3 dif = P - float4_to_float3(P_curve[0]);
#ifdef __UV__
- sd->u = dot(dif,tg)/l;
- sd->v = 0.0f;
+ ccl_fetch(sd, u) = dot(dif,tg)/l;
+ ccl_fetch(sd, v) = 0.0f;
#endif
- if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
- sd->Ng = -(D - tg * dot(tg, D));
- sd->Ng = normalize(sd->Ng);
+ if(flag & CURVE_KN_TRUETANGENTGNORMAL) {
+ ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D));
+ ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
else {
float gd = isect->v;
/* direction from inside to surface of curve */
- sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd);
+ ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd);
/* adjustment for changing radius */
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
+ if(gd != 0.0f) {
+ ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
+ ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
}
- sd->N = sd->Ng;
+ ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
}
#ifdef __DPDU__
/* dPdu/dPdv */
- sd->dPdu = tg;
- sd->dPdv = cross(tg, sd->Ng);
+ ccl_fetch(sd, dPdu) = tg;
+ ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng));
#endif
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index d3297e05c67..86f93f242a1 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -134,7 +134,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
return P;
}
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -161,7 +161,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -187,7 +187,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -213,7 +213,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -236,25 +236,25 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
{
/* get shader */
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
/* get motion info */
int numsteps, numverts;
- object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
+ object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
/* figure out which steps we need to fetch and their interpolation factor */
int maxstep = numsteps*2;
- int step = min((int)(sd->time*maxstep), maxstep-1);
- float t = sd->time*maxstep - step;
+ int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
+ float t = ccl_fetch(sd, time)*maxstep - step;
/* find attribute */
AttributeElement elem;
- int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
float3 verts[3], next_verts[3];
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)));
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
@@ -268,33 +268,33 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
#ifdef __SUBSURFACE__
if(!subsurface)
#endif
- sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
+ ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
#ifdef __SUBSURFACE__
else
- sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
+ ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
#endif
/* compute face normal */
float3 Ng;
- if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
else
Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
- sd->Ng = Ng;
- sd->N = Ng;
+ ccl_fetch(sd, Ng) = Ng;
+ ccl_fetch(sd, N) = Ng;
/* compute derivatives of P w.r.t. uv */
#ifdef __DPDU__
- sd->dPdu = (verts[0] - verts[2]);
- sd->dPdv = (verts[1] - verts[2]);
+ ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
+ ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
#endif
/* compute smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
+ if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
/* find attribute */
AttributeElement elem;
- int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+ int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
@@ -308,10 +308,10 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
/* interpolate between vertices */
- float u = sd->u;
- float v = sd->v;
+ float u = ccl_fetch(sd, u);
+ float v = ccl_fetch(sd, v);
float w = 1.0f - u - v;
- sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
+ ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
}
}
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 79a56683454..9d0a008fff1 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -123,9 +123,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg
ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
{
#ifdef __OBJECT_MOTION__
- *P = transform_point(&sd->ob_tfm, *P);
+ *P = transform_point_auto(&ccl_fetch(sd, ob_tfm), *P);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
*P = transform_point(&tfm, *P);
#endif
}
@@ -135,9 +135,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, const Shader
ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
{
#ifdef __OBJECT_MOTION__
- *P = transform_point(&sd->ob_itfm, *P);
+ *P = transform_point_auto(&ccl_fetch(sd, ob_itfm), *P);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
*P = transform_point(&tfm, *P);
#endif
}
@@ -147,9 +147,9 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, cons
ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
- *N = normalize(transform_direction_transposed(&sd->ob_tfm, *N));
+ *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N));
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
*N = normalize(transform_direction_transposed(&tfm, *N));
#endif
}
@@ -159,9 +159,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const
ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
- *N = normalize(transform_direction_transposed(&sd->ob_itfm, *N));
+ *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_itfm), *N));
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
*N = normalize(transform_direction_transposed(&tfm, *N));
#endif
}
@@ -171,9 +171,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa
ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
- *D = transform_direction(&sd->ob_tfm, *D);
+ *D = transform_direction_auto(&ccl_fetch(sd, ob_tfm), *D);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
*D = transform_direction(&tfm, *D);
#endif
}
@@ -183,9 +183,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData
ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
- *D = transform_direction(&sd->ob_itfm, *D);
+ *D = transform_direction_auto(&ccl_fetch(sd, ob_itfm), *D);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
*D = transform_direction(&tfm, *D);
#endif
}
@@ -194,13 +194,13 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha
ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd)
{
- if(sd->object == OBJECT_NONE)
+ if(ccl_fetch(sd, object) == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
#ifdef __OBJECT_MOTION__
- return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w);
+ return make_float3(ccl_fetch(sd, ob_tfm).x.w, ccl_fetch(sd, ob_tfm).y.w, ccl_fetch(sd, ob_tfm).z.w);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
#endif
}
@@ -243,7 +243,7 @@ ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
{
if(object == OBJECT_NONE)
- return 0.0f;
+ return 0;
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
float4 f = kernel_tex_fetch(__objects, offset);
@@ -296,7 +296,7 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
{
- return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2 + 1);
+ return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1);
}
/* Particle data from which object was instanced */
@@ -377,7 +377,7 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
/* Transform ray into object space to enter static object in BVH */
-ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
+ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
@@ -425,7 +425,7 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
/* Transorm ray to exit static object in BVH */
-ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
+ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
{
if(*t != FLT_MAX) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
@@ -453,7 +453,7 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
-ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
+ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm)
{
Transform itfm;
*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
@@ -497,7 +497,7 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object,
/* Transorm ray to exit motion blurred object in BVH */
-ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
+ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm)
{
if(*t != FLT_MAX)
*t *= len(transform_direction(tfm, 1.0f/(*idir)));
@@ -520,5 +520,38 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int obj
#endif
+/* TODO(sergey): This is only for until we've got OpenCL 2.0
+ * on all devices we consider supported. It'll be replaced with
+ * generic address space.
+ */
+
+#ifdef __KERNEL_OPENCL__
+ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg,
+ const ShaderData *sd,
+ ccl_addr_space float3 *D)
+{
+ float3 private_D = *D;
+ object_dir_transform(kg, sd, &private_D);
+ *D = private_D;
+}
+
+ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg,
+ const ShaderData *sd,
+ ccl_addr_space float3 *N)
+{
+ float3 private_N = *N;
+ object_normal_transform(kg, sd, &private_N);
+ *N = private_N;
+}
+#endif
+
+#ifndef __KERNEL_OPENCL__
+# define object_dir_transform_auto object_dir_transform
+# define object_normal_transform_auto object_normal_transform
+#else
+# define object_dir_transform_auto object_dir_transform_addrspace
+# define object_normal_transform_auto object_normal_transform_addrspace
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index b52ec7ef1b2..30f12d32355 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -25,16 +25,16 @@ CCL_NAMESPACE_BEGIN
ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
return triangle_attribute_float(kg, sd, elem, offset, dx, dy);
}
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
+ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
return curve_attribute_float(kg, sd, elem, offset, dx, dy);
}
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
+ else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float(kg, sd, elem, offset, dx, dy);
}
#endif
@@ -47,16 +47,16 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *
ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
return triangle_attribute_float3(kg, sd, elem, offset, dx, dy);
}
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
+ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
return curve_attribute_float3(kg, sd, elem, offset, dx, dy);
}
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
+ else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float3(kg, sd, elem, offset, dx, dy);
}
#endif
@@ -108,9 +108,9 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in
ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE)
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)
#ifdef __DPDU__
- return normalize(sd->dPdu);
+ return normalize(ccl_fetch(sd, dPdu));
#else
return make_float3(0.0f, 0.0f, 0.0f);
#endif
@@ -124,12 +124,12 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
object_normal_transform(kg, sd, &data);
- return cross(sd->N, normalize(cross(data, sd->N)));
+ return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N))));
}
else {
/* otherwise use surface derivatives */
#ifdef __DPDU__
- return normalize(sd->dPdu);
+ return normalize(ccl_fetch(sd, dPdu));
#else
return make_float3(0.0f, 0.0f, 0.0f);
#endif
@@ -144,16 +144,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
float3 center;
#ifdef __HAIR__
- bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE;
+ bool is_curve_primitive = ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE;
if(is_curve_primitive) {
center = curve_motion_center_location(kg, sd);
- if(!(sd->flag & SD_TRANSFORM_APPLIED))
+ if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED))
object_position_transform(kg, sd, &center);
}
else
#endif
- center = sd->P;
+ center = ccl_fetch(sd, P);
float3 motion_pre = center, motion_post = center;
@@ -164,16 +164,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
if(offset != ATTR_STD_NOT_FOUND) {
/* get motion info */
int numverts, numkeys;
- object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
+ object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys);
/* lookup attributes */
- int offset_next = (sd->type & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys;
+ int offset_next = (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys;
motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL);
motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL);
#ifdef __HAIR__
- if(is_curve_primitive && (sd->flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
+ if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
object_position_transform(kg, sd, &motion_pre);
object_position_transform(kg, sd, &motion_post);
}
@@ -184,17 +184,17 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
* transformation was set match the world/object space of motion_pre/post */
Transform tfm;
- tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE);
+ tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_PRE);
motion_pre = transform_point(&tfm, motion_pre);
- tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST);
+ tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_POST);
motion_post = transform_point(&tfm, motion_post);
float3 motion_center;
/* camera motion, for perspective/orthographic motion.pre/post will be a
* world-to-raster matrix, for panorama it's world-to-camera */
- if (kernel_data.cam.type != CAMERA_PANORAMA) {
+ if(kernel_data.cam.type != CAMERA_PANORAMA) {
tfm = kernel_data.cam.worldtoraster;
motion_center = transform_perspective(&tfm, center);
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
index 4233ff15c86..f79b2ed9f34 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
@@ -155,11 +155,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = c1;
+ traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = c0;
+ traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -206,7 +206,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
@@ -241,7 +241,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -279,7 +279,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
- shader = kernel_tex_fetch(__tri_shader, prim);
+ shader = kernel_tex_fetch(__tri_shader, prim);
}
#ifdef __HAIR__
else {
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
index 62598115fa3..d85e1a4691e 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -202,7 +202,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -226,7 +226,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(tri_object != subsurface_object) {
continue;
}
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
index 99d2fb20837..7e356ea062b 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
@@ -80,6 +80,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps = 0;
+ isect->num_traversed_instances = 0;
#endif
ssef tnear(0.0f), tfar(ray->t);
@@ -185,6 +186,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(traverseChild == 0) {
if(d1 < d0) {
nodeAddr = c1;
+ nodeDist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
@@ -193,6 +195,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
else {
nodeAddr = c0;
+ nodeDist = d0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
@@ -260,7 +263,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
#ifdef __VISIBILITY_FLAG__
if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
@@ -296,7 +299,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect->num_traversal_steps++;
#endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr)) {
+ if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE)
@@ -377,6 +380,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
traversalStack[stackPtr].dist = -FLT_MAX;
nodeAddr = kernel_tex_fetch(__object_node, object);
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversed_instances++;
+#endif
}
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h
index 2c396e99fc4..d8cfa3a4061 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h
@@ -95,10 +95,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
-#if defined(__KERNEL_DEBUG__)
- isect->num_traversal_steps++;
-#endif
-
ssef dist;
int traverseChild = qbvh_node_intersect(kg,
tnear,
@@ -208,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -234,7 +230,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
continue;
}
/* Intersect ray against primitive. */
- triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
+ triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
new file mode 100644
index 00000000000..d5131919944
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
@@ -0,0 +1,446 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits)
+{
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
+
+ const uint visibility = PATH_RAY_ALL_VISIBILITY;
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_tfm;
+#endif
+
+#ifndef __KERNEL_SSE41__
+ if(!isfinite(P.x)) {
+ return false;
+ }
+#endif
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ int num_hits_in_instance = 0;
+#endif
+
+ uint num_hits = 0;
+ isect_array->t = tmax;
+
+ ssef tnear(0.0f), tfar(isect_t);
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+ float3 P_idir = P*idir;
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ ssef dist;
+ int traverseChild = qbvh_node_intersect(kg,
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#else
+ org,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
+
+ if(traverseChild != 0) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(traverseChild);
+ if(traverseChild == 0) {
+ nodeAddr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float*)&dist)[r];
+ r = __bscf(traverseChild);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ if(d1 < d0) {
+ nodeAddr = c1;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
+ continue;
+ }
+ else {
+ nodeAddr = c0;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
+ qbvh_stack_sort(&traversalStack[stackPtr],
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float*)&dist)[r];
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
+ qbvh_stack_sort(&traversalStack[stackPtr],
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &traversalStack[stackPtr - 3]);
+ }
+
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ int primAddr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+ bool hit;
+
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+
+ /* Primitive intersection. */
+ switch(p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+#if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ else
+ hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect_t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop. */
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ /* Instance pop. */
+ if(num_hits_in_instance) {
+ float t_fac;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ /* Scale isect->t to adjust for instancing. */
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+ }
+ else {
+ float ignore_t = FLT_MAX;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ }
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect_t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ isect_t = tmax;
+ isect_array->t = isect_t;
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return num_hits;
+}
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index dd3928682e3..995dfac5b09 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -27,14 +27,14 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
{
/* load triangle vertices */
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* return normal */
- if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
return normalize(cross(v2 - v0, v1 - v0));
else
return normalize(cross(v1 - v0, v2 - v0));
@@ -94,7 +94,7 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo
/* Ray differentials on triangle */
-ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv)
+ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv)
{
/* fetch triangle vertex coordinates */
float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -116,34 +116,34 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
- return kernel_tex_fetch(__attributes_float, offset + sd->prim);
+ return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else if(elem == ATTR_ELEMENT_CORNER) {
- int tri = offset + sd->prim*3;
+ int tri = offset + ccl_fetch(sd, prim)*3;
float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else {
if(dx) *dx = 0.0f;
@@ -159,24 +159,24 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) {
- int tri = offset + sd->prim*3;
+ int tri = offset + ccl_fetch(sd, prim)*3;
float3 f0, f1, f2;
if(elem == ATTR_ELEMENT_CORNER) {
@@ -191,11 +191,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
}
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else {
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index c9e30a451da..3ef918dc842 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-/* Triangle/Ray intersections .
+/* Triangle/Ray intersections.
*
* For BVH ray intersection we use a precomputed triangle storage to accelerate
* intersection at the cost of more memory usage.
@@ -49,18 +49,27 @@ typedef struct IsectPrecalc {
float Sx, Sy, Sz;
} IsectPrecalc;
-/* Workaround for CUDA toolkit 6.5.16. */
-#if defined(__KERNEL_CPU__) || !defined(__KERNEL_CUDA_EXPERIMENTAL__) || __CUDA_ARCH__ < 500
+#if defined(__KERNEL_CUDA__)
# if (defined(i386) || defined(_M_IX86))
+# if __CUDA_ARCH__ > 500
ccl_device_noinline
-# else
+# else /* __CUDA_ARCH__ > 500 */
ccl_device_inline
-# endif
-#else
+# endif /* __CUDA_ARCH__ > 500 */
+# else /* (defined(i386) || defined(_M_IX86)) */
+# if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500)
ccl_device_noinline
-#endif
+# else
+ccl_device_inline
+# endif
+# endif /* (defined(i386) || defined(_M_IX86)) */
+#elif defined(__KERNEL_OPENCL_APPLE__)
+ccl_device_noinline
+#else /* defined(__KERNEL_OPENCL_APPLE__) */
+ccl_device_inline
+#endif /* defined(__KERNEL_OPENCL_APPLE__) */
void triangle_intersect_precalc(float3 dir,
- IsectPrecalc *isect_precalc)
+ IsectPrecalc *isect_precalc)
{
/* Calculate dimension where the ray direction is maximal. */
int kz = util_max_axis(make_float3(fabsf(dir.x),
@@ -77,10 +86,10 @@ void triangle_intersect_precalc(float3 dir,
}
/* Calculate the shear constants. */
- float inf_dir_z = 1.0f / IDX(dir, kz);
- isect_precalc->Sx = IDX(dir, kx) * inf_dir_z;
- isect_precalc->Sy = IDX(dir, ky) * inf_dir_z;
- isect_precalc->Sz = inf_dir_z;
+ float inv_dir_z = 1.0f / IDX(dir, kz);
+ isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
+ isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
+ isect_precalc->Sz = inv_dir_z;
/* Store the dimensions. */
isect_precalc->kx = kx;
@@ -98,7 +107,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const IsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
- float3 dir,
uint visibility,
int object,
int triAddr)
@@ -111,14 +119,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -155,8 +161,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
*/
const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
const float sign_T = xor_signmast(T, sign_mask);
- if ((sign_T < 0.0f) ||
- (sign_T > isect->t * xor_signmast(det, sign_mask)))
+ if((sign_T < 0.0f) ||
+ (sign_T > isect->t * xor_signmast(det, sign_mask)))
{
return false;
}
@@ -191,7 +197,6 @@ ccl_device_inline void triangle_intersect_subsurface(
const IsectPrecalc *isect_precalc,
Intersection *isect_array,
float3 P,
- float3 dir,
int object,
int triAddr,
float tmax,
@@ -207,14 +212,12 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -249,13 +252,10 @@ ccl_device_inline void triangle_intersect_subsurface(
/* Calculate scaled z−coordinates of vertices and use them to calculate
* the hit distance.
*/
- const float Az = Sz * A_kz;
- const float Bz = Sz * B_kz;
- const float Cz = Sz * C_kz;
- const float T = U * Az + V * Bz + W * Cz;
-
- if ((xor_signmast(T, sign_mask) < 0.0f) ||
- (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask)))
+ const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
+ const float sign_T = xor_signmast(T, sign_mask);
+ if((sign_T < 0.0f) ||
+ (sign_T > tmax * xor_signmast(det, sign_mask)))
{
return;
}
@@ -315,7 +315,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
return P;
}
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -327,14 +327,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -343,7 +341,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -372,7 +370,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg,
isect->object,
@@ -386,14 +384,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -402,7 +398,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg,
isect->object,
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index c33509fbf4f..c72afa2a3a4 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -60,7 +60,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
#endif
if(dx) *dx = 0.0f;
- if(dx) *dy = 0.0f;
+ if(dy) *dy = 0.0f;
/* todo: support float textures to lower memory usage for single floats */
return average(float4_to_float3(r));