diff options
Diffstat (limited to 'intern/cycles/kernel/geom')
20 files changed, 1271 insertions, 253 deletions
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index bf0d86e6206..5ab900d47aa 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -22,7 +22,9 @@ #define BVH_STACK_SIZE 192 #define BVH_QSTACK_SIZE 384 #define BVH_NODE_SIZE 4 +#define BVH_NODE_LEAF_SIZE 1 #define BVH_QNODE_SIZE 7 +#define BVH_QNODE_LEAF_SIZE 1 #define TRI_NODE_SIZE 3 /* silly workaround for float extended precision that happens when compiling diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index 9ac16e86085..c7364e9edac 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -29,13 +29,13 @@ CCL_NAMESPACE_BEGIN ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) { - if(sd->object == PRIM_NONE) + if(ccl_fetch(sd, object) == PRIM_NONE) return (int)ATTR_STD_NOT_FOUND; /* for SVM, find attribute by unique id */ - uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; + uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; #ifdef __HAIR__ - attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; + attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? 
attr_offset + ATTR_PRIM_CURVE: attr_offset; #endif uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); @@ -49,7 +49,7 @@ ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, ui *elem = (AttributeElement)attr_map.y; - if(sd->prim == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) + if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) return ATTR_STD_NOT_FOUND; /* return result */ diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h index c0eefcd9c7f..3d0d406dd0b 100644 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -115,7 +115,39 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_subsurface.h" #endif -/* Record all BVH intersection for shadows */ +/* Volume BVH traversal */ + +#if defined(__VOLUME__) +#define BVH_FUNCTION_NAME bvh_intersect_volume +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +#include "geom_bvh_volume.h" +#endif + +/* Record all intersections - Shadow BVH traversal */ #if defined(__SHADOW_RECORD_ALL__) #define BVH_FUNCTION_NAME 
bvh_intersect_shadow_all @@ -147,36 +179,36 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_shadow.h" #endif -/* Camera inside Volume BVH intersection */ +/* Record all intersections - Volume BVH traversal */ -#if defined(__VOLUME__) -#define BVH_FUNCTION_NAME bvh_intersect_volume +#if defined(__VOLUME_RECORD_ALL__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all #define BVH_FUNCTION_FEATURES 0 -#include "geom_bvh_volume.h" +#include "geom_bvh_volume_all.h" #endif -#if defined(__VOLUME__) && defined(__INSTANCING__) -#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing +#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing #define BVH_FUNCTION_FEATURES BVH_INSTANCING -#include "geom_bvh_volume.h" +#include "geom_bvh_volume_all.h" #endif -#if defined(__VOLUME__) && defined(__HAIR__) -#define BVH_FUNCTION_NAME bvh_intersect_volume_hair +#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair #define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH -#include "geom_bvh_volume.h" +#include "geom_bvh_volume_all.h" #endif -#if defined(__VOLUME__) && defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_volume_motion +#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion #define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -#include "geom_bvh_volume.h" +#include "geom_bvh_volume_all.h" #endif -#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion +#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion #define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION -#include "geom_bvh_volume.h" +#include "geom_bvh_volume_all.h" #endif #undef 
BVH_FEATURE @@ -330,6 +362,37 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, } #endif +#ifdef __VOLUME_RECORD_ALL__ +ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint max_hits) +{ /* Dispatcher for the record-all volume traversal: picks one of the bvh_intersect_volume_all variants based on kernel_data.bvh.have_motion, have_curves and have_instancing (each test only exists when the matching feature is compiled in) and returns that variant's result unchanged. The checks are ordered from the most to the least featureful variant, with the plain variant as fallback. */ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_all_hair_motion(kg, ray, isect, max_hits); +#endif /* __HAIR__ */ + + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_all_hair(kg, ray, isect, max_hits); +#endif /* __HAIR__ */ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits); +#endif /* __INSTANCING__ */ + + return bvh_intersect_volume_all(kg, ray, isect, max_hits); +} +#endif + /* Ray offset to avoid self intersection. * @@ -384,5 +447,21 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) #endif } +#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__) +/* Comparison callback ordering two Intersection records by ascending t: returns -1 when a is nearer, 1 when b is nearer and 0 otherwise. The (const void *, const void *) signature matches what a qsort()-style sort expects; presumably that is how callers use it - confirm against the call sites. ToDo: Move to another file? 
*/ +ccl_device int intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection*)a; + const Intersection *isect_b = (const Intersection*)b; + + if(isect_a->t < isect_b->t) + return -1; + else if(isect_a->t > isect_b->t) + return 1; + else + return 0; +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h index 193f49074a3..e4cba99dc96 100644 --- a/intern/cycles/kernel/geom/geom_bvh_shadow.h +++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h @@ -200,7 +200,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* if node is leaf, fetch triangle list */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) @@ -226,7 +226,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, switch(p_type) { case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr); + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); break; } #if BVH_FEATURE(BVH_MOTION) @@ -264,7 +264,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index 290297ef5c5..a73139f9c88 100644 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -187,7 +187,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* if node is leaf, fetch triangle list */ 
if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) @@ -210,7 +210,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; if(tri_object != subsurface_object) continue; - triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); } break; } diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h index 0298e687de2..73d79fd78ee 100644 --- a/intern/cycles/kernel/geom/geom_bvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h @@ -76,6 +76,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, #if defined(__KERNEL_DEBUG__) isect->num_traversal_steps = 0; + isect->num_traversed_instances = 0; #endif #if defined(__KERNEL_SSE2__) @@ -248,7 +249,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* if node is leaf, fetch triangle list */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) @@ -269,7 +270,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, isect->num_traversal_steps++; #endif kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr)) { + if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { /* shadow ray early termination */ 
#if defined(__KERNEL_SSE2__) if(visibility == PATH_RAY_SHADOW_OPAQUE) @@ -362,6 +363,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); + +#if defined(__KERNEL_DEBUG__) + isect->num_traversed_instances++; +#endif } } #endif /* FEATURE(BVH_INSTANCING) */ diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h index 0862812a170..41c784869f2 100644 --- a/intern/cycles/kernel/geom/geom_bvh_volume.h +++ b/intern/cycles/kernel/geom/geom_bvh_volume.h @@ -188,7 +188,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* if node is leaf, fetch triangle list */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) @@ -213,7 +213,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } - triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr); + triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); } break; } diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h new file mode 100644 index 00000000000..b6db36f4b17 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_bvh_volume_all.h @@ -0,0 +1,454 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +#include "geom_qbvh_volume_all.h" +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + * Every hit on a primitive whose object has SD_OBJECT_HAS_VOLUME set is + * recorded into isect_array; traversal stops early once max_hits hits have + * been recorded. Returns the number of hits recorded. + * + * The entry following the last recorded hit is written as scratch (its t is + * reset before the max_hits check), so isect_array needs room for + * max_hits + 1 entries. + * + * Hits found inside an instance are recorded in the instance's local space + * and their t is multiplied by a scale factor when the instance is left, or + * when returning early from inside it. + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const 
shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + bool traverseChild0, traverseChild1; + int nodeAddrChild1; + +#if !defined(__KERNEL_SSE2__) + /* Intersect two child bounding boxes, non-SSE version */ + float t = isect_array->t; + + /* fetch node data */ + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); + + /* intersect ray against child nodes */ + NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * 
idir.y; + NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + /* decide which nodes to traverse next */ + traverseChild0 = (c0max >= c0min); + traverseChild1 = (c1max >= c1min); + +#else // __KERNEL_SSE2__ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + + /* fetch node data */ + const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + const float4 cnodes = ((float4*)bvh_nodes)[3]; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + + const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + /* decide which nodes to traverse next */ + traverseChild0 = (movemask(lrhit) & 1); + traverseChild1 = (movemask(lrhit) & 2); +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.x); + nodeAddrChild1 = __float_as_int(cnodes.y); + + if(traverseChild0 && traverseChild1) { + /* both children were intersected, push the farther one */ +#if !defined(__KERNEL_SSE2__) + bool closestChild1 = (c1min < c0min); +#else + bool closestChild1 = tminmax[1] < tminmax[0]; +#endif + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* one child 
was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + else if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + bool hit; + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) + /* Only rescale when returning from inside an instance: at the top level there is no object transform to fetch and the recorded t values are already in ray space. */ + if(object != OBJECT_NONE) { +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) + /* Only rescale when returning from inside an instance: at the top level there is no object transform to fetch and the recorded t values are already in ray space. */ + if(object != OBJECT_NONE) { +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + if(hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) + /* Only rescale when returning from inside an instance: at the top level there is no object transform to fetch and the recorded t values are already in ray space. */ + if(object != OBJECT_NONE) { +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif + default: { + break; + } + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + if(num_hits_in_instance) { + float t_fac; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, 
object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale isect->t to adjust for instancing. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + isect_t = tmax; + isect_array->t = isect_t; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +/* Entry point for this variant: forwards to the QBVH implementation when + * __QBVH__ is compiled in and kernel_data.bvh.use_qbvh is set, otherwise to + * the BVH implementation above. Returns whatever the chosen one returns. + */ +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + max_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + max_hits); + } +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index ac6c6ec4929..9653ad8f1bb 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, if(dy) *dy = 0.0f; #endif - return kernel_tex_fetch(__attributes_float, offset + sd->prim); + return 
kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); } else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, offset + k0); float f1 = kernel_tex_fetch(__attributes_float, offset + k1); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); + if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); if(dy) *dy = 0.0f; #endif - return (1.0f - sd->u)*f0 + sd->u*f1; + return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); } else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0)); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); + if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return (1.0f - sd->u)*f0 + sd->u*f1; + return (1.0f - ccl_fetch(sd, u))*f0 + 
ccl_fetch(sd, u)*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) { float r = 0.0f; - if(sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float4 P_curve[2]; - if(sd->type & PRIMITIVE_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); } - r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; + r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w; } return r*2.0f; @@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float4 P_curve[2]; @@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); + return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - 
ccl_fetch(sd, u)); } /* Curve tangent normal */ @@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) { float3 tgN = make_float3(0.0f,0.0f,0.0f); - if(sd->type & PRIMITIVE_ALL_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); + tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu)))); tgN = normalize(tgN); /* need to find suitable scaled gd for corrected normal */ #if 0 - tgN = normalize(tgN - gd * sd->dPdu); + tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu)); #endif } @@ -442,12 +442,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float r_ext = mw_extension + r_curr; float coverage = 1.0f; - if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { + if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { /* the bounding box does not overlap the square centered at O */ tree += level; level = tree & -tree; } - else if (level == 1) { + else if(level == 1) { /* the maximum recursion depth is reached. * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. 
@@ -459,13 +459,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect if(flags & CURVE_KN_RIBBONS) { float3 tg = (p_en - p_st); float w = tg.x * tg.x + tg.y * tg.y; - if (w == 0) { + if(w == 0) { tree++; level = tree & -tree; continue; } w = -(p_st.x * tg.x + p_st.y * tg.y) / w; - w = clamp((float)w, 0.0f, 1.0f); + w = saturate(w); /* compute u on the curve segment */ u = i_st * (1 - w) + i_en * w; @@ -474,17 +474,17 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) + if(dot(tg, dp_st)< 0) dp_st *= -1; - if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { + if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { tree++; level = tree & -tree; continue; } float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) + if(dot(tg, dp_en) < 0) dp_en *= -1; - if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { + if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { tree++; level = tree & -tree; continue; @@ -500,13 +500,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float d0 = d - r_curr; float d1 = d + r_curr; float inv_mw_extension = 1.0f/mw_extension; - if (d0 >= 0) + if(d0 >= 0) coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; else // inside coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; } - if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { + if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { tree++; level = tree & -tree; continue; @@ -548,7 +548,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, 
Intersect float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; float td = tb*tb - 4*cyla*tc; - if (td < 0.0f) { + if(td < 0.0f) { tree++; level = tree & -tree; continue; @@ -559,10 +559,10 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect t = tcentre + correction; float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) + if(dot(tg, dp_st)< 0) dp_st *= -1; float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) + if(dot(tg, dp_en) < 0) dp_en *= -1; if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { @@ -570,14 +570,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect t = tcentre + correction; } - if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { + if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { tree++; level = tree & -tree; continue; } float w = (zcentre + (tg.z * correction)) * invl; - w = clamp((float)w, 0.0f, 1.0f); + w = saturate(w); /* compute u on the curve segment */ u = i_st * (1 - w) + i_en * w; @@ -777,7 +777,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; float td = tb*tb - 4*a*tc; - if (td < 0.0f) + if(td < 0.0f) return false; float rootd = 0.0f; @@ -818,7 +818,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { - if (flags & CURVE_KN_ENCLOSEFILTER) { + if(flags & CURVE_KN_ENCLOSEFILTER) { float enc_ratio = 1.01f; if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { float a2 = 1.0f - 
(dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); @@ -890,7 +890,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -903,7 +903,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con int prim = kernel_tex_fetch(__prim_index, isect->prim); float4 v00 = kernel_tex_fetch(__curves, prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float3 tg; @@ -914,14 +914,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float4 P_curve[4]; - if(sd->type & PRIMITIVE_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { P_curve[0] = kernel_tex_fetch(__curve_keys, ka); P_curve[1] = kernel_tex_fetch(__curve_keys, k0); P_curve[2] = kernel_tex_fetch(__curve_keys, k1); P_curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); + motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve); } float3 p[4]; @@ -933,43 +933,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con P = P + D*t; #ifdef __UV__ - sd->u = isect->u; - sd->v = 0.0f; + ccl_fetch(sd, u) = isect->u; + ccl_fetch(sd, v) = 0.0f; #endif tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - sd->Ng = normalize(-(D - tg * (dot(tg, D)))); + ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D)))); } else { /* direction from inside to surface of curve */ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - sd->Ng = normalize(P - 
p_curr); + ccl_fetch(sd, Ng) = normalize(P - p_curr); /* adjustment for changing radius */ float gd = isect->v; if(gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); + ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; + ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); } } /* todo: sometimes the normal is still so that this is detected as * backfacing even if cull backfaces is enabled */ - sd->N = sd->Ng; + ccl_fetch(sd, N) = ccl_fetch(sd, Ng); } else { float4 P_curve[2]; - if(sd->type & PRIMITIVE_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); } float l = 1.0f; @@ -980,39 +980,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float3 dif = P - float4_to_float3(P_curve[0]); #ifdef __UV__ - sd->u = dot(dif,tg)/l; - sd->v = 0.0f; + ccl_fetch(sd, u) = dot(dif,tg)/l; + ccl_fetch(sd, v) = 0.0f; #endif - if (flag & CURVE_KN_TRUETANGENTGNORMAL) { - sd->Ng = -(D - tg * dot(tg, D)); - sd->Ng = normalize(sd->Ng); + if(flag & CURVE_KN_TRUETANGENTGNORMAL) { + ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D)); + ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); } else { float gd = isect->v; /* direction from inside to surface of curve */ - sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd); + ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd); /* adjustment for changing radius */ - if (gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); + if(gd != 0.0f) { + ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; + ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); } } - sd->N = sd->Ng; + ccl_fetch(sd, N) = ccl_fetch(sd, Ng); } #ifdef __DPDU__ /* dPdu/dPdv */ - 
sd->dPdu = tg; - sd->dPdv = cross(tg, sd->Ng); + ccl_fetch(sd, dPdu) = tg; + ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng)); #endif if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index d3297e05c67..86f93f242a1 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -134,7 +134,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s return P; } #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -161,7 +161,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif @@ -187,7 +187,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh #ifdef __INTERSECTION_REFINE__ if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -213,7 +213,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif @@ -236,25 +236,25 @@ ccl_device_inline float3 
motion_triangle_refine_subsurface(KernelGlobals *kg, Sh ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface) { /* get shader */ - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim)); /* get motion info */ int numsteps, numverts; - object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); + object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL); /* figure out which steps we need to fetch and their interpolation factor */ int maxstep = numsteps*2; - int step = min((int)(sd->time*maxstep), maxstep-1); - float t = sd->time*maxstep - step; + int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1); + float t = ccl_fetch(sd, time)*maxstep - step; /* find attribute */ AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch vertex coordinates */ float3 verts[3], next_verts[3]; - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim))); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); @@ -268,33 +268,33 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD #ifdef __SUBSURFACE__ if(!subsurface) #endif - sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); + ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts); #ifdef __SUBSURFACE__ else - sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts); + ccl_fetch(sd, P) = 
motion_triangle_refine_subsurface(kg, sd, isect, ray, verts); #endif /* compute face normal */ float3 Ng; - if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED) Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); else Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); - sd->Ng = Ng; - sd->N = Ng; + ccl_fetch(sd, Ng) = Ng; + ccl_fetch(sd, N) = Ng; /* compute derivatives of P w.r.t. uv */ #ifdef __DPDU__ - sd->dPdu = (verts[0] - verts[2]); - sd->dPdv = (verts[1] - verts[2]); + ccl_fetch(sd, dPdu) = (verts[0] - verts[2]); + ccl_fetch(sd, dPdv) = (verts[1] - verts[2]); #endif /* compute smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) { + if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { /* find attribute */ AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch vertex coordinates */ @@ -308,10 +308,10 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; /* interpolate between vertices */ - float u = sd->u; - float v = sd->v; + float u = ccl_fetch(sd, u); + float v = ccl_fetch(sd, v); float w = 1.0f - u - v; - sd->N = (u*normals[0] + v*normals[1] + w*normals[2]); + ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]); } } diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 79a56683454..9d0a008fff1 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -123,9 +123,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ - *P = 
transform_point(&sd->ob_tfm, *P); + *P = transform_point_auto(&ccl_fetch(sd, ob_tfm), *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -135,9 +135,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, const Shader ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point(&sd->ob_itfm, *P); + *P = transform_point_auto(&ccl_fetch(sd, ob_itfm), *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -147,9 +147,9 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, cons ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed(&sd->ob_tfm, *N)); + *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -159,9 +159,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed(&sd->ob_itfm, *N)); + *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_itfm), *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, 
ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -171,9 +171,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction(&sd->ob_tfm, *D); + *D = transform_direction_auto(&ccl_fetch(sd, ob_tfm), *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -183,9 +183,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction(&sd->ob_itfm, *D); + *D = transform_direction_auto(&ccl_fetch(sd, ob_itfm), *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -194,13 +194,13 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd) { - if(sd->object == OBJECT_NONE) + if(ccl_fetch(sd, object) == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); #ifdef __OBJECT_MOTION__ - return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); + return make_float3(ccl_fetch(sd, ob_tfm).x.w, ccl_fetch(sd, ob_tfm).y.w, ccl_fetch(sd, ob_tfm).z.w); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); #endif } @@ -243,7 +243,7 @@ ccl_device_inline float 
object_random_number(KernelGlobals *kg, int object) ccl_device_inline int object_particle_id(KernelGlobals *kg, int object) { if(object == OBJECT_NONE) - return 0.0f; + return 0; int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; float4 f = kernel_tex_fetch(__objects, offset); @@ -296,7 +296,7 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2 + 1); + return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1); } /* Particle data from which object was instanced */ @@ -377,7 +377,7 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir) /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) +ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t) { Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); @@ -425,7 +425,7 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg, /* Transorm ray to exit static object in BVH */ -ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) +ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t) { if(*t != FLT_MAX) { Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); @@ -453,7 +453,7 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co #ifdef __OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ -ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 
*P, float3 *dir, float3 *idir, float *t, Transform *tfm) +ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm) { Transform itfm; *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); @@ -497,7 +497,7 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object, /* Transorm ray to exit motion blurred object in BVH */ -ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm) +ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm) { if(*t != FLT_MAX) *t *= len(transform_direction(tfm, 1.0f/(*idir))); @@ -520,5 +520,38 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int obj #endif +/* TODO(sergey): This is only for until we've got OpenCL 2.0 + * on all devices we consider supported. It'll be replaced with + * generic address space. 
+ */ + +#ifdef __KERNEL_OPENCL__ +ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg, + const ShaderData *sd, + ccl_addr_space float3 *D) +{ + float3 private_D = *D; + object_dir_transform(kg, sd, &private_D); + *D = private_D; +} + +ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg, + const ShaderData *sd, + ccl_addr_space float3 *N) +{ + float3 private_N = *N; + object_normal_transform(kg, sd, &private_N); + *N = private_N; +} +#endif + +#ifndef __KERNEL_OPENCL__ +# define object_dir_transform_auto object_dir_transform +# define object_normal_transform_auto object_normal_transform +#else +# define object_dir_transform_auto object_dir_transform_addrspace +# define object_normal_transform_auto object_normal_transform_addrspace +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index b52ec7ef1b2..30f12d32355 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -25,16 +25,16 @@ CCL_NAMESPACE_BEGIN ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { return triangle_attribute_float(kg, sd, elem, offset, dx, dy); } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { + else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { return curve_attribute_float(kg, sd, elem, offset, dx, dy); } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { + else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { return volume_attribute_float(kg, sd, elem, offset, dx, dy); } #endif @@ -47,16 +47,16 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData * ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, 
AttributeElement elem, int offset, float3 *dx, float3 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { return triangle_attribute_float3(kg, sd, elem, offset, dx, dy); } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { + else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { return curve_attribute_float3(kg, sd, elem, offset, dx, dy); } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { + else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { return volume_attribute_float3(kg, sd, elem, offset, dx, dy); } #endif @@ -108,9 +108,9 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) #ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(ccl_fetch(sd, dPdu)); #else return make_float3(0.0f, 0.0f, 0.0f); #endif @@ -124,12 +124,12 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); - return cross(sd->N, normalize(cross(data, sd->N))); + return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N)))); } else { /* otherwise use surface derivatives */ #ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(ccl_fetch(sd, dPdu)); #else return make_float3(0.0f, 0.0f, 0.0f); #endif @@ -144,16 +144,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) float3 center; #ifdef __HAIR__ - bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; + bool is_curve_primitive = ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE; if(is_curve_primitive) { center = curve_motion_center_location(kg, sd); - 
if(!(sd->flag & SD_TRANSFORM_APPLIED)) + if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) object_position_transform(kg, sd, ¢er); } else #endif - center = sd->P; + center = ccl_fetch(sd, P); float3 motion_pre = center, motion_post = center; @@ -164,16 +164,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) if(offset != ATTR_STD_NOT_FOUND) { /* get motion info */ int numverts, numkeys; - object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); + object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys); /* lookup attributes */ - int offset_next = (sd->type & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; + int offset_next = (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL); #ifdef __HAIR__ - if(is_curve_primitive && (sd->flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { object_position_transform(kg, sd, &motion_pre); object_position_transform(kg, sd, &motion_post); } @@ -184,17 +184,17 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) * transformation was set match the world/object space of motion_pre/post */ Transform tfm; - tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE); + tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_PRE); motion_pre = transform_point(&tfm, motion_pre); - tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST); + tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_POST); motion_post = transform_point(&tfm, motion_post); float3 motion_center; /* camera motion, for perspective/orthographic motion.pre/post will be a * world-to-raster matrix, for panorama it's 
world-to-camera */ - if (kernel_data.cam.type != CAMERA_PANORAMA) { + if(kernel_data.cam.type != CAMERA_PANORAMA) { tfm = kernel_data.cam.worldtoraster; motion_center = transform_perspective(&tfm, center); diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h index 4233ff15c86..f79b2ed9f34 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h +++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h @@ -155,11 +155,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, ++stackPtr; kernel_assert(stackPtr < BVH_QSTACK_SIZE); traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = c1; + traversalStack[stackPtr].dist = d1; ++stackPtr; kernel_assert(stackPtr < BVH_QSTACK_SIZE); traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = c0; + traversalStack[stackPtr].dist = d0; /* Three children are hit, push all onto stack and sort 3 * stack items, continue with closest child. @@ -206,7 +206,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* If node is leaf, fetch triangle list. */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); #ifdef __VISIBILITY_FLAG__ if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { /* Pop. 
*/ @@ -241,7 +241,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, switch(p_type) { case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr); + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); break; } #if BVH_FEATURE(BVH_MOTION) @@ -279,7 +279,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h index 62598115fa3..d85e1a4691e 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h @@ -202,7 +202,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* If node is leaf, fetch triangle list. 
*/ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) @@ -226,7 +226,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(tri_object != subsurface_object) { continue; } - triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); } break; } diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h index 99d2fb20837..7e356ea062b 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h @@ -80,6 +80,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, #if defined(__KERNEL_DEBUG__) isect->num_traversal_steps = 0; + isect->num_traversed_instances = 0; #endif ssef tnear(0.0f), tfar(ray->t); @@ -185,6 +186,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(traverseChild == 0) { if(d1 < d0) { nodeAddr = c1; + nodeDist = d1; ++stackPtr; kernel_assert(stackPtr < BVH_QSTACK_SIZE); traversalStack[stackPtr].addr = c0; @@ -193,6 +195,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, } else { nodeAddr = c0; + nodeDist = d0; ++stackPtr; kernel_assert(stackPtr < BVH_QSTACK_SIZE); traversalStack[stackPtr].addr = c1; @@ -260,7 +263,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* If node is leaf, fetch triangle list. 
*/ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); #ifdef __VISIBILITY_FLAG__ if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0))) @@ -296,7 +299,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, isect->num_traversal_steps++; #endif kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr)) { + if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { tfar = ssef(isect->t); /* Shadow ray early termination. */ if(visibility == PATH_RAY_SHADOW_OPAQUE) @@ -377,6 +380,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, traversalStack[stackPtr].dist = -FLT_MAX; nodeAddr = kernel_tex_fetch(__object_node, object); + +#if defined(__KERNEL_DEBUG__) + isect->num_traversed_instances++; +#endif } } #endif /* FEATURE(BVH_INSTANCING) */ diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h index 2c396e99fc4..d8cfa3a4061 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_volume.h +++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h @@ -95,10 +95,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, do { /* Traverse internal nodes. */ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { -#if defined(__KERNEL_DEBUG__) - isect->num_traversal_steps++; -#endif - ssef dist; int traverseChild = qbvh_node_intersect(kg, tnear, @@ -208,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* If node is leaf, fetch triangle list. 
*/ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) @@ -234,7 +230,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, continue; } /* Intersect ray against primitive. */ - triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr); + triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); } break; } diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h new file mode 100644 index 00000000000..d5131919944 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h @@ -0,0 +1,464 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+/* Record all intersections of the ray with primitives of volume objects.
+ *
+ * isect_array must have room for max_hits + 1 entries: the slot following the
+ * last recorded hit is initialized before the hit limit is checked.
+ *
+ * Returns the number of recorded intersections; traversal stops once max_hits
+ * of them were found.
+ */
+ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+                                             const Ray *ray,
+                                             Intersection *isect_array,
+                                             const uint max_hits)
+{
+	/* TODO(sergey):
+	 * - Test if pushing distance on the stack helps.
+	 * - Likely and unlikely for if() statements.
+	 * - Test restrict attribute for pointers.
+	 */
+
+	/* Traversal stack in CUDA thread-local memory. */
+	QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
+	traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+
+	/* Traversal variables in registers. */
+	int stackPtr = 0;
+	int nodeAddr = kernel_data.bvh.root;
+
+	/* Ray parameters in registers. */
+	const float tmax = ray->t;
+	float3 P = ray->P;
+	float3 dir = bvh_clamp_direction(ray->D);
+	float3 idir = bvh_inverse_direction(dir);
+	int object = OBJECT_NONE;
+	float isect_t = tmax;
+
+	const uint visibility = PATH_RAY_ALL_VISIBILITY;
+
+#if BVH_FEATURE(BVH_MOTION)
+	Transform ob_tfm;
+#endif
+
+#ifndef __KERNEL_SSE41__
+	if(!isfinite(P.x)) {
+		return 0;
+	}
+#endif
+
+#if BVH_FEATURE(BVH_INSTANCING)
+	int num_hits_in_instance = 0;
+#endif
+
+	uint num_hits = 0;
+	isect_array->t = tmax;
+
+	ssef tnear(0.0f), tfar(isect_t);
+	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+	float3 P_idir = P*idir;
+	sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+	sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+	/* Offsets to select the side that becomes the lower or upper bound. */
+	int near_x, near_y, near_z;
+	int far_x, far_y, far_z;
+
+	if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+	if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+	if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+	IsectPrecalc isect_precalc;
+	triangle_intersect_precalc(dir, &isect_precalc);
+
+	/* Traversal loop. */
+	do {
+		do {
+			/* Traverse internal nodes. */
+			while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+				ssef dist;
+				int traverseChild = qbvh_node_intersect(kg,
+				                                        tnear,
+				                                        tfar,
+#ifdef __KERNEL_AVX2__
+				                                        P_idir4,
+#else
+				                                        org,
+#endif
+				                                        idir4,
+				                                        near_x, near_y, near_z,
+				                                        far_x, far_y, far_z,
+				                                        nodeAddr,
+				                                        &dist);
+
+				if(traverseChild != 0) {
+					float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+
+					/* One child is hit, continue with that child. */
+					int r = __bscf(traverseChild);
+					if(traverseChild == 0) {
+						nodeAddr = __float_as_int(cnodes[r]);
+						continue;
+					}
+
+					/* Two children are hit, push far child, and continue with
+					 * closer child.
+					 */
+					int c0 = __float_as_int(cnodes[r]);
+					float d0 = ((float*)&dist)[r];
+					r = __bscf(traverseChild);
+					int c1 = __float_as_int(cnodes[r]);
+					float d1 = ((float*)&dist)[r];
+					if(traverseChild == 0) {
+						if(d1 < d0) {
+							nodeAddr = c1;
+							++stackPtr;
+							kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+							traversalStack[stackPtr].addr = c0;
+							traversalStack[stackPtr].dist = d0;
+							continue;
+						}
+						else {
+							nodeAddr = c0;
+							++stackPtr;
+							kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+							traversalStack[stackPtr].addr = c1;
+							traversalStack[stackPtr].dist = d1;
+							continue;
+						}
+					}
+
+					/* Here starts the slow path for 3 or 4 hit children. We push
+					 * all nodes onto the stack to sort them there.
+					 */
+					++stackPtr;
+					kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+					traversalStack[stackPtr].addr = c1;
+					traversalStack[stackPtr].dist = d1;
+					++stackPtr;
+					kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+					traversalStack[stackPtr].addr = c0;
+					traversalStack[stackPtr].dist = d0;
+
+					/* Three children are hit, push all onto stack and sort 3
+					 * stack items, continue with closest child.
+					 */
+					r = __bscf(traverseChild);
+					int c2 = __float_as_int(cnodes[r]);
+					float d2 = ((float*)&dist)[r];
+					if(traverseChild == 0) {
+						++stackPtr;
+						kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+						traversalStack[stackPtr].addr = c2;
+						traversalStack[stackPtr].dist = d2;
+						qbvh_stack_sort(&traversalStack[stackPtr],
+						                &traversalStack[stackPtr - 1],
+						                &traversalStack[stackPtr - 2]);
+						nodeAddr = traversalStack[stackPtr].addr;
+						--stackPtr;
+						continue;
+					}
+
+					/* Four children are hit, push all onto stack and sort 4
+					 * stack items, continue with closest child.
+					 */
+					r = __bscf(traverseChild);
+					int c3 = __float_as_int(cnodes[r]);
+					float d3 = ((float*)&dist)[r];
+					++stackPtr;
+					kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+					traversalStack[stackPtr].addr = c3;
+					traversalStack[stackPtr].dist = d3;
+					++stackPtr;
+					kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+					traversalStack[stackPtr].addr = c2;
+					traversalStack[stackPtr].dist = d2;
+					qbvh_stack_sort(&traversalStack[stackPtr],
+					                &traversalStack[stackPtr - 1],
+					                &traversalStack[stackPtr - 2],
+					                &traversalStack[stackPtr - 3]);
+				}
+
+				nodeAddr = traversalStack[stackPtr].addr;
+				--stackPtr;
+			}
+
+			/* If node is leaf, fetch triangle list. */
+			if(nodeAddr < 0) {
+				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
+				int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+				if(primAddr >= 0) {
+#endif
+					int primAddr2 = __float_as_int(leaf.y);
+					const uint type = __float_as_int(leaf.w);
+					const uint p_type = type & PRIMITIVE_ALL;
+					bool hit;
+
+					/* Pop. */
+					nodeAddr = traversalStack[stackPtr].addr;
+					--stackPtr;
+
+					/* Primitive intersection. */
+					switch(p_type) {
+						case PRIMITIVE_TRIANGLE: {
+							for(; primAddr < primAddr2; primAddr++) {
+								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+								/* Only primitives from volume object. */
+								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+									continue;
+								}
+								/* Intersect ray against primitive. */
+								hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+								if(hit) {
+									/* Move on to next entry in intersections array. */
+									isect_array++;
+									num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+									num_hits_in_instance++;
+#endif
+									isect_array->t = isect_t;
+									if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+										/* Top level hits are in world space already, only scale hits recorded inside an instance. */
+										if(object != OBJECT_NONE) {
+#  if BVH_FEATURE(BVH_MOTION)
+											float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#  else
+											Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+											float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#  endif
+											for(int i = 0; i < num_hits_in_instance; i++) {
+												(isect_array-i-1)->t *= t_fac;
+											}
+										}
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+										return num_hits;
+									}
+								}
+							}
+							break;
+						}
+#if BVH_FEATURE(BVH_MOTION)
+						case PRIMITIVE_MOTION_TRIANGLE: {
+							for(; primAddr < primAddr2; primAddr++) {
+								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+								/* Only primitives from volume object. */
+								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+									continue;
+								}
+								/* Intersect ray against primitive. */
+								hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+								if(hit) {
+									/* Move on to next entry in intersections array. */
+									isect_array++;
+									num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+									num_hits_in_instance++;
+#endif
+									isect_array->t = isect_t;
+									if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+										/* Top level hits are in world space already, only scale hits recorded inside an instance. */
+										if(object != OBJECT_NONE) {
+#  if BVH_FEATURE(BVH_MOTION)
+											float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#  else
+											Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+											float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#  endif
+											for(int i = 0; i < num_hits_in_instance; i++) {
+												(isect_array-i-1)->t *= t_fac;
+											}
+										}
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+										return num_hits;
+									}
+								}
+							}
+							break;
+						}
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+						case PRIMITIVE_CURVE:
+						case PRIMITIVE_MOTION_CURVE: {
+							for(; primAddr < primAddr2; primAddr++) {
+								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+								/* Only primitives from volume object. */
+								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+									continue;
+								}
+								/* Intersect ray against primitive. */
+								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+									hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+								else
+									hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+								if(hit) {
+									/* Move on to next entry in intersections array. */
+									isect_array++;
+									num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+									num_hits_in_instance++;
+#endif
+									isect_array->t = isect_t;
+									if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+										/* Top level hits are in world space already, only scale hits recorded inside an instance. */
+										if(object != OBJECT_NONE) {
+#  if BVH_FEATURE(BVH_MOTION)
+											float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#  else
+											Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+											float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#  endif
+											for(int i = 0; i < num_hits_in_instance; i++) {
+												(isect_array-i-1)->t *= t_fac;
+											}
+										}
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+										return num_hits;
+									}
+								}
+							}
+							break;
+						}
+#endif
+					}
+				}
+#if BVH_FEATURE(BVH_INSTANCING)
+				else {
+					/* Instance push. */
+					object = kernel_tex_fetch(__prim_object, -primAddr-1);
+					int object_flag = kernel_tex_fetch(__object_flag, object);
+
+					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
+#if BVH_FEATURE(BVH_MOTION)
+						bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+						bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+						if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+						if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+						if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+						tfar = ssef(isect_t);
+						idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+						P_idir = P*idir;
+						P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+						org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+						triangle_intersect_precalc(dir, &isect_precalc);
+						num_hits_in_instance = 0;
+						isect_array->t = isect_t;
+
+						++stackPtr;
+						kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+						traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+
+						nodeAddr = kernel_tex_fetch(__object_node, object);
+					}
+					else {
+						/* Pop. */
+						object = OBJECT_NONE;
+						nodeAddr = traversalStack[stackPtr].addr;
+						--stackPtr;
+					}
+				}
+			}
+#endif /* FEATURE(BVH_INSTANCING) */
+		} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+		if(stackPtr >= 0) {
+			kernel_assert(object != OBJECT_NONE);
+
+			/* Instance pop. */
+			if(num_hits_in_instance) {
+				float t_fac;
+#if BVH_FEATURE(BVH_MOTION)
+				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+				triangle_intersect_precalc(dir, &isect_precalc);
+				/* Scale isect->t to adjust for instancing. */
+				for(int i = 0; i < num_hits_in_instance; i++) {
+					(isect_array-i-1)->t *= t_fac;
+				}
+			}
+			else {
+				float ignore_t = FLT_MAX;
+#if BVH_FEATURE(BVH_MOTION)
+				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+				triangle_intersect_precalc(dir, &isect_precalc);
+			}
+
+			/* Restore the world space distance limit first, tfar below is derived from it. */
+			isect_t = tmax;
+			isect_array->t = isect_t;
+
+			if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+			if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+			if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+			tfar = ssef(isect_t);
+			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+			P_idir = P*idir;
+			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+			org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+			triangle_intersect_precalc(dir, &isect_precalc);
+
+			object = OBJECT_NONE;
+			nodeAddr = traversalStack[stackPtr].addr;
+			--stackPtr;
+		}
+#endif /* FEATURE(BVH_INSTANCING) */
+	} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+	return num_hits;
+}
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index dd3928682e3..995dfac5b09 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ 
b/intern/cycles/kernel/geom/geom_triangle.h @@ -27,14 +27,14 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { /* load triangle vertices */ - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); /* return normal */ - if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED) return normalize(cross(v2 - v0, v1 - v0)); else return normalize(cross(v1 - v0, v2 - v0)); @@ -94,7 +94,7 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo /* Ray differentials on triangle */ -ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv) +ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv) { /* fetch triangle vertex coordinates */ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); @@ -116,34 +116,34 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, offset + sd->prim); + return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); } else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { - float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x)); float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y)); float f2 = kernel_tex_fetch(__attributes_float, 
offset + __float_as_int(tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else if(elem == ATTR_ELEMENT_CORNER) { - int tri = offset + sd->prim*3; + int tri = offset + ccl_fetch(sd, prim)*3; float f0 = kernel_tex_fetch(__attributes_float, tri + 0); float f1 = kernel_tex_fetch(__attributes_float, tri + 1); float f2 = kernel_tex_fetch(__attributes_float, tri + 2); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else { if(dx) *dx = 0.0f; @@ -159,24 +159,24 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); } else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { - float4 tri_vindex = 
kernel_tex_fetch(__tri_vindex, sd->prim); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) { - int tri = offset + sd->prim*3; + int tri = offset + ccl_fetch(sd, prim)*3; float3 f0, f1, f2; if(elem == ATTR_ELEMENT_CORNER) { @@ -191,11 +191,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git 
a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index c9e30a451da..3ef918dc842 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -14,7 +14,7 @@ * limitations under the License. */ -/* Triangle/Ray intersections . +/* Triangle/Ray intersections. * * For BVH ray intersection we use a precomputed triangle storage to accelerate * intersection at the cost of more memory usage. @@ -49,18 +49,27 @@ typedef struct IsectPrecalc { float Sx, Sy, Sz; } IsectPrecalc; -/* Workaround for CUDA toolkit 6.5.16. */ -#if defined(__KERNEL_CPU__) || !defined(__KERNEL_CUDA_EXPERIMENTAL__) || __CUDA_ARCH__ < 500 +#if defined(__KERNEL_CUDA__) # if (defined(i386) || defined(_M_IX86)) +# if __CUDA_ARCH__ > 500 ccl_device_noinline -# else +# else /* __CUDA_ARCH__ > 500 */ ccl_device_inline -# endif -#else +# endif /* __CUDA_ARCH__ > 500 */ +# else /* (defined(i386) || defined(_M_IX86)) */ +# if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500) ccl_device_noinline -#endif +# else +ccl_device_inline +# endif +# endif /* (defined(i386) || defined(_M_IX86)) */ +#elif defined(__KERNEL_OPENCL_APPLE__) +ccl_device_noinline +#else /* defined(__KERNEL_OPENCL_APPLE__) */ +ccl_device_inline +#endif /* defined(__KERNEL_OPENCL_APPLE__) */ void triangle_intersect_precalc(float3 dir, - IsectPrecalc *isect_precalc) + IsectPrecalc *isect_precalc) { /* Calculate dimension where the ray direction is maximal. */ int kz = util_max_axis(make_float3(fabsf(dir.x), @@ -77,10 +86,10 @@ void triangle_intersect_precalc(float3 dir, } /* Calculate the shear constants. 
*/ - float inf_dir_z = 1.0f / IDX(dir, kz); - isect_precalc->Sx = IDX(dir, kx) * inf_dir_z; - isect_precalc->Sy = IDX(dir, ky) * inf_dir_z; - isect_precalc->Sz = inf_dir_z; + float inv_dir_z = 1.0f / IDX(dir, kz); + isect_precalc->Sx = IDX(dir, kx) * inv_dir_z; + isect_precalc->Sy = IDX(dir, ky) * inv_dir_z; + isect_precalc->Sz = inv_dir_z; /* Store the dimensions. */ isect_precalc->kx = kx; @@ -98,7 +107,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const IsectPrecalc *isect_precalc, Intersection *isect, float3 P, - float3 dir, uint visibility, int object, int triAddr) @@ -111,14 +119,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2)); - - const float3 A = tri[0] - P; - const float3 B = tri[1] - P; - const float3 C = tri[2] - P; + const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); + const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); + const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); @@ -155,8 +161,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, */ const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz; const float sign_T = xor_signmast(T, sign_mask); - if ((sign_T < 0.0f) || - (sign_T > isect->t * xor_signmast(det, sign_mask))) + if((sign_T < 0.0f) || + 
(sign_T > isect->t * xor_signmast(det, sign_mask))) { return false; } @@ -191,7 +197,6 @@ ccl_device_inline void triangle_intersect_subsurface( const IsectPrecalc *isect_precalc, Intersection *isect_array, float3 P, - float3 dir, int object, int triAddr, float tmax, @@ -207,14 +212,12 @@ ccl_device_inline void triangle_intersect_subsurface( const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2)); - - const float3 A = tri[0] - P; - const float3 B = tri[1] - P; - const float3 C = tri[2] - P; + const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); + const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); + const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); @@ -249,13 +252,10 @@ ccl_device_inline void triangle_intersect_subsurface( /* Calculate scaled z−coordinates of vertices and use them to calculate * the hit distance. 
*/ - const float Az = Sz * A_kz; - const float Bz = Sz * B_kz; - const float Cz = Sz * C_kz; - const float T = U * Az + V * Bz + W * Cz; - - if ((xor_signmast(T, sign_mask) < 0.0f) || - (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask))) + const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz; + const float sign_T = xor_signmast(T, sign_mask); + if((sign_T < 0.0f) || + (sign_T > tmax * xor_signmast(det, sign_mask))) { return; } @@ -315,7 +315,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, return P; } #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -327,14 +327,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, P = P + D*t; - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2)); - - float3 edge1 = tri[0] - tri[2]; - float3 edge2 = tri[1] - tri[2]; - float3 tvec = P - tri[2]; + const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); float3 qvec = cross(tvec, edge1); float3 pvec = cross(D, edge2); float rt = dot(edge2, qvec) / dot(edge1, pvec); @@ -343,7 +341,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, 
ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif @@ -372,7 +370,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, #ifdef __INTERSECTION_REFINE__ if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, @@ -386,14 +384,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, P = P + D*t; - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2)); - - float3 edge1 = tri[0] - tri[2]; - float3 edge2 = tri[1] - tri[2]; - float3 tvec = P - tri[2]; + const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); float3 qvec = cross(tvec, edge1); float3 pvec = cross(D, edge2); float rt = dot(edge2, qvec) / dot(edge1, pvec); @@ -402,7 +398,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index c33509fbf4f..c72afa2a3a4 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ 
-60,7 +60,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, #endif if(dx) *dx = 0.0f; - if(dx) *dy = 0.0f; + if(dy) *dy = 0.0f; /* todo: support float textures to lower memory usage for single floats */ return average(float4_to_float3(r)); |