diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2019-05-02 17:05:23 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2019-05-02 17:05:23 +0300 |
commit | 43e6bb85cee0802887eae9489a2bd73836daf41d (patch) | |
tree | eaa106429af8341bf2123d7445fdab0af71b3dda /intern/cycles/kernel | |
parent | 7daeb1f9aee284d958abe87622b43c70c21af967 (diff) | |
parent | ffaf91b5fc03f91e1fc90bd2f1d5dc5aa75656ff (diff) |
Merge 'master' into 'collada'
Diffstat (limited to 'intern/cycles/kernel')
50 files changed, 329 insertions, 1305 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 13e72ed299f..7503bad37b0 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -57,7 +57,7 @@ CCL_NAMESPACE_BEGIN #if defined(__HAIR__) # define BVH_FUNCTION_NAME bvh_intersect_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR # include "kernel/bvh/bvh_traversal.h" #endif @@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN #if defined(__HAIR__) && defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH | BVH_MOTION +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION # include "kernel/bvh/bvh_traversal.h" #endif @@ -181,10 +181,7 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray) ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray ray, const uint visibility, - Intersection *isect, - uint *lcg_state, - float difl, - float extmax) + Intersection *isect) { PROFILING_INIT(kg, PROFILING_INTERSECT); @@ -211,7 +208,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ if (kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax); + return bvh_intersect_hair_motion(kg, &ray, isect, visibility); # endif /* __HAIR__ */ return bvh_intersect_motion(kg, &ray, isect, visibility); @@ -220,7 +217,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, #ifdef __HAIR__ if (kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax); + return bvh_intersect_hair(kg, &ray, isect, visibility); #endif /* __HAIR__ */ #ifdef __KERNEL_CPU__ diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h index 042630121c8..a33bc73e25b 100644 --- a/intern/cycles/kernel/bvh/bvh_nodes.h +++ b/intern/cycles/kernel/bvh/bvh_nodes.h @@ -75,67 +75,6 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg, # endif } -ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 idir, - const float t, - const float difl, - const float extmax, - const int node_addr, - const uint visibility, - float dist[2]) -{ - - /* fetch node data */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); - - /* intersect ray against child nodes */ - float c0lox = (node0.x - P.x) * idir.x; - float c0hix = (node0.z - P.x) * idir.x; - float c0loy = (node1.x - P.y) * idir.y; - float c0hiy = (node1.z - P.y) * idir.y; - float c0loz = (node2.x - P.z) * idir.z; - float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); - - float c1lox = (node0.y - P.x) * idir.x; - float c1hix = (node0.w - P.x) * idir.x; - float c1loy = (node1.y - P.y) * idir.y; - float c1hiy = (node1.w - P.y) * idir.y; - float c1loz = (node2.y - P.z) * idir.z; - float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); - - if (difl != 0.0f) { - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } - - dist[0] = c0min; - dist[1] = c1min; - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | - (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); -# else - return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0); -# endif -} - ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg, const float3 P, const float3 dir, @@ -162,41 +101,6 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg return tnear <= tfar; } -ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float t, - const float difl, - int node_addr, - int child, - float dist[2]) -{ - Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); - float3 aligned_dir = transform_direction(&space, dir); - float3 aligned_P = transform_point(&space, P); - float3 nrdir = -bvh_inverse_direction(aligned_dir); - float3 tLowerXYZ = aligned_P * nrdir; - float3 tUpperXYZ = tLowerXYZ - nrdir; - const float near_x = min(tLowerXYZ.x, tUpperXYZ.x); - const float near_y = min(tLowerXYZ.y, tUpperXYZ.y); - const float near_z = min(tLowerXYZ.z, tUpperXYZ.z); - const float far_x = max(tLowerXYZ.x, tUpperXYZ.x); - const float far_y = max(tLowerXYZ.y, tUpperXYZ.y); - const float far_z = max(tLowerXYZ.z, tUpperXYZ.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); - *dist = tnear; - if (difl != 0.0f) { - /* TODO(sergey): Same as for QBVH, needs a proper use. */ - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - return round_down * tnear <= round_up * tfar; - } - else { - return tnear <= tfar; - } -} - ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, const float3 P, const float3 dir, @@ -227,38 +131,6 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, return mask; } -ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const float difl, - const float extmax, - const int node_addr, - const uint visibility, - float dist[2]) -{ - int mask = 0; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) { -# ifdef __VISIBILITY_FLAG__ - if ((__float_as_uint(cnodes.x) & visibility)) -# endif - { - mask |= 1; - } - } - if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) { -# ifdef __VISIBILITY_FLAG__ - if ((__float_as_uint(cnodes.y) & visibility)) -# endif - { - mask |= 2; - } - } - return mask; -} - ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, const float3 P, const float3 dir, @@ -277,27 +149,6 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, } } -ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const float difl, - const float extmax, - const int node_addr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect_robust( - kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist); - } - else { - return bvh_aligned_node_intersect_robust( - kg, P, idir, t, difl, extmax, node_addr, visibility, dist); - } -} #else /* !defined(__KERNEL_SSE2__) */ int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg, @@ -343,69 +194,6 @@ int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg, # endif } -ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, - const float3 &P, - const float3 &dir, - const ssef &tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - - /* fetch node data */ - const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - - if (difl != 0.0f) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0); - float4 *tminmaxview = (float4 *)&tminmax; - float &c0min = tminmaxview->x, &c1min = tminmaxview->y; - float &c0max = tminmaxview->z, &c1max = tminmaxview->w; - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } - - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - dist[0] = tminmax[0]; - dist[1] = tminmax[1]; - - int mask = movemask(lrhit); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); - return cmask; -# else - return mask & 3; -# endif -} - ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, const float3 P, const float3 dir, @@ -458,68 +246,6 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, # endif } -ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const ssef &isect_near, - const ssef &isect_far, - const float difl, - const int node_addr, - const uint visibility, - float dist[2]) -{ - Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); - Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); - - float3 aligned_dir0 = transform_direction(&space0, dir), - aligned_dir1 = transform_direction(&space1, dir); - float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P); - float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), - nrdir1 = -bvh_inverse_direction(aligned_dir1); - - ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f), - lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f), - lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f); - - ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), - upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), - upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); - - ssef tnear_x = min(lower_x, upper_x); - ssef tnear_y = min(lower_y, upper_y); - ssef tnear_z = min(lower_z, upper_z); - ssef tfar_x = max(lower_x, upper_x); - ssef tfar_y = max(lower_y, upper_y); - ssef tfar_z = max(lower_z, upper_z); - - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - sseb vmask; - if (difl != 0.0f) { - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - vmask = round_down * tnear <= round_up * tfar; - } - else { - vmask = tnear <= tfar; - } - - dist[0] = tnear.f[0]; - dist[1] = tnear.f[1]; - - int mask = (int)movemask(vmask); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); - return cmask; -# else - return mask & 3; -# endif -} - ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, const float3 &P, const float3 &dir, @@ -543,40 +269,4 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist); } } - -ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, - const float3 &P, - const float3 &dir, - const ssef &isect_near, - const ssef &isect_far, - const ssef &tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const float difl, - const float extmax, - const int node_addr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect_robust( - kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist); - } - else { - return bvh_aligned_node_intersect_robust(kg, - P, - dir, - tsplat, - Psplat, - idirsplat, - shufflexyz, - difl, - extmax, - node_addr, - visibility, - dist); - } -} #endif /* !defined(__KERNEL_SSE2__) */ diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index b362779549c..268bb149970 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -219,10 +219,7 @@ ccl_device_inline object, prim_addr, ray->time, - curve_type, - NULL, - 0, - 0); + curve_type); } else { hit = curve_intersect(kg, @@ -233,10 +230,7 @@ ccl_device_inline object, prim_addr, ray->time, - curve_type, - NULL, - 0, - 0); + curve_type); } break; } diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index 34a06d003bb..18afc6ae4eb 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -26,10 +26,8 @@ #if BVH_FEATURE(BVH_HAIR) # define NODE_INTERSECT bvh_node_intersect -# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust #else # define NODE_INTERSECT bvh_aligned_node_intersect -# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust #endif /* This is a template BVH traversal function, where various features can be @@ -38,21 +36,13 @@ * * BVH_INSTANCING: object instancing * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width * BVH_MOTION: motion blur rendering */ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, const Ray *ray, Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - uint *lcg_state, - float difl, - float extmax -#endif -) + const uint visibility) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) @@ -117,23 +107,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #if !defined(__KERNEL_SSE2__) -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if (difl != 0.0f) { - traverse_mask = NODE_INTERSECT_ROBUST(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect->t, - difl, - extmax, - node_addr, - visibility, - dist); - } - else -# endif { traverse_mask = NODE_INTERSECT(kg, P, @@ -147,27 +120,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, dist); } #else // __KERNEL_SSE2__ -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if (difl != 0.0f) { - traverse_mask = NODE_INTERSECT_ROBUST(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - difl, - extmax, - node_addr, - visibility, - dist); - } - else -# endif { traverse_mask = NODE_INTERSECT(kg, P, @@ -287,32 +239,12 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); bool hit; if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); + hit = cardinal_curve_intersect( + kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type); } else { - hit = curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); + hit = curve_intersect( + kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type); } if (hit) { /* shadow ray early termination */ @@ -408,56 +340,19 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - uint *lcg_state, - float difl, - float extmax -#endif -) + const uint visibility) { switch (kernel_data.bvh.bvh_layout) { #ifdef __KERNEL_AVX2__ case BVH_LAYOUT_BVH8: - return BVH_FUNCTION_FULL_NAME(OBVH)(kg, - ray, - isect, - visibility -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - lcg_state, - difl, - extmax -# endif - ); + return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility); #endif #ifdef __QBVH__ case BVH_LAYOUT_BVH4: - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - lcg_state, - difl, - extmax -# endif - ); + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility); #endif /* __QBVH__ */ case BVH_LAYOUT_BVH2: - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - lcg_state, - difl, - extmax -#endif - ); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility); } kernel_assert(!"Should not happen"); return false; @@ -466,4 +361,3 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, #undef BVH_FUNCTION_NAME #undef BVH_FUNCTION_FEATURES #undef NODE_INTERSECT -#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h index 16f3b03f842..84dc0dbaef5 100644 --- a/intern/cycles/kernel/bvh/bvh_types.h +++ b/intern/cycles/kernel/bvh/bvh_types.h @@ -38,7 +38,6 @@ CCL_NAMESPACE_BEGIN #define BVH_INSTANCING 1 #define BVH_MOTION 2 #define BVH_HAIR 4 -#define BVH_HAIR_MINIMUM_WIDTH 8 #define BVH_NAME_JOIN(x, y) x##_##y #define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y) diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h index 6831562cade..e5c935b75ed 100644 --- a/intern/cycles/kernel/bvh/obvh_nodes.h +++ b/intern/cycles/kernel/bvh/obvh_nodes.h @@ -276,53 +276,6 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg #endif } -ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, - const avxf &isect_near, - const avxf &isect_far, -#ifdef __KERNEL_AVX2__ - const avx3f &P_idir, -#else - const avx3f &P, -#endif - const avx3f &idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - avxf *ccl_restrict dist) -{ - const int offset = node_addr + 2; -#ifdef __KERNEL_AVX2__ - const avxf tnear_x = msub( - kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x); - const avxf tfar_x = msub( - kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x); - const avxf tnear_y = msub( - kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y); - const avxf tfar_y = msub( - kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y); - const avxf tnear_z = msub( - kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z); - const avxf tfar_z = msub( - kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z); - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near); - const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far); - const avxb vmask = round_down * tnear <= round_up * tfar; - int mask = (int)movemask(vmask); - *dist = tnear; - return mask; -#else - return 0; -#endif -} - /* Unaligned nodes intersection */ ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg, @@ -391,77 +344,6 @@ ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict return movemask(vmask); } -ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, - const avxf &isect_near, - const avxf &isect_far, -#ifdef __KERNEL_AVX2__ - const avx3f &P_idir, -#endif - const avx3f &P, - const avx3f &dir, - const avx3f &idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - avxf *ccl_restrict dist) -{ - const int offset = node_addr; - const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2); - const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4); - const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6); - - const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8); - const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10); - const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12); - - const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14); - const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16); - const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18); - - const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20); - const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22); - const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24); - - const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z, - aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z, - aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z; - - const avxf aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x, - aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y, - aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z; - - const avxf neg_one(-1.0f); - const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; - - const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; - - const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - - const avxf tnear_x = min(tlower_x, tupper_x); - const avxf tnear_y = min(tlower_y, tupper_y); - const avxf tnear_z = min(tlower_z, tupper_z); - const avxf tfar_x = max(tlower_x, tupper_x); - const avxf tfar_y = max(tlower_y, tupper_y); - const avxf tfar_z = max(tlower_z, tupper_z); - - const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const avxb vmask = round_down * tnear <= round_up * tfar; - *dist = tnear; - return movemask(vmask); -} - /* Intersectors wrappers. * * They'll check node type and call appropriate intersection code. @@ -526,66 +408,3 @@ ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg, dist); } } - -ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg, - const avxf &isect_near, - const avxf &isect_far, -#ifdef __KERNEL_AVX2__ - const avx3f &P_idir, -#endif - const avx3f &P, - const avx3f &dir, - const avx3f &idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - avxf *ccl_restrict dist) -{ - const int offset = node_addr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return obvh_unaligned_node_intersect_robust(kg, - isect_near, - isect_far, -#ifdef __KERNEL_AVX2__ - P_idir, -#endif - P, - dir, - idir, - near_x, - near_y, - near_z, - far_x, - far_y, - far_z, - node_addr, - difl, - dist); - } - else { - return obvh_aligned_node_intersect_robust(kg, - isect_near, - isect_far, -#ifdef __KERNEL_AVX2__ - P_idir, -#else - P, -#endif - idir, - near_x, - near_y, - near_z, - far_x, - far_y, - far_z, - node_addr, - difl, - dist); - } -} diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h index 98efb003788..b7ab75b723c 100644 --- a/intern/cycles/kernel/bvh/obvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/obvh_shadow_all.h @@ -431,7 +431,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, } prim_addr++; - } //while + } // while } else { kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == @@ -503,10 +503,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, object, prim_addr, ray->time, - curve_type, - NULL, - 0, - 0); + curve_type); } else { hit = curve_intersect(kg, @@ -517,10 +514,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, object, prim_addr, ray->time, - curve_type, - NULL, - 0, - 0); + curve_type); } break; } @@ -574,7 +568,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, } prim_addr++; - } //while prim + } // while prim } } #if BVH_FEATURE(BVH_INSTANCING) diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h index 86b1de48aaa..9095233f8b6 100644 --- a/intern/cycles/kernel/bvh/obvh_traversal.h +++ b/intern/cycles/kernel/bvh/obvh_traversal.h @@ -20,29 +20,19 @@ * * BVH_INSTANCING: object instancing * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width * BVH_MOTION: motion blur rendering */ #if BVH_FEATURE(BVH_HAIR) # define NODE_INTERSECT obvh_node_intersect -# define NODE_INTERSECT_ROBUST obvh_node_intersect_robust #else # define NODE_INTERSECT obvh_aligned_node_intersect -# define NODE_INTERSECT_ROBUST obvh_aligned_node_intersect_robust #endif ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, const Ray *ray, Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - uint *lcg_state, - float difl, - float extmax -#endif -) + const uint visibility) { /* Traversal stack in CUDA thread-local memory. */ OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; @@ -117,38 +107,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, BVH_DEBUG_NEXT_NODE(); -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if (difl != 0.0f) { - /* NOTE: We extend all the child BB instead of fetching - * and checking visibility flags for each of the, - * - * Need to test if doing opposite would be any faster. - */ - child_mask = NODE_INTERSECT_ROBUST(kg, - tnear, - tfar, -# ifdef __KERNEL_AVX2__ - P_idir4, -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -# endif -# if BVH_FEATURE(BVH_HAIR) - dir4, -# endif - idir4, - near_x, - near_y, - near_z, - far_x, - far_y, - far_z, - node_addr, - difl, - &dist); - } - else -#endif /* BVH_HAIR_MINIMUM_WIDTH */ { child_mask = NODE_INTERSECT(kg, tnear, @@ -375,8 +333,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, } /* Eight children are hit, push all onto stack and sort 8 - * stack items, continue with closest child. - */ + * stack items, continue with closest child. + */ r = __bscf(child_mask); int c7 = __float_as_int(cnodes[r]); float d7 = ((float *)&dist)[r]; @@ -451,7 +409,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, return true; } } - } //for + } // for } else { kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); @@ -472,7 +430,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, return true; } } - } //prim count + } // prim count break; } #if BVH_FEATURE(BVH_MOTION) @@ -501,32 +459,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); bool hit; if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); + hit = cardinal_curve_intersect( + kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type); } else { - hit = curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); + hit = curve_intersect( + kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type); } if (hit) { tfar = avxf(isect->t); @@ -617,4 +555,3 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, } #undef NODE_INTERSECT -#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h index 7c1d8c8c72e..070406fb18a 100644 --- a/intern/cycles/kernel/bvh/qbvh_nodes.h +++ b/intern/cycles/kernel/bvh/qbvh_nodes.h @@ -127,7 +127,7 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1, /* Axis-aligned nodes intersection */ -//ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, +// ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, const ssef &isect_near, const ssef &isect_far, @@ -181,51 +181,6 @@ static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, return mask; } -ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, - const ssef &isect_near, - const ssef &isect_far, -#ifdef __KERNEL_AVX2__ - const sse3f &P_idir, -#else - const sse3f &P, -#endif - const sse3f &idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - ssef *ccl_restrict dist) -{ - const int offset = node_addr + 1; -#ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, P_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, P_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, P_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, P_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, P_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, P_idir.z); -#else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - P.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - P.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - P.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - P.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - P.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - P.z) * idir.z; -#endif - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = round_down * tnear <= round_up * tfar; - *dist = tnear; - return (int)movemask(vmask); -} - /* Unaligned nodes intersection */ ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg, @@ -308,85 +263,6 @@ ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict #endif } -ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, - const ssef &isect_near, - const ssef &isect_far, -#ifdef __KERNEL_AVX2__ - const sse3f &P_idir, -#endif - const sse3f &P, - const sse3f &dir, - const sse3f &idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - ssef *ccl_restrict dist) -{ - const int offset = node_addr; - const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1); - const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2); - const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3); - - const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4); - const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5); - const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6); - - const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7); - const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8); - const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9); - - const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10); - const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11); - const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12); - - const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z, - aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z, - aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z; - - const ssef aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x, - aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y, - aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z; - - const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); - const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; - - const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; - - const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - -#ifdef __KERNEL_SSE41__ - const ssef tnear_x = mini(tlower_x, tupper_x); - const ssef tnear_y = mini(tlower_y, tupper_y); - const ssef tnear_z = mini(tlower_z, tupper_z); - const ssef tfar_x = maxi(tlower_x, tupper_x); - const ssef tfar_y = maxi(tlower_y, tupper_y); - const ssef tfar_z = maxi(tlower_z, tupper_z); -#else - const ssef tnear_x = min(tlower_x, tupper_x); - const ssef tnear_y = min(tlower_y, tupper_y); - const ssef tnear_z = min(tlower_z, tupper_z); - const ssef tfar_x = max(tlower_x, tupper_x); - const ssef tfar_y = max(tlower_y, tupper_y); - const ssef tfar_z = max(tlower_z, tupper_z); -#endif - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = round_down * tnear <= round_up * tfar; - *dist = tnear; - return movemask(vmask); -} - /* Intersectors wrappers. * * They'll check node type and call appropriate intersection code. @@ -451,66 +327,3 @@ ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg, dist); } } - -ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg, - const ssef &isect_near, - const ssef &isect_far, -#ifdef __KERNEL_AVX2__ - const sse3f &P_idir, -#endif - const sse3f &P, - const sse3f &dir, - const sse3f &idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - ssef *ccl_restrict dist) -{ - const int offset = node_addr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return qbvh_unaligned_node_intersect_robust(kg, - isect_near, - isect_far, -#ifdef __KERNEL_AVX2__ - P_idir, -#endif - P, - dir, - idir, - near_x, - near_y, - near_z, - far_x, - far_y, - far_z, - node_addr, - difl, - dist); - } - else { - return qbvh_aligned_node_intersect_robust(kg, - isect_near, - isect_far, -#ifdef __KERNEL_AVX2__ - P_idir, -#else - P, -#endif - idir, - near_x, - near_y, - near_z, - far_x, - far_y, - far_z, - node_addr, - difl, - dist); - } -} diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index 49e607bfbd0..682251bf25b 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -37,7 +37,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, uint *num_hits) { /* TODO(sergey): - * - Test if pushing distance on the stack helps. + * - Test if pushing distance on the stack helps. * - Likely and unlikely for if() statements. * - Test restrict attribute for pointers. */ @@ -293,10 +293,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, object, prim_addr, ray->time, - curve_type, - NULL, - 0, - 0); + curve_type); } else { hit = curve_intersect(kg, @@ -307,10 +304,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, object, prim_addr, ray->time, - curve_type, - NULL, - 0, - 0); + curve_type); } break; } diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h index 9ee0f7b5933..f43e84bf368 100644 --- a/intern/cycles/kernel/bvh/qbvh_traversal.h +++ b/intern/cycles/kernel/bvh/qbvh_traversal.h @@ -20,29 +20,19 @@ * * BVH_INSTANCING: object instancing * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width * BVH_MOTION: motion blur rendering */ #if BVH_FEATURE(BVH_HAIR) # define NODE_INTERSECT qbvh_node_intersect -# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust #else # define NODE_INTERSECT qbvh_aligned_node_intersect -# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust #endif ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, const Ray *ray, Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , - uint *lcg_state, - float difl, - float extmax -#endif -) + const uint visibility) { /* TODO(sergey): * - Test if pushing distance on the stack helps (for non shadow rays). @@ -126,38 +116,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, BVH_DEBUG_NEXT_NODE(); -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if (difl != 0.0f) { - /* NOTE: We extend all the child BB instead of fetching - * and checking visibility flags for each of the, - * - * Need to test if doing opposite would be any faster. - */ - child_mask = NODE_INTERSECT_ROBUST(kg, - tnear, - tfar, -# ifdef __KERNEL_AVX2__ - P_idir4, -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -# endif -# if BVH_FEATURE(BVH_HAIR) - dir4, -# endif - idir4, - near_x, - near_y, - near_z, - far_x, - far_y, - far_z, - node_addr, - difl, - &dist); - } - else -#endif /* BVH_HAIR_MINIMUM_WIDTH */ { child_mask = NODE_INTERSECT(kg, tnear, @@ -364,32 +322,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); bool hit; if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); + hit = cardinal_curve_intersect( + kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type); } else { - hit = curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); + hit = curve_intersect( + kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type); } if (hit) { tfar = ssef(isect->t); @@ -480,4 +418,3 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, } #undef NODE_INTERSECT -#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index b3b1c37748d..6495ae743ab 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -85,15 +85,11 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f); float HdotN = fmaxf(dot(H, N), 1e-6f); - float pump = - 1.0f / - fmaxf( - 1e-6f, - (HdotI * - fmaxf( - NdotO, - NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */ - /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */ + /* pump from original paper + * (first derivative disc., but cancels the HdotI in the pdf nicely) */ + float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotO, NdotI))); + /* pump from d-brdf paper */ + /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); @@ -105,9 +101,8 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl float norm = (n_x + 1.0f) / (8.0f * M_PI_F); out = NdotO * norm * lobe * pump; - *pdf = - norm * lobe / - HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */ + /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper). */ + *pdf = norm * lobe / HdotI; } else { /* anisotropic */ diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index 6b2a9a97d30..4b6f5b3b439 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -224,7 +224,7 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, fast_sincosf(phi, &sinphi, &cosphi); *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg; - //differentials - TODO: find a better approximation for the reflective bounce + // differentials - TODO: find a better approximation for the reflective bounce #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; @@ -285,7 +285,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, fast_sincosf(phi, &sinphi, &cosphi); *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg; - //differentials - TODO: find a better approximation for the transmission bounce + // differentials - TODO: find a better approximation for the transmission bounce #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index a4bba2fbf6c..4db5a6cc830 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -60,7 +60,8 @@ ccl_device_inline float cos_from_sin(const float s) return safe_sqrtf(1.0f - s * s); } -/* Gives the change in direction in the normal plane for the given angles and p-th-order scattering. */ +/* Gives the change in direction in the normal plane for the given angles and p-th-order + * scattering. */ ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t) { return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F; diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 2cc1a9c5299..07be33ee6b5 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -16,7 +16,8 @@ CCL_NAMESPACE_BEGIN -/* Most of the code is based on the supplemental implementations from https://eheitzresearch.wordpress.com/240-2/. */ +/* Most of the code is based on the supplemental implementations from + * https://eheitzresearch.wordpress.com/240-2/. */ /* === GGX Microfacet distribution functions === */ @@ -80,7 +81,8 @@ ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, return make_float2(slopeX, -slopeY); } -/* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */ +/* Visible normal sampling for the GGX distribution + * (based on page 7 of the supplemental implementation). */ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float randx, @@ -134,7 +136,8 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, return make_float3(phase, phase, phase); } -/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */ +/* Phase function for dielectric transmissive materials, including both reflection and refraction + * according to the dielectric fresnel term. */ ccl_device_forceinline float3 mf_sample_phase_glass( const float3 wi, const float eta, const float3 wm, const float randV, bool *outside) { @@ -227,7 +230,8 @@ ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float l return powf(C1, lambda); } -/* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */ +/* Sampling from the visible height distribution (based on page 17 of the supplemental + * implementation). */ ccl_device_forceinline bool mf_sample_height( const float3 w, float *h, float *C1, float *G1, float *lambda, const float U) { @@ -254,7 +258,8 @@ ccl_device_forceinline bool mf_sample_height( } /* === PDF approximations for the different phase functions. === - * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an unbiased result. */ + * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an + * unbiased result. */ /* Approximation for the albedo of the single-scattering GGX distribution, * the missing energy is then approximated as a diffuse reflection for the PDF. */ @@ -342,7 +347,8 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi, } } -/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */ +/* === Actual random walk implementations === */ +/* One version of mf_eval and mf_sample per phase function. */ #define MF_NAME_JOIN(x, y) x##_##y #define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y) diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h index 79247ee8057..04d9b22d7d2 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h @@ -16,14 +16,14 @@ /* Evaluate the BSDF from wi to wo. * Evaluation is split into the analytical single-scattering BSDF and the multi-scattering BSDF, - * which is evaluated stochastically through a random walk. At each bounce (except for the first one), - * the amount of reflection from here towards wo is evaluated before bouncing again. + * which is evaluated stochastically through a random walk. At each bounce (except for the first + * one), the amount of reflection from here towards wo is evaluated before bouncing again. * - * Because of the random walk, the evaluation is not deterministic, but its expected value is equal to - * the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined - * analytically, so the single-scattering PDF plus a diffuse term to account for the multi-scattered - * energy is used. In combination with MIS, that is enough to produce an unbiased result, although - * the balance heuristic isn't necessarily optimal anymore. + * Because of the random walk, the evaluation is not deterministic, but its expected value is equal + * to the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined + * analytically, so the single-scattering PDF plus a diffuse term to account for the + * multi-scattered energy is used. In combination with MIS, that is enough to produce an unbiased + * result, although the balance heuristic isn't necessarily optimal anymore. */ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, @@ -36,7 +36,8 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, bool use_fresnel, const float3 cspec0) { - /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */ + /* Evaluating for a shallower incoming direction produces less noise, and the properties of the + * BSDF guarantee reciprocity. */ bool swapped = false; #ifdef MF_MULTI_GLASS if (wi.z * wo.z < 0.0f) { @@ -180,9 +181,9 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, return eval; } -/* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk - * escaped the surface in wo. The function returns the throughput between wi and wo. - * Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal. +/* Perform a random walk on the microsurface starting from wi, returning the direction in which the + * walk escaped the surface in wo. The function returns the throughput between wi and wo. Without + * reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal. */ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index a9a27edd7de..3bce47caedb 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ b/intern/cycles/kernel/closure/bsdf_util.h @@ -155,7 +155,7 @@ interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0 /* Calculate the fresnel interpolation factor * The value from fresnel_dielectric_cos(...) has to be normalized because * the cspec0 keeps the F0 color - */ + */ float F0_norm = 1.0f / (1.0f - F0); float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm; diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 57804eca269..a7d9f90b443 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -450,7 +450,8 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { bssrdf_gaussian_sample(radius, xi, r, h); } - else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ + else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || + * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */ bssrdf_burley_sample(radius, xi, r, h); } } @@ -466,7 +467,8 @@ ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r) else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { return bssrdf_gaussian_pdf(radius, r); } - else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ + else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || + * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ return bssrdf_burley_pdf(radius, r); } } diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h index 809ccfe8be6..8a2af957146 100644 --- a/intern/cycles/kernel/filter/filter_features.h +++ b/intern/cycles/kernel/filter/filter_features.h @@ -18,8 +18,9 @@ CCL_NAMESPACE_BEGIN #define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride] -/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always points to the current pixel in the first pass. - * Repeat the loop for every secondary frame if there are any. */ +/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always + * points to the current pixel in the first pass. Repeat the loop for every secondary frame if + * there are any. */ #define FOR_PIXEL_WINDOW \ for (int frame = 0; frame < tile_info->num_frames; frame++) { \ pixel.z = tile_info->frames[frame]; \ diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h index 1e0d6e93453..7bbd17066fd 100644 --- a/intern/cycles/kernel/filter/filter_features_sse.h +++ b/intern/cycles/kernel/filter/filter_features_sse.h @@ -20,8 +20,8 @@ CCL_NAMESPACE_BEGIN /* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time. * pixel_buffer always points to the first of the 4 current pixel in the first pass. - * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set for all pixels within the window. - * Repeat the loop for every secondary frame if there are any. */ + * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set + * for all pixels within the window. Repeat the loop for every secondary frame if there are any. */ #define FOR_PIXEL_WINDOW_SSE \ for (int frame = 0; frame < tile_info->num_frames; frame++) { \ pixel.z = tile_info->frames[frame]; \ diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h index a94266a8786..24200c29203 100644 --- a/intern/cycles/kernel/filter/filter_nlm_cpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h @@ -197,7 +197,8 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, bool use_time) { int4 clip_area = rect_clip(rect, filter_window); - /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */ + /* fy and fy are in filter-window-relative coordinates, + * while x and y are in feature-window-relative coordinates. */ for (int y = clip_area.y; y < clip_area.w; y++) { for (int x = clip_area.x; x < clip_area.z; x++) { const int low = max(rect.x, x - f); diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index 8211311313d..b48a3f3f68b 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -16,14 +16,19 @@ CCL_NAMESPACE_BEGIN -/* First step of the shadow prefiltering, performs the shadow division and stores all data +/** + * First step of the shadow prefiltering, performs the shadow division and stores all data * in a nice and easy rectangular array that can be passed to the NLM filter. * * Calculates: - * unfiltered: Contains the two half images of the shadow feature pass - * sampleVariance: The sample-based variance calculated in the kernel. Note: This calculation is biased in general, and especially here since the variance of the ratio can only be approximated. - * sampleVarianceV: Variance of the sample variance estimation, quite noisy (since it's essentially the buffer variance of the two variance halves) - * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy. + * \param unfiltered: Contains the two half images of the shadow feature pass + * \param sampleVariance: The sample-based variance calculated in the kernel. + * Note: This calculation is biased in general, + * and especially here since the variance of the ratio can only be approximated. + * \param sampleVarianceV: Variance of the sample variance estimation, quite noisy + * (since it's essentially the buffer variance of the two variance halves) + * \param bufferVariance: The buffer-based variance of the shadow feature. + * Unbiased, but quite noisy. */ ccl_device void kernel_filter_divide_shadow(int sample, CCL_FILTER_TILE_INFO, @@ -204,10 +209,10 @@ ccl_device void kernel_filter_detect_outliers(int x, if (L > ref) { /* The pixel appears to be an outlier. - * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel - * should actually be at the reference value: - * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. - * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. + * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is + * that the pixel should actually be at the reference value: If the reference is within the + * 3-sigma interval, the pixel is assumed to be a statistical outlier. Otherwise, it is very + * unlikely that the pixel should be darker, which indicates a legitimate highlight. */ if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) { @@ -219,7 +224,8 @@ ccl_device void kernel_filter_detect_outliers(int x, float stddev = sqrtf(pixel_variance); if (L - 3 * stddev < ref) { /* The pixel is an outlier, so negate the depth value to mark it as one. - * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM + * weights. */ depth[idx] = -depth[idx]; float fac = ref / L; color *= fac; diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h index 69e3c7c458d..585c4b33787 100644 --- a/intern/cycles/kernel/filter/filter_transform.h +++ b/intern/cycles/kernel/filter/filter_transform.h @@ -55,7 +55,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff math_vector_scale(feature_means, 1.0f / num_pixels, num_features); - /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ + /* === Scale the shifted feature passes to a range of [-1; 1] === + * Will be baked into the transform later. */ float feature_scale[DENOISE_FEATURES]; math_vector_zero(feature_scale, num_features); @@ -69,8 +70,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff filter_calculate_scale(feature_scale, use_time); /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space - * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ + * This transformation maps the num_features-dimentional feature space to a reduced feature + * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent + * overfitting. */ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; math_matrix_zero(feature_matrix, num_features); FOR_PIXEL_WINDOW diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h index 89cddfd927f..41bbadb621d 100644 --- a/intern/cycles/kernel/filter/filter_transform_gpu.h +++ b/intern/cycles/kernel/filter/filter_transform_gpu.h @@ -61,7 +61,8 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re math_vector_scale(feature_means, 1.0f / num_pixels, num_features); - /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ + /* === Scale the shifted feature passes to a range of [-1; 1] === + * Will be baked into the transform later. */ float feature_scale[DENOISE_FEATURES]; math_vector_zero(feature_scale, num_features); @@ -75,8 +76,9 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re filter_calculate_scale(feature_scale, use_time); /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space - * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ + * This transformation maps the num_features-dimentional feature space to a reduced feature + * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent + * overfitting. */ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; math_matrix_zero(feature_matrix, num_features); FOR_PIXEL_WINDOW diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h index 22397b292db..830444645d7 100644 --- a/intern/cycles/kernel/filter/filter_transform_sse.h +++ b/intern/cycles/kernel/filter/filter_transform_sse.h @@ -58,7 +58,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff feature_means[i] = reduce_add(feature_means[i]) * pixel_scale; } - /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ + /* === Scale the shifted feature passes to a range of [-1; 1] === + * Will be baked into the transform later. */ float4 feature_scale[DENOISE_FEATURES]; math_vector_zero_sse(feature_scale, num_features); FOR_PIXEL_WINDOW_SSE @@ -72,8 +73,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff filter_calculate_scale_sse(feature_scale, use_time); /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space - * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ + * This transformation maps the num_features-dimentional feature space to a reduced feature + * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent + * overfitting. */ float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES]; math_matrix_zero_sse(feature_matrix_sse, num_features); FOR_PIXEL_WINDOW_SSE diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index 5fd277c2f99..0327ebf8890 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -34,10 +34,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, int object, int curveAddr, float time, - int type, - uint *lcg_state, - float difl, - float extmax) + int type) { const bool is_curve_primitive = (type & PRIMITIVE_CURVE); @@ -239,9 +236,6 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, return false; /* minimum width extension */ - float mw_extension = min(difl * fabsf(upper), extmax); - float r_ext = mw_extension + r_curr; - float xextrem[4]; curvebounds(&lower, &upper, @@ -253,7 +247,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); - if (lower > r_ext || upper < -r_ext) + if (lower > r_curr || upper < -r_curr) return false; float yextrem[4]; @@ -267,7 +261,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); - if (lower > r_ext || upper < -r_ext) + if (lower > r_curr || upper < -r_curr) return false; /* setup recurrent loop */ @@ -340,12 +334,8 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, float r2 = r_st + (r_en - r_st) * i_en; r_curr = max(r1, r2); - mw_extension = min(difl * fabsf(bmaxz), extmax); - float r_ext = mw_extension + r_curr; - float coverage = 1.0f; - - if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext || bmaxx < -r_ext || - bminy > r_ext || bmaxy < -r_ext) { + if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_curr || + bmaxx < -r_curr || bminy > r_curr || bmaxy < -r_curr) { /* the bounding box does not overlap the square centered at O */ tree += level; level = tree & -tree; @@ -404,31 +394,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, continue; } - /* compute coverage */ - float r_ext = r_curr; - coverage = 1.0f; - if (difl != 0.0f) { - mw_extension = min(difl * fabsf(bmaxz), extmax); - r_ext = mw_extension + r_curr; -# ifdef __KERNEL_SSE__ - const float3 p_curr_sq = p_curr * p_curr; - const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128))); - float d = dxxx.x; -# else - float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); -# endif - float d0 = d - r_curr; - float d1 = d + r_curr; - float inv_mw_extension = 1.0f / mw_extension; - if (d0 >= 0) - coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * - 0.5f; - else // inside - coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * - 0.5f; - } - - if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || + if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_curr * r_curr || p_curr.z <= epsilon || isect->t < p_curr.z) { tree++; level = tree & -tree; @@ -436,41 +402,23 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, } t = p_curr.z; - - /* stochastic fade from minimum width */ - if (difl != 0.0f && lcg_state) { - if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) - return hit; - } } else { float l = len(p_en - p_st); - /* minimum width extension */ - float or1 = r1; - float or2 = r2; - - if (difl != 0.0f) { - mw_extension = min(len(p_st - P) * difl, extmax); - or1 = r1 < mw_extension ? mw_extension : r1; - mw_extension = min(len(p_en - P) * difl, extmax); - or2 = r2 < mw_extension ? mw_extension : r2; - } - /* --- */ float invl = 1.0f / l; float3 tg = (p_en - p_st) * invl; - gd = (or2 - or1) * invl; + gd = (r2 - r1) * invl; float difz = -dot(p_st, tg); float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd)); float invcyla = 1.0f / cyla; - float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + or1))); + float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + r1))); float tcentre = -halfb * invcyla; float zcentre = difz + (tg.z * tcentre); float3 tdif = -p_st; tdif.z += tcentre; float tdifz = dot(tdif, tg); - float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + or1))); - float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - or1 * or1 - - 2 * or1 * tdifz * gd; + float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + r1))); + float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - r1 * r1 - 2 * r1 * tdifz * gd; float td = tb * tb - 4 * cyla * tc; if (td < 0.0f) { tree++; @@ -507,16 +455,6 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, w = saturate(w); /* compute u on the curve segment */ u = i_st * (1 - w) + i_en * w; - - /* stochastic fade from minimum width */ - if (difl != 0.0f && lcg_state) { - r_curr = r1 + (r2 - r1) * w; - r_ext = or1 + (or2 - or1) * w; - coverage = r_curr / r_ext; - - if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) - return hit; - } } /* we found a new intersection */ @@ -556,10 +494,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, int object, int curveAddr, float time, - int type, - uint *lcg_state, - float difl, - float extmax) + int type) { /* define few macros to minimize code duplication for SSE */ # ifndef __KERNEL_SSE2__ @@ -600,23 +535,14 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); } - float or1 = P_curve[0].w; - float or2 = P_curve[1].w; + float r1 = P_curve[0].w; + float r2 = P_curve[1].w; float3 p1 = float4_to_float3(P_curve[0]); float3 p2 = float4_to_float3(P_curve[1]); /* minimum width extension */ - float r1 = or1; - float r2 = or2; float3 dif = P - p1; float3 dif_second = P - p2; - if (difl != 0.0f) { - float pixelsize = min(len3(dif) * difl, extmax); - r1 = or1 < pixelsize ? pixelsize : or1; - pixelsize = min(len3(dif_second) * difl, extmax); - r2 = or2 < pixelsize ? pixelsize : or2; - } - /* --- */ float3 p21_diff = p2 - p1; float3 sphere_dif1 = (dif + dif_second) * 0.5f; @@ -635,20 +561,10 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve); } - const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); - - ssef r12 = or12; + ssef r12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); const ssef vP = load4f(P); const ssef dif = vP - P_curve[0]; const ssef dif_second = vP - P_curve[1]; - if (difl != 0.0f) { - const ssef len1_sq = len3_squared_splat(dif); - const ssef len2_sq = len3_squared_splat(dif_second); - const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); - const ssef pixelsize12 = min(len12 * difl, ssef(extmax)); - r12 = max(or12, pixelsize12); - } - float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12)); float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); const ssef p21_diff = P_curve[1] - P_curve[0]; @@ -754,15 +670,6 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, z = zcentre + (dirz * correction); } - /* stochastic fade from minimum width */ - float adjradius = or1 + z * (or2 - or1) * invl; - adjradius = adjradius / (r1 + z * gd); - if (lcg_state && adjradius != 1.0f) { - if (lcg_step_float(lcg_state) > adjradius) - return false; - } - /* --- */ - if (t > 0.0f && t < isect->t && z >= 0 && z <= l) { if (flags & CURVE_KN_ENCLOSEFILTER) { diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 2792fd64c61..f410e6e27e2 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -386,7 +386,8 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) ccl_device_inline float3 bvh_clamp_direction(float3 dir) { - /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */ + /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse + * direction */ #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f); const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index bcad03102d2..9c6fd498a80 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -178,7 +178,7 @@ ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg, _mm256_cmpeq_epi32(two256, UVW_256_1)); unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256)); - if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set + if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { // all bits set return false; } @@ -375,7 +375,7 @@ ccl_device_inline int triangle_intersect8(KernelGlobals *kg, tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; } - //create 9 or 12 placeholders + // create 9 or 12 placeholders tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256 tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256 tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256 @@ -401,40 +401,40 @@ ccl_device_inline int triangle_intersect8(KernelGlobals *kg, } //------------------------------------------------ - //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1 - //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1 - //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1 + // 0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1 + // 1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1 + // 2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1 - //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1 - //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1 - //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1 + // 3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1 + // 4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1 + // 5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1 - //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1 - //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1 - //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1 + // 6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1 + // 7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1 + // 8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1 - //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1 - //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1 - //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1 + // 9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1 + // 10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1 + // 11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1 //"transpose" - tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5 - tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1 + tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); // 0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5 + tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); // 1! Za0 Za1 1 1 Za4 Za5 1 1 - tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7 - tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1 + tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); // 2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7 + tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); // 3! Za2 Za3 1 1 Za6 Za7 1 1 - tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5 - tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1 + tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); // 4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5 + tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); // 5! Zb0 Zb1 1 1 Zb4 Zb5 1 1 - tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7 - tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1 + tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); // 6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7 + tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); // 7! Zb2 Zb3 1 1 Zb6 Zb7 1 1 - tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5 - tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1 + tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); // 8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5 + tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); // 9! Zc0 Zc1 1 1 Zc4 Zc5 1 1 - tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7 - tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1 + tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); // 10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7 + tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); // 11! Zc2 Zc3 1 1 Zc6 Zc7 1 1 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ triA[0] = _mm256_castpd_ps( @@ -459,13 +459,13 @@ ccl_device_inline int triangle_intersect8(KernelGlobals *kg, triC[0] = _mm256_castpd_ps( _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), - _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7 + _mm256_castps_pd(tritmp[10]))); // Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7 triC[1] = _mm256_castpd_ps( _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), - _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7 + _mm256_castps_pd(tritmp[10]))); // Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7 triC[2] = _mm256_castpd_ps( _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), - _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7 + _mm256_castps_pd(tritmp[11]))); // Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 10b71bc6bdf..cd1ca5ea7ec 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -72,7 +72,8 @@ ccl_device_inline void compute_light_pass( # ifdef __SUBSURFACE__ /* sample subsurface scattering */ if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { - /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting + * if scattering was successful. */ SubsurfaceIndirectRays ss_indirect; kernel_path_subsurface_init_indirect(&ss_indirect); if (kernel_path_subsurface_scatter( @@ -123,7 +124,8 @@ ccl_device_inline void compute_light_pass( # ifdef __SUBSURFACE__ /* sample subsurface scattering */ if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { - /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting + * if scattering was successful. */ kernel_branched_path_subsurface_scatter( kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput); } diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h index c1f4e39e5e7..1ca42e933d1 100644 --- a/intern/cycles/kernel/kernel_id_passes.h +++ b/intern/cycles/kernel/kernel_id_passes.h @@ -1,18 +1,18 @@ /* -* Copyright 2018 Blender Foundation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ CCL_NAMESPACE_BEGIN @@ -32,7 +32,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ if (id_buffer[slot].x == ID_NONE) { /* Use an atomic to claim this slot. - * If a different thread got here first, try again from this slot on. */ + * If a different thread got here first, try again from this slot on. */ float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id); if (old_id != ID_NONE && old_id != id) { continue; @@ -54,7 +54,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, break; } /* If there already is a slot for that ID, add the weight. - * If no slot was found, add it to the last. */ + * If no slot was found, add it to the last. */ else if (id_buffer[slot].x == id || slot == num_slots - 1) { id_buffer[slot].y += weight; break; diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 5e24f8dedaf..9128bfa9d95 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -524,7 +524,8 @@ ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direct portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf; if (!is_possible) { /* Portal sampling is not possible here because all portals point to the wrong side. - * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */ + * If map sampling is possible, it would be used instead, + * otherwise fallback sampling is used. */ if (portal_sampling_pdf == 1.0f) { return kernel_data.integrator.pdf_lights / M_4PI_F; } diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index a933be970c2..acd5086be3a 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -199,21 +199,27 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) float NdotNg = dot(N, Ng); float3 X = normalize(N - NdotNg * Ng); + /* Keep math expressions. */ + /* clang-format off */ /* Calculate N.z and N.x in the local coordinate system. * * The goal of this computation is to find a N' that is rotated towards Ng just enough * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. * - * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. + * According to the standard reflection equation, + * this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. * - * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t. + * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get + * 2*dot(N', I)*N'.z - I.z = t. * - * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that - * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . + * The rotation is simple to express in the coordinate system we formed - + * since N lies in the X-Z-plane, we know that N' will also lie in the X-Z-plane, + * so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . * * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). * - * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. + * With these simplifications, + * we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. * * The only unknown here is N'.z, so we can solve for that. * @@ -227,8 +233,11 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) * c = I.z*t + a * N'.z = +-sqrt(0.5*(+-b + c)/a) * - * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere. + * Two solutions can immediately be discarded because they're negative so N' would lie in the + * lower hemisphere. */ + /* clang-format on */ + float Ix = dot(I, X), Iz = dot(I, Ng); float Ix2 = sqr(Ix), Iz2 = sqr(Iz); float a = Ix2 + Iz2; @@ -237,8 +246,9 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) float c = Iz * threshold + a; /* Evaluate both solutions. - * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first. - * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */ + * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than + * one), so check for that first. If no option is viable (might happen in extreme cases like N + * being in the wrong hemisphere), give up and return Ng. */ float fac = 0.5f / a; float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); @@ -256,8 +266,9 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) valid1 = (R1 >= 1e-5f); valid2 = (R2 >= 1e-5f); if (valid1 && valid2) { - /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input - * (if the original reflection wasn't shallow, we would not be in this part of the function). */ + /* If both solutions are valid, return the one with the shallower reflection since it will be + * closer to the input (if the original reflection wasn't shallow, we would not be in this + * part of the function). */ N_new = (R1 < R2) ? N1 : N2; } else { diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 2be1b745632..f3e2a8a234a 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -65,25 +65,7 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg, ray->t = kernel_data.background.ao_distance; } -#ifdef __HAIR__ - float difl = 0.0f, extmax = 0.0f; - uint lcg_state = 0; - - if (kernel_data.bvh.have_curves) { - if ((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) { - float3 pixdiff = ray->dD.dx + ray->dD.dy; - /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ - difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; - } - - extmax = kernel_data.curve.maximum_width; - lcg_state = lcg_state_init_addrspace(state, 0x51633e2d); - } - - bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax); -#else - bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f); -#endif /* __HAIR__ */ + bool hit = scene_intersect(kg, *ray, visibility, isect); #ifdef __KERNEL_DEBUG__ if (state->flag & PATH_RAY_CAMERA) { @@ -455,8 +437,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, } /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ float probability = path_state_continuation_probability(kg, state, throughput); if (probability == 0.0f) { @@ -482,7 +464,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, # ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object, replacing - * the closures with a diffuse BSDF */ + * the closures with a diffuse BSDF */ if (sd->flag & SD_BSSRDF) { if (kernel_path_subsurface_scatter( kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { @@ -593,8 +575,8 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, } /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ float probability = path_state_continuation_probability(kg, state, throughput); if (probability == 0.0f) { @@ -619,7 +601,7 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, # ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object, replacing - * the closures with a diffuse BSDF */ + * the closures with a diffuse BSDF */ if (sd.flag & SD_BSSRDF) { if (kernel_path_subsurface_scatter( kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index e8ce61024b3..f3a1ea3f4fd 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -428,8 +428,8 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, /* transparency termination */ if (state.flag & PATH_RAY_TRANSPARENT) { /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ float probability = path_state_continuation_probability(kg, &state, throughput); if (probability == 0.0f) { diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 6251313c5f8..a1ab4951565 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -18,7 +18,8 @@ CCL_NAMESPACE_BEGIN #if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \ defined(__BAKING__) -/* branched path tracing: connect path directly to position on one or more lights and add it to L */ +/* branched path tracing: connect path directly to position on one or more lights and add it to L + */ ccl_device_noinline void kernel_branched_path_surface_connect_light( KernelGlobals *kg, ShaderData *sd, @@ -62,8 +63,10 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light( LightSample ls; if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { - /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled. - * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */ + /* The sampling probability returned by lamp_light_sample assumes that all lights were + * sampled. + * However, this code only samples lamps, so if the scene also had mesh lights, the real + * probability is twice as high. */ if (kernel_data.integrator.pdf_triangles != 0.0f) ls.pdf *= 2.0f; @@ -109,7 +112,8 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light( LightSample ls; if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. */ + /* Same as above, probability needs to be corrected since the sampling was forced to + * select a mesh light. */ if (kernel_data.integrator.num_all_lights) ls.pdf *= 2.0f; diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 6af1369feab..07201819030 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -103,8 +103,7 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - const bool blocked = scene_intersect( - kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); + const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); #ifdef __VOLUME__ if (!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. */ @@ -319,8 +318,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg, if (bounce >= kernel_data.integrator.transparent_max_bounce) { return true; } - if (!scene_intersect( - kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) { + if (!scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) { break; } if (!shader_transparent_shadow(kg, isect)) { @@ -376,8 +374,7 @@ ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - bool blocked = scene_intersect( - kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); + bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false; return shadow_blocked_transparent_stepped_loop( kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow); @@ -436,8 +433,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, * TODO(sergey): Check why using record-all behavior causes slowdown in such * cases. Could that be caused by a higher spill pressure? */ - const bool blocked = scene_intersect( - kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); + const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect); const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false; if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) { return shadow_blocked_transparent_stepped_loop( diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 3f62b726b6a..0c6b4b401f0 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -1372,8 +1372,7 @@ typedef struct KernelCurves { int curveflags; int subdivisions; - float minimum_width; - float maximum_width; + int pad1, pad2; } KernelCurves; static_assert_align(KernelCurves, 16); diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index e024003252f..1705f58b87d 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -559,7 +559,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, float dt = new_t - t; /* use random position inside this segment to sample shader, - * for last shorter step we remap it to fit within the segment. */ + * for last shorter step we remap it to fit within the segment. */ if (new_t == ray->t) { step_offset *= (new_t - t) / step_size; } @@ -794,7 +794,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, float dt = new_t - t; /* use random position inside this segment to sample shader, - * for last shorter step we remap it to fit within the segment. */ + * for last shorter step we remap it to fit within the segment. */ if (new_t == ray->t) { step_offset *= (new_t - t) / step_size; } diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h index d9f349837a8..3ec00762e72 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -61,7 +61,8 @@ /* tunable parameters */ # define CUDA_THREADS_BLOCK_WIDTH 16 -/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of registers */ +/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of + * registers */ # if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600 # define CUDA_KERNEL_MAX_REGISTERS 64 # else diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index aa7e2727577..27205df3732 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -497,8 +497,8 @@ class MicrofacetFresnelClosure : public CBSDFClosure { MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) { /* Technically, the MultiGGX Glass closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { return NULL; } @@ -715,8 +715,8 @@ class MicrofacetMultiFresnelClosure : public CBSDFClosure { MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) { /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { return NULL; } diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index eb9f672fd8a..6404690224a 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -733,7 +733,7 @@ bool OSLRenderServices::get_object_standard_attribute( return set_attribute_float3(f, type, derivatives, val); } #if 0 /* unsupported */ - else if(name == u_particle_rotation) { + else if (name == u_particle_rotation) { int particle_id = object_particle_id(kg, sd->object); float4 f = particle_rotation(kg, particle_id); return set_attribute_float4(f, type, derivatives, val); @@ -1017,7 +1017,7 @@ bool OSLRenderServices::texture(ustring filename, PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error)); if (!r) { - //std::cerr << error.c_str() << std::endl; + // std::cerr << error.c_str() << std::endl; return false; } @@ -1373,7 +1373,7 @@ bool OSLRenderServices::trace(TraceOpt &options, /* Raytrace, leaving out shadow opaque to avoid early exit. */ uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; - return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f); + return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect); } bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 9b9720ffff9..6515d914909 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -235,15 +235,42 @@ int clamp(int x, int minval, int maxval) return max(min(x, maxval), minval); } #if 0 -normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; } -normal mix (normal x, normal y, float a) { return x*(1-a) + y*a; } -vector mix (vector x, vector y, vector a) { return x*(1-a) + y*a; } -vector mix (vector x, vector y, float a) { return x*(1-a) + y*a; } -point mix (point x, point y, point a) { return x*(1-a) + y*a; } -point mix (point x, point y, float a) { return x*(1-a) + y*a; } -color mix (color x, color y, color a) { return x*(1-a) + y*a; } -color mix (color x, color y, float a) { return x*(1-a) + y*a; } -float mix (float x, float y, float a) { return x*(1-a) + y*a; } +normal mix(normal x, normal y, normal a) +{ + return x * (1 - a) + y * a; +} +normal mix(normal x, normal y, float a) +{ + return x * (1 - a) + y * a; +} +vector mix(vector x, vector y, vector a) +{ + return x * (1 - a) + y * a; +} +vector mix(vector x, vector y, float a) +{ + return x * (1 - a) + y * a; +} +point mix(point x, point y, point a) +{ + return x * (1 - a) + y * a; +} +point mix(point x, point y, float a) +{ + return x * (1 - a) + y * a; +} +color mix(color x, color y, color a) +{ + return x * (1 - a) + y * a; +} +color mix(color x, color y, float a) +{ + return x * (1 - a) + y * a; +} +float mix(float x, float y, float a) +{ + return x * (1 - a) + y * a; +} #else normal mix(normal x, normal y, normal a) BUILTIN; normal mix(normal x, normal y, float a) BUILTIN; @@ -360,16 +387,16 @@ point rotate(point p, float angle, point a, point b) vector axis = normalize(b - a); float cosang, sinang; /* Older OSX has major issues with sincos() function, - * it's likely a big in OSL or LLVM. For until we've - * updated to new versions of this libraries we'll - * use a workaround to prevent possible crashes on all - * the platforms. - * - * Shouldn't be that bad because it's mainly used for - * anisotropic shader where angle is usually constant. - */ + * it's likely a big in OSL or LLVM. For until we've + * updated to new versions of this libraries we'll + * use a workaround to prevent possible crashes on all + * the platforms. + * + * Shouldn't be that bad because it's mainly used for + * anisotropic shader where angle is usually constant. + */ #if 0 - sincos (angle, sinang, cosang); + sincos(angle, sinang, cosang); #else sinang = sin(angle); cosang = cos(angle); @@ -398,7 +425,7 @@ point rotate(point p, float angle, point a, point b) normal ensure_valid_reflection(normal Ng, vector I, normal N) { /* The implementation here mirrors the one in kernel_montecarlo.h, - * check there for an explanation of the algorithm. */ + * check there for an explanation of the algorithm. */ float sqr(float x) { diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index e77743350dc..e37be5b405e 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -132,8 +132,8 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if (ray->t != 0.0f) { /* Initialize throughput, path radiance, Ray, PathState; - * These rays proceed with path-iteration. - */ + * These rays proceed with path-iteration. + */ *throughput = make_float3(1.0f, 1.0f, 1.0f); path_radiance_init(L, kernel_data.film.use_light_pass); path_state_init(kg, diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 52930843f56..2f83a10316d 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -46,10 +46,10 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( int sh, int offset, int stride, - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* size (capacity) of the queue */ - ccl_global char * - use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues + to fetch ray index */ ccl_global unsigned int *work_pools, /* Work pool for each work group */ unsigned int num_samples, ccl_global float *buffer) diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 63bc5a8e0ce..5cd4131e2ae 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -114,9 +114,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { /* Path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate. - */ + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate. + */ float probability = path_state_continuation_probability(kg, state, throughput); if (probability == 0.0f) { diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index 781ce869374..3c2f6038035 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -109,9 +109,9 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { /* If we are here, then it means that scene-intersect kernel - * has already been executed atleast once. From the next time, - * scene-intersect kernel may operate on queues to fetch ray index - */ + * has already been executed atleast once. From the next time, + * scene-intersect kernel may operate on queues to fetch ray index + */ *kernel_split_params.use_queues_flag = 1; /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 6ff3f5bdb55..ac4a450ca2b 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -19,7 +19,8 @@ CCL_NAMESPACE_BEGIN -/* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */ +/* parameters used by the split kernels, we use a single struct to avoid passing these to each + * kernel */ typedef struct SplitParams { WorkTile tile; @@ -112,7 +113,8 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_BRANCHED_ENTRIES \ SPLIT_DATA_ENTRY(ShaderData, _sd, 0) -/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ +/* Entries to be copied to inactive rays when sharing branched samples + * (TODO: which are actually needed?) */ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ @@ -134,8 +136,9 @@ typedef struct SplitData { SPLIT_DATA_ENTRIES #undef SPLIT_DATA_ENTRY - /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from - * the host easily) but is still used the same as the other data so we have it here in this struct as well + /* this is actually in a separate buffer from the rest of the split state data (so it can be read + * back from the host easily) but is still used the same as the other data so we have it here in + * this struct as well */ ccl_global char *ray_state; } SplitData; diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index 06076175c40..62413979201 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -1,18 +1,18 @@ /* -* Copyright 2011-2018 Blender Foundation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ CCL_NAMESPACE_BEGIN @@ -70,7 +70,7 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, } else { Intersection isect; - if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f)) { + if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { unoccluded++; } } diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h index 9434c0c5505..f13527c03db 100644 --- a/intern/cycles/kernel/svm/svm_ies.h +++ b/intern/cycles/kernel/svm/svm_ies.h @@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float interpolate_ies_vertical( KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) { - /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v - * (corresponding to the north pole) would result in artifacts. - * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole, - * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation. - * Therefore, the assumtion is made that the light is going to be symmetrical, which means that we can just take - * the corresponding value at the current horizontal coordinate. */ + /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end + * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing + * with this would be to lookup the corresponding value on the other side of the pole, but since + * the horizontal coordinates might be nonuniform, this would require yet another interpolation. + * Therefore, the assumtion is made that the light is going to be symmetrical, which means that + * we can just take the corresponding value at the current horizontal coordinate. */ #define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v)) /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ @@ -66,7 +66,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, /* Lookup the angles to find the table position. */ int h_i, v_i; - /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */ + /* TODO(lukas): Consider using bisection. + * Probably not worth it for the vast majority of IES files. */ for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++) ; for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++) @@ -83,7 +84,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, /* Perform cubic interpolation along the horizontal coordinate to get the intensity value. * If h_i is zero, just wrap around since the horizontal angles always go over the full circle. - * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */ + * However, the last entry (360°) equals the first one, so we need to wrap around to the one + * before that. */ float a = interpolate_ies_vertical( kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1); float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i); diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index c311aefaf38..3e28a316169 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -70,7 +70,8 @@ ccl_device void voronoi_neighbors( } } - /* To keep the shortest four distances and associated points we have to keep them in sorted order. */ + /* To keep the shortest four distances and associated points we have to keep them in sorted + * order. */ if (d < da[0]) { da[3] = da[2]; da[2] = da[1]; |