Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/bvh')
-rw-r--r-- intern/cycles/kernel/bvh/bvh.h 17
-rw-r--r-- intern/cycles/kernel/bvh/bvh_nodes.h 30
-rw-r--r-- intern/cycles/kernel/bvh/bvh_traversal.h 21
-rw-r--r-- intern/cycles/kernel/bvh/bvh_types.h 2
4 files changed, 33 insertions, 37 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 7cecee793c1..36798982653 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -157,8 +157,9 @@ CCL_NAMESPACE_BEGIN
#undef BVH_NAME_EVAL
#undef BVH_FUNCTION_FULL_NAME
+/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
- const Ray *ray,
+ const Ray ray,
const uint visibility,
Intersection *isect,
uint *lcg_state,
@@ -169,32 +170,32 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
if(kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax);
+ return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
# endif /* __HAIR__ */
- return bvh_intersect_motion(kg, ray, isect, visibility);
+ return bvh_intersect_motion(kg, &ray, isect, visibility);
}
#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax);
+ return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
- return bvh_intersect_instancing(kg, ray, isect, visibility);
+ return bvh_intersect_instancing(kg, &ray, isect, visibility);
# endif /* __INSTANCING__ */
- return bvh_intersect(kg, ray, isect, visibility);
+ return bvh_intersect(kg, &ray, isect, visibility);
#else /* __KERNEL_CPU__ */
# ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, ray, isect, visibility);
+ return bvh_intersect_instancing(kg, &ray, isect, visibility);
# else
- return bvh_intersect(kg, ray, isect, visibility);
+ return bvh_intersect(kg, &ray, isect, visibility);
# endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index db2275b0ff8..726bef1794c 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -16,7 +16,7 @@
// TODO(sergey): Look into avoiding the use of a full Transform and using a 3x3
// matrix and a 3-vector instead, which might be faster.
-ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
+ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
int node_addr,
int child)
{
@@ -30,7 +30,7 @@ ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
}
#if !defined(__KERNEL_SSE2__)
-ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
+ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 idir,
const float t,
@@ -77,7 +77,7 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
#endif
}
-ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 idir,
const float t,
@@ -139,7 +139,7 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
#endif
}
-ccl_device_inline bool bvh_unaligned_node_intersect_child(
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
KernelGlobals *kg,
const float3 P,
const float3 dir,
@@ -166,7 +166,7 @@ ccl_device_inline bool bvh_unaligned_node_intersect_child(
return tnear <= tfar;
}
-ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
KernelGlobals *kg,
const float3 P,
const float3 dir,
@@ -202,7 +202,7 @@ ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
}
}
-ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float3 idir,
@@ -232,7 +232,7 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
return mask;
}
-ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float3 idir,
@@ -264,7 +264,7 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
return mask;
}
-ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float3 idir,
@@ -295,7 +295,7 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
}
}
-ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float3 idir,
@@ -333,7 +333,7 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
}
#else /* !defined(__KERNEL_SSE2__) */
-int ccl_device_inline bvh_aligned_node_intersect(
+int ccl_device_forceinline bvh_aligned_node_intersect(
KernelGlobals *kg,
const float3& P,
const float3& dir,
@@ -377,7 +377,7 @@ int ccl_device_inline bvh_aligned_node_intersect(
# endif
}
-int ccl_device_inline bvh_aligned_node_intersect_robust(
+ccl_device_forceinline int bvh_aligned_node_intersect_robust(
KernelGlobals *kg,
const float3& P,
const float3& dir,
@@ -441,7 +441,7 @@ int ccl_device_inline bvh_aligned_node_intersect_robust(
# endif
}
-int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
const ssef& isect_near,
@@ -502,7 +502,7 @@ int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
# endif
}
-int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
const ssef& isect_near,
@@ -573,7 +573,7 @@ int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
# endif
}
-ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const float3& P,
const float3& dir,
const ssef& isect_near,
@@ -611,7 +611,7 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
}
}
-ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const float3& P,
const float3& dir,
const ssef& isect_near,
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index b1a52968a26..a0e478e972b 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -40,21 +40,16 @@
*
*/
-#ifndef __KERNEL_GPU__
-ccl_device
-#else
-ccl_device_inline
-#endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
+ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect,
+ const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- , uint *lcg_state,
- float difl,
- float extmax
+ , uint *lcg_state,
+ float difl,
+ float extmax
#endif
- )
+ )
{
/* todo:
* - test if pushing distance on the stack helps (for non shadow rays)
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index 27729046f8d..c3abe2e157d 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
/* Don't inline intersect functions on GPU, this is faster */
#ifdef __KERNEL_GPU__
-# define ccl_device_intersect ccl_device_noinline
+# define ccl_device_intersect ccl_device_forceinline
#else
# define ccl_device_intersect ccl_device_inline
#endif