
git.blender.org/blender.git
author    Sergey Sharybin <sergey.vfx@gmail.com>  2016-07-11 14:53:37 +0300
committer Sergey Sharybin <sergey.vfx@gmail.com>  2016-07-11 14:58:47 +0300
commit    cb3b19730c4fa402c065e288330f4f1f197026ab (patch)
tree      9ecbafc8af4ad7a1027a47eddc6fc0b8b7ce49e6 /intern/cycles/kernel/bvh
parent    cf82b49a0fd116d87b4c7e96e39bb02fb9e964bf (diff)
Cycles: Use utility define for restrict pointers
This way, restrict can be used for CUDA and OpenCL as well. Quick tests in the areas I've been looking at suggest it might give a barely measurable percentage of speedup, but it also increases register pressure, so use of this qualifier remains quite limited.
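For context, a minimal sketch of how such a utility define can dispatch to each backend's spelling of the qualifier. The real definitions live in Cycles' per-device compatibility headers; the exact guards and spellings below are assumptions, not the actual Blender code:

/* Hypothetical sketch of the ccl_restrict dispatch; the actual
 * definitions live in Cycles' per-device compatibility headers. */
#if defined(__KERNEL_CUDA__)
#  define ccl_restrict __restrict__  /* NVCC/GCC/Clang spelling */
#elif defined(__KERNEL_OPENCL__)
#  define ccl_restrict restrict      /* a keyword in OpenCL C */
#else
#  define ccl_restrict __restrict    /* MSVC-compatible C++ extension */
#endif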
Diffstat (limited to 'intern/cycles/kernel/bvh')
-rw-r--r--  intern/cycles/kernel/bvh/qbvh_nodes.h  |  42
1 file changed, 21 insertions(+), 21 deletions(-)
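As background on why the qualifier matters at all: restrict promises the compiler that the pointed-to memory is not reachable through any other pointer in scope, which lets it keep loaded values in registers instead of conservatively re-reading them after every store. That is also where the extra register pressure comes from. A minimal hypothetical illustration, not part of this patch, reusing the ccl_restrict define sketched above:

/* Because dst, src and scale are declared non-aliasing, the compiler
 * may load *scale once and keep it in a register for the whole loop
 * instead of reloading it after every store to dst[i]. The flip side
 * is that more values stay live in registers at once. */
void scale_no_alias(float *ccl_restrict dst,
                    const float *ccl_restrict src,
                    const float *ccl_restrict scale,
                    int n)
{
	for(int i = 0; i < n; i++) {
		dst[i] = src[i] * (*scale);  /* no per-iteration reload of *scale */
	}
}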
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
index 6dfb1c08e27..a833f4b1248 100644
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ b/intern/cycles/kernel/bvh/qbvh_nodes.h
@@ -22,27 +22,27 @@ struct QBVHStackItem {
/* TODO(sergey): Investigate if using intrinsics helps for both
* stack item swap and float comparison.
*/
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a,
- QBVHStackItem *__restrict b)
+ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
+ QBVHStackItem *ccl_restrict b)
{
QBVHStackItem tmp = *a;
*a = *b;
*b = tmp;
}
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
- QBVHStackItem *__restrict s2,
- QBVHStackItem *__restrict s3)
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
+ QBVHStackItem *ccl_restrict s2,
+ QBVHStackItem *ccl_restrict s3)
{
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
}
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
- QBVHStackItem *__restrict s2,
- QBVHStackItem *__restrict s3,
- QBVHStackItem *__restrict s4)
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
+ QBVHStackItem *ccl_restrict s2,
+ QBVHStackItem *ccl_restrict s3,
+ QBVHStackItem *ccl_restrict s4)
{
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
@@ -53,7 +53,7 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
/* Axis-aligned nodes intersection */
-ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
+ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
const ssef& tnear,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
@@ -69,7 +69,7 @@ ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
const int far_y,
const int far_z,
const int node_addr,
- ssef *__restrict dist)
+ ssef *ccl_restrict dist)
{
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
@@ -104,7 +104,7 @@ ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
}
ccl_device_inline int qbvh_aligned_node_intersect_robust(
- KernelGlobals *__restrict kg,
+ KernelGlobals *ccl_restrict kg,
const ssef& tnear,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
@@ -121,7 +121,7 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
const int far_z,
const int node_addr,
const float difl,
- ssef *__restrict dist)
+ ssef *ccl_restrict dist)
{
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
@@ -152,7 +152,7 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
/* Unaligned nodes intersection */
ccl_device_inline int qbvh_unaligned_node_intersect(
- KernelGlobals *__restrict kg,
+ KernelGlobals *ccl_restrict kg,
const ssef& tnear,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
@@ -168,7 +168,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
const int far_y,
const int far_z,
const int node_addr,
- ssef *__restrict dist)
+ ssef *ccl_restrict dist)
{
const int offset = node_addr;
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
@@ -236,7 +236,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
}
ccl_device_inline int qbvh_unaligned_node_intersect_robust(
- KernelGlobals *__restrict kg,
+ KernelGlobals *ccl_restrict kg,
const ssef& tnear,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
@@ -253,7 +253,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
const int far_z,
const int node_addr,
const float difl,
- ssef *__restrict dist)
+ ssef *ccl_restrict dist)
{
const int offset = node_addr;
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
@@ -324,7 +324,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
*/
ccl_device_inline int qbvh_node_intersect(
- KernelGlobals *__restrict kg,
+ KernelGlobals *ccl_restrict kg,
const ssef& tnear,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
@@ -340,7 +340,7 @@ ccl_device_inline int qbvh_node_intersect(
const int far_y,
const int far_z,
const int node_addr,
- ssef *__restrict dist)
+ ssef *ccl_restrict dist)
{
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
@@ -377,7 +377,7 @@ ccl_device_inline int qbvh_node_intersect(
}
ccl_device_inline int qbvh_node_intersect_robust(
- KernelGlobals *__restrict kg,
+ KernelGlobals *ccl_restrict kg,
const ssef& tnear,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
@@ -394,7 +394,7 @@ ccl_device_inline int qbvh_node_intersect_robust(
const int far_z,
const int node_addr,
const float difl,
- ssef *__restrict dist)
+ ssef *ccl_restrict dist)
{
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);