intern/cycles/kernel/geom/geom_qbvh.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

/*
 * Copyright 2011-2014, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

struct QBVHStackItem {
	int addr;
	float dist;
};

/* TOOD(sergey): Investigate if using instrinsics helps for both
 * stack item swap and float comparison.
 */
ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a,
                                      QBVHStackItem *__restrict b)
{
	QBVHStackItem tmp = *a;
	*a = *b;
	*b = tmp;
}

ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
                                       QBVHStackItem *__restrict s2,
                                       QBVHStackItem *__restrict s3)
{
	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
	if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
}

ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
                                       QBVHStackItem *__restrict s2,
                                       QBVHStackItem *__restrict s3,
                                       QBVHStackItem *__restrict s4)
{
	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
	if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
	if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
	if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
	if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
}

ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
                                          const ssef& tnear,
                                          const ssef& tfar,
#ifdef __KERNEL_AVX2__
                                          const sse3f& org_idir,
#else
                                          const sse3f& org,
#endif
                                          const sse3f& idir,
                                          const int near_x,
                                          const int near_y,
                                          const int near_z,
                                          const int far_x,
                                          const int far_y,
                                          const int far_z,
                                          const int nodeAddr,
                                          ssef *__restrict dist)
{
	const int offset = nodeAddr*BVH_QNODE_SIZE;
#ifdef __KERNEL_AVX2__
	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
	const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
	const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
	const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
#else
	const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
	const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
	const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
	const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
	const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
	const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
#endif

#ifdef __KERNEL_SSE41__
	const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
	const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
	const sseb vmask = cast(tNear) > cast(tFar);
	int mask = (int)movemask(vmask)^0xf;
#else
	const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
	const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
	const sseb vmask = tNear <= tFar;
	int mask = (int)movemask(vmask);
#endif
	*dist = tNear;
	return mask;
}

ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
                                                 const ssef& tnear,
                                                 const ssef& tfar,
#ifdef __KERNEL_AVX2__
                                                 const sse3f& P_idir,
#else
                                                 const sse3f& P,
#endif
                                                 const sse3f& idir,
                                                 const int near_x,
                                                 const int near_y,
                                                 const int near_z,
                                                 const int far_x,
                                                 const int far_y,
                                                 const int far_z,
                                                 const int nodeAddr,
                                                 const float difl,
                                                 ssef *__restrict dist)
{
	const int offset = nodeAddr*BVH_QNODE_SIZE;
#ifdef __KERNEL_AVX2__
	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
	const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
	const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
	const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
#else
	const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
	const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
	const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
	const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
	const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
	const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
#endif

	const float round_down = 1.0f - difl;
	const float round_up = 1.0f + difl;
	const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
	const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
	const sseb vmask = round_down*tNear <= round_up*tFar;
	*dist = tNear;
	return (int)movemask(vmask);
}