git.blender.org/blender.git
Diffstat (limited to 'intern/cycles')
intern/cycles/blender/addon/ui.py | 144
intern/cycles/blender/blender_shader.cpp | 2
intern/cycles/bvh/CMakeLists.txt | 2
intern/cycles/bvh/bvh.cpp | 595
intern/cycles/bvh/bvh.h | 50
intern/cycles/bvh/bvh_binning.cpp | 72
intern/cycles/bvh/bvh_binning.h | 39
intern/cycles/bvh/bvh_build.cpp | 270
intern/cycles/bvh/bvh_build.h | 27
intern/cycles/bvh/bvh_node.cpp | 70
intern/cycles/bvh/bvh_node.h | 47
intern/cycles/bvh/bvh_params.h | 18
intern/cycles/bvh/bvh_sort.cpp | 43
intern/cycles/bvh/bvh_sort.h | 10
intern/cycles/bvh/bvh_split.cpp | 115
intern/cycles/bvh/bvh_split.h | 82
intern/cycles/bvh/bvh_unaligned.cpp | 178
intern/cycles/bvh/bvh_unaligned.h | 81
intern/cycles/kernel/CMakeLists.txt | 39
intern/cycles/kernel/bvh/bvh.h (renamed from intern/cycles/kernel/geom/geom_bvh.h) | 63
intern/cycles/kernel/bvh/bvh_nodes.h | 656
intern/cycles/kernel/bvh/bvh_shadow_all.h (renamed from intern/cycles/kernel/geom/geom_bvh_shadow.h) | 260
intern/cycles/kernel/bvh/bvh_subsurface.h (renamed from intern/cycles/kernel/geom/geom_bvh_subsurface.h) | 177
intern/cycles/kernel/bvh/bvh_traversal.h (renamed from intern/cycles/kernel/geom/geom_bvh_traversal.h) | 337
intern/cycles/kernel/bvh/bvh_volume.h (renamed from intern/cycles/kernel/geom/geom_bvh_volume.h) | 221
intern/cycles/kernel/bvh/bvh_volume_all.h (renamed from intern/cycles/kernel/geom/geom_bvh_volume_all.h) | 235
intern/cycles/kernel/bvh/qbvh_nodes.h | 433
intern/cycles/kernel/bvh/qbvh_shadow_all.h (renamed from intern/cycles/kernel/geom/geom_qbvh_shadow.h) | 296
intern/cycles/kernel/bvh/qbvh_subsurface.h (renamed from intern/cycles/kernel/geom/geom_qbvh_subsurface.h) | 194
intern/cycles/kernel/bvh/qbvh_traversal.h | 505
intern/cycles/kernel/bvh/qbvh_volume.h (renamed from intern/cycles/kernel/geom/geom_qbvh_volume.h) | 249
intern/cycles/kernel/bvh/qbvh_volume_all.h (renamed from intern/cycles/kernel/geom/geom_qbvh_volume_all.h) | 263
intern/cycles/kernel/geom/geom.h | 22
intern/cycles/kernel/geom/geom_curve.h | 4
intern/cycles/kernel/geom/geom_motion_triangle.h | 32
intern/cycles/kernel/geom/geom_qbvh.h | 147
intern/cycles/kernel/geom/geom_qbvh_traversal.h | 412
intern/cycles/kernel/geom/geom_triangle.h | 61
intern/cycles/kernel/geom/geom_triangle_intersect.h | 28
intern/cycles/kernel/kernel_compat_cuda.h | 1
intern/cycles/kernel/kernel_compat_opencl.h | 1
intern/cycles/kernel/kernel_light.h | 4
intern/cycles/kernel/kernel_path.h | 1
intern/cycles/kernel/kernel_random.h | 2
intern/cycles/kernel/kernel_textures.h | 6
intern/cycles/kernel/kernel_types.h | 27
intern/cycles/kernel/kernels/opencl/kernel.cl | 1
intern/cycles/kernel/osl/osl_services.cpp | 5
intern/cycles/kernel/shaders/CMakeLists.txt | 1
intern/cycles/kernel/shaders/node_image_texture.osl | 14
intern/cycles/kernel/shaders/node_rgb_to_bw.osl | 25
intern/cycles/kernel/split/kernel_split_common.h | 1
intern/cycles/kernel/svm/svm_image.h | 37
intern/cycles/render/image.cpp | 35
intern/cycles/render/image.h | 2
intern/cycles/render/mesh.cpp | 221
intern/cycles/render/mesh.h | 57
intern/cycles/render/nodes.cpp | 10
intern/cycles/render/scene.h | 6
intern/cycles/util/util_boundbox.h | 2
intern/cycles/util/util_transform.h | 13
intern/cycles/util/util_types.h | 1
62 files changed, 4721 insertions(+), 2231 deletions(-)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 94ed3dd4311..3c9c83fec42 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -383,7 +383,6 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
sub.prop(cscene, "use_progressive_refine")
subsub = sub.column(align=True)
- subsub.enabled = not rd.use_border
subsub.prop(rd, "use_save_buffers")
col = split.column(align=True)
@@ -1599,89 +1598,62 @@ def draw_pause(self, context):
def get_panels():
- types = bpy.types
- panels = [
- "RENDER_PT_render",
- "RENDER_PT_output",
- "RENDER_PT_encoding",
- "RENDER_PT_dimensions",
- "RENDER_PT_stamp",
- "RENDER_PT_freestyle",
- "RENDERLAYER_PT_layers",
- "RENDERLAYER_PT_freestyle",
- "RENDERLAYER_PT_freestyle_lineset",
- "RENDERLAYER_PT_freestyle_linestyle",
- "SCENE_PT_scene",
- "SCENE_PT_color_management",
- "SCENE_PT_custom_props",
- "SCENE_PT_audio",
- "SCENE_PT_unit",
- "SCENE_PT_keying_sets",
- "SCENE_PT_keying_set_paths",
- "SCENE_PT_physics",
- "WORLD_PT_context_world",
- "WORLD_PT_custom_props",
- "DATA_PT_context_mesh",
- "DATA_PT_context_camera",
- "DATA_PT_context_lamp",
- "DATA_PT_context_speaker",
- "DATA_PT_normals",
- "DATA_PT_texture_space",
- "DATA_PT_curve_texture_space",
- "DATA_PT_mball_texture_space",
- "DATA_PT_vertex_groups",
- "DATA_PT_shape_keys",
- "DATA_PT_uv_texture",
- "DATA_PT_vertex_colors",
- "DATA_PT_camera",
- "DATA_PT_camera_display",
- "DATA_PT_camera_stereoscopy",
- "DATA_PT_camera_safe_areas",
- "DATA_PT_lens",
- "DATA_PT_speaker",
- "DATA_PT_distance",
- "DATA_PT_cone",
- "DATA_PT_customdata",
- "DATA_PT_custom_props_mesh",
- "DATA_PT_custom_props_camera",
- "DATA_PT_custom_props_lamp",
- "DATA_PT_custom_props_speaker",
- "DATA_PT_custom_props_arm",
- "DATA_PT_custom_props_curve",
- "DATA_PT_custom_props_lattice",
- "DATA_PT_custom_props_metaball",
- "TEXTURE_PT_preview",
- "TEXTURE_PT_custom_props",
- "TEXTURE_PT_clouds",
- "TEXTURE_PT_wood",
- "TEXTURE_PT_marble",
- "TEXTURE_PT_magic",
- "TEXTURE_PT_blend",
- "TEXTURE_PT_stucci",
- "TEXTURE_PT_image",
- "TEXTURE_PT_image_sampling",
- "TEXTURE_PT_image_mapping",
- "TEXTURE_PT_musgrave",
- "TEXTURE_PT_voronoi",
- "TEXTURE_PT_distortednoise",
- "TEXTURE_PT_voxeldata",
- "TEXTURE_PT_pointdensity",
- "TEXTURE_PT_pointdensity_turbulence",
- "TEXTURE_PT_mapping",
- "TEXTURE_PT_ocean",
- "TEXTURE_PT_influence",
- "TEXTURE_PT_colors",
- "SCENE_PT_rigid_body_world",
- "SCENE_PT_rigid_body_cache",
- "SCENE_PT_rigid_body_field_weights",
- "MATERIAL_PT_custom_props",
- "MATERIAL_PT_freestyle_line",
- "BONE_PT_custom_props",
- "OBJECT_PT_custom_props",
- ]
-
- return [getattr(types, p) for p in panels if hasattr(types, p)]
-
+ exclude_panels = {
+ 'DATA_PT_area',
+ 'DATA_PT_camera_dof',
+ 'DATA_PT_falloff_curve',
+ 'DATA_PT_lamp',
+ 'DATA_PT_preview',
+ 'DATA_PT_shadow',
+ 'DATA_PT_spot',
+ 'DATA_PT_sunsky',
+ 'MATERIAL_PT_context_material',
+ 'MATERIAL_PT_diffuse',
+ 'MATERIAL_PT_flare',
+ 'MATERIAL_PT_halo',
+ 'MATERIAL_PT_mirror',
+ 'MATERIAL_PT_options',
+ 'MATERIAL_PT_pipeline',
+ 'MATERIAL_PT_preview',
+ 'MATERIAL_PT_shading',
+ 'MATERIAL_PT_shadow',
+ 'MATERIAL_PT_specular',
+ 'MATERIAL_PT_sss',
+ 'MATERIAL_PT_strand',
+ 'MATERIAL_PT_transp',
+ 'MATERIAL_PT_volume_density',
+ 'MATERIAL_PT_volume_integration',
+ 'MATERIAL_PT_volume_lighting',
+ 'MATERIAL_PT_volume_options',
+ 'MATERIAL_PT_volume_shading',
+ 'MATERIAL_PT_volume_transp',
+ 'RENDERLAYER_PT_layer_options',
+ 'RENDERLAYER_PT_layer_passes',
+ 'RENDERLAYER_PT_views',
+ 'RENDER_PT_antialiasing',
+ 'RENDER_PT_bake',
+ 'RENDER_PT_motion_blur',
+ 'RENDER_PT_performance',
+ 'RENDER_PT_post_processing',
+ 'RENDER_PT_shading',
+ 'SCENE_PT_simplify',
+ 'TEXTURE_PT_context_texture',
+ 'WORLD_PT_ambient_occlusion',
+ 'WORLD_PT_environment_lighting',
+ 'WORLD_PT_gather',
+ 'WORLD_PT_indirect_lighting',
+ 'WORLD_PT_mist',
+ 'WORLD_PT_preview',
+ 'WORLD_PT_world'
+ }
+
+ panels = []
+ for panel in bpy.types.Panel.__subclasses__():
+ if hasattr(panel, 'COMPAT_ENGINES') and 'BLENDER_RENDER' in panel.COMPAT_ENGINES:
+ if panel.__name__ not in exclude_panels:
+ panels.append(panel)
+
+ return panels
def register():
bpy.types.RENDER_PT_render.append(draw_device)
@@ -1690,10 +1662,10 @@ def register():
for panel in get_panels():
panel.COMPAT_ENGINES.add('CYCLES')
-
def unregister():
bpy.types.RENDER_PT_render.remove(draw_device)
bpy.types.VIEW3D_HT_header.remove(draw_pause)
for panel in get_panels():
- panel.COMPAT_ENGINES.remove('CYCLES')
+ if 'CYCLES' in panel.COMPAT_ENGINES:
+ panel.COMPAT_ENGINES.remove('CYCLES')
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 7ca23f23cb4..64559804ccb 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -440,7 +440,7 @@ static ShaderNode *add_node(Scene *scene,
glossy->distribution = CLOSURE_BSDF_MICROFACET_GGX_ID;
break;
case BL::ShaderNodeBsdfGlossy::distribution_ASHIKHMIN_SHIRLEY:
- glossy->distribution = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+ glossy->distribution = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
break;
case BL::ShaderNodeBsdfGlossy::distribution_MULTI_GGX:
glossy->distribution = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index 5729fa6113d..92e48f0d87f 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -19,6 +19,7 @@ set(SRC
bvh_node.cpp
bvh_sort.cpp
bvh_split.cpp
+ bvh_unaligned.cpp
)
set(SRC_HEADERS
@@ -29,6 +30,7 @@ set(SRC_HEADERS
bvh_params.h
bvh_sort.h
bvh_split.h
+ bvh_unaligned.h
)
include_directories(${INC})
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index fa2b9ae7279..e92526ac1c4 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -24,6 +24,7 @@
#include "bvh_build.h"
#include "bvh_node.h"
#include "bvh_params.h"
+#include "bvh_unaligned.h"
#include "util_debug.h"
#include "util_foreach.h"
@@ -121,7 +122,7 @@ void BVH::refit(Progress& progress)
/* Triangles */
-void BVH::pack_triangle(int idx, float4 storage[3])
+void BVH::pack_triangle(int idx, float4 tri_verts[3])
{
int tob = pack.prim_object[idx];
assert(tob >= 0 && tob < objects.size());
@@ -129,49 +130,58 @@ void BVH::pack_triangle(int idx, float4 storage[3])
int tidx = pack.prim_index[idx];
Mesh::Triangle t = mesh->get_triangle(tidx);
- const float3* vpos = &mesh->verts[0];
+ const float3 *vpos = &mesh->verts[0];
float3 v0 = vpos[t.v[0]];
float3 v1 = vpos[t.v[1]];
float3 v2 = vpos[t.v[2]];
- storage[0] = float3_to_float4(v0);
- storage[1] = float3_to_float4(v1);
- storage[2] = float3_to_float4(v2);
+ tri_verts[0] = float3_to_float4(v0);
+ tri_verts[1] = float3_to_float4(v1);
+ tri_verts[2] = float3_to_float4(v2);
}
void BVH::pack_primitives()
{
- int nsize = TRI_NODE_SIZE;
- size_t tidx_size = pack.prim_index.size();
-
- pack.tri_storage.clear();
- pack.tri_storage.resize(tidx_size * nsize);
+ const size_t tidx_size = pack.prim_index.size();
+ size_t num_prim_triangles = 0;
+ /* Count number of triangle primitives in the BVH. */
+ for(unsigned int i = 0; i < tidx_size; i++) {
+ if((pack.prim_index[i] != -1)) {
+ if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
+ ++num_prim_triangles;
+ }
+ }
+ }
+ /* Reserve size for arrays. */
+ pack.prim_tri_index.clear();
+ pack.prim_tri_index.resize(tidx_size);
+ pack.prim_tri_verts.clear();
+ pack.prim_tri_verts.resize(num_prim_triangles * 3);
pack.prim_visibility.clear();
pack.prim_visibility.resize(tidx_size);
-
+ /* Fill in all the arrays. */
+ size_t prim_triangle_index = 0;
for(unsigned int i = 0; i < tidx_size; i++) {
if(pack.prim_index[i] != -1) {
- float4 storage[3];
+ int tob = pack.prim_object[i];
+ Object *ob = objects[tob];
- if(pack.prim_type[i] & PRIMITIVE_TRIANGLE) {
- pack_triangle(i, storage);
+ if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
+ pack_triangle(i, (float4*)&pack.prim_tri_verts[3 * prim_triangle_index]);
+ pack.prim_tri_index[i] = 3 * prim_triangle_index;
+ ++prim_triangle_index;
}
else {
- /* Avoid use of uninitialized memory. */
- memset(&storage, 0, sizeof(storage));
+ pack.prim_tri_index[i] = -1;
}
- memcpy(&pack.tri_storage[i * nsize], storage, sizeof(float4)*3);
-
- int tob = pack.prim_object[i];
- Object *ob = objects[tob];
pack.prim_visibility[i] = ob->visibility;
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_visibility[i] |= PATH_RAY_CURVE;
}
else {
- memset(&pack.tri_storage[i * nsize], 0, sizeof(float4)*3);
+ pack.prim_tri_index[i] = -1;
pack.prim_visibility[i] = 0;
}
}
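
The hunk above replaces the old fixed-stride tri_storage (three float4 slots per primitive, curve segments included) with prim_tri_verts, which only stores vertices for actual triangles, plus a prim_tri_index table mapping each BVH primitive to its first vertex. A minimal sketch of the resulting lookup, with a hypothetical helper name that is not a Cycles function:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct float4 { float x, y, z, w; };

    /* prim_tri_index[prim] is the offset of the primitive's first vertex in
     * prim_tri_verts, or -1 (stored as ~0u) for curves and empty slots. */
    static void fetch_triangle_verts(const std::vector<uint32_t>& prim_tri_index,
                                     const std::vector<float4>& prim_tri_verts,
                                     int prim,
                                     float4 verts[3])
    {
        const uint32_t offset = prim_tri_index[prim];
        assert(offset != (uint32_t)-1);  /* Not a curve segment or empty slot. */
        verts[0] = prim_tri_verts[offset + 0];
        verts[1] = prim_tri_verts[offset + 1];
        verts[2] = prim_tri_verts[offset + 2];
    }

Since curve segments no longer reserve vertex slots, hair-heavy scenes pay for triangle storage only where triangles actually exist.
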
@@ -183,13 +193,13 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
{
/* The BVH's for instances are built separately, but for traversal all
* BVH's are stored in global arrays. This function merges them into the
- * top level BVH, adjusting indexes and offsets where appropriate. */
- bool use_qbvh = params.use_qbvh;
- size_t nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
- size_t nsize_leaf = (use_qbvh)? BVH_QNODE_LEAF_SIZE: BVH_NODE_LEAF_SIZE;
+ * top level BVH, adjusting indexes and offsets where appropriate.
+ */
+ const bool use_qbvh = params.use_qbvh;
- /* adjust primitive index to point to the triangle in the global array, for
- * meshes with transform applied and already in the top level BVH */
+ /* Adjust primitive index to point to the triangle in the global array, for
+ * meshes with transform applied and already in the top level BVH.
+ */
for(size_t i = 0; i < pack.prim_index.size(); i++)
if(pack.prim_index[i] != -1) {
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
@@ -208,10 +218,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* reserve */
size_t prim_index_size = pack.prim_index.size();
- size_t tri_storage_size = pack.tri_storage.size();
+ size_t prim_tri_verts_size = pack.prim_tri_verts.size();
size_t pack_prim_index_offset = prim_index_size;
- size_t pack_tri_storage_offset = tri_storage_size;
+ size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t pack_nodes_offset = nodes_size;
size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0;
@@ -225,7 +235,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
if(mesh->need_build_bvh()) {
if(mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
- tri_storage_size += bvh->pack.tri_storage.size();
+ prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
nodes_size += bvh->pack.nodes.size();
leaf_nodes_size += bvh->pack.leaf_nodes.size();
@@ -240,7 +250,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
pack.prim_type.resize(prim_index_size);
pack.prim_object.resize(prim_index_size);
pack.prim_visibility.resize(prim_index_size);
- pack.tri_storage.resize(tri_storage_size);
+ pack.prim_tri_verts.resize(prim_tri_verts_size);
+ pack.prim_tri_index.resize(prim_index_size);
pack.nodes.resize(nodes_size);
pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size());
@@ -249,7 +260,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size())? &pack.prim_visibility[0]: NULL;
- float4 *pack_tri_storage = (pack.tri_storage.size())? &pack.tri_storage[0]: NULL;
+ float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL;
+ uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
@@ -277,8 +289,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
BVH *bvh = mesh->bvh;
- int noffset = nodes_offset/nsize;
- int noffset_leaf = nodes_leaf_offset/nsize_leaf;
+ int noffset = nodes_offset;
+ int noffset_leaf = nodes_leaf_offset;
int mesh_tri_offset = mesh->tri_offset;
int mesh_curve_offset = mesh->curve_offset;
@@ -290,18 +302,24 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
mesh_map[mesh] = pack.object_node[object_offset-1];
- /* merge primitive and object indexes */
+ /* merge primitive, object and triangle indexes */
if(bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0];
int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
+ uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
for(size_t i = 0; i < bvh_prim_index_size; i++) {
- if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
+ if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
- else
+ pack_prim_tri_index[pack_prim_index_offset] = -1;
+ }
+ else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+ pack_prim_tri_index[pack_prim_index_offset] =
+ bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
+ }
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
@@ -310,50 +328,64 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
}
}
- /* merge triangle intersection data */
- if(bvh->pack.tri_storage.size()) {
- memcpy(pack_tri_storage + pack_tri_storage_offset,
- &bvh->pack.tri_storage[0],
- bvh->pack.tri_storage.size()*sizeof(float4));
- pack_tri_storage_offset += bvh->pack.tri_storage.size();
+ /* Merge triangle vertices data. */
+ if(bvh->pack.prim_tri_verts.size()) {
+ const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
+ memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
+ &bvh->pack.prim_tri_verts[0],
+ prim_tri_size*sizeof(float4));
+ pack_prim_tri_verts_offset += prim_tri_size;
}
/* merge nodes */
if(bvh->pack.leaf_nodes.size()) {
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size();
- for(size_t i = 0, j = 0; i < leaf_nodes_offset_size; i+=nsize_leaf, j++) {
+ for(size_t i = 0, j = 0;
+ i < leaf_nodes_offset_size;
+ i+= BVH_NODE_LEAF_SIZE, j++)
+ {
int4 data = leaf_nodes_offset[i];
data.x += prim_offset;
data.y += prim_offset;
pack_leaf_nodes[pack_leaf_nodes_offset] = data;
- for(int j = 1; j < nsize_leaf; ++j) {
+ for(int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) {
pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j];
}
- pack_leaf_nodes_offset += nsize_leaf;
+ pack_leaf_nodes_offset += BVH_NODE_LEAF_SIZE;
}
}
if(bvh->pack.nodes.size()) {
- /* For QBVH we're packing a child bbox into 6 float4,
- * and for regular BVH they're packed into 3 float4.
- */
- size_t nsize_bbox = (use_qbvh)? 6: 3;
int4 *bvh_nodes = &bvh->pack.nodes[0];
- size_t bvh_nodes_size = bvh->pack.nodes.size();
+ size_t bvh_nodes_size = bvh->pack.nodes.size();
+
+ for(size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
+ size_t nsize, nsize_bbox;
+ if(bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
+ nsize = use_qbvh
+ ? BVH_UNALIGNED_QNODE_SIZE
+ : BVH_UNALIGNED_NODE_SIZE;
+ nsize_bbox = (use_qbvh)? 13: 0;
+ }
+ else {
+ nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
+ nsize_bbox = (use_qbvh)? 7: 0;
+ }
- for(size_t i = 0, j = 0; i < bvh_nodes_size; i+=nsize, j++) {
- memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox*sizeof(int4));
+ memcpy(pack_nodes + pack_nodes_offset,
+ bvh_nodes + i,
+ nsize_bbox*sizeof(int4));
- /* modify offsets into arrays */
+ /* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
- data.x += (data.x < 0)? -noffset_leaf: noffset;
- data.y += (data.y < 0)? -noffset_leaf: noffset;
+ data.z += (data.z < 0)? -noffset_leaf: noffset;
+ data.w += (data.w < 0)? -noffset_leaf: noffset;
if(use_qbvh) {
- data.z += (data.z < 0)? -noffset_leaf: noffset;
- data.w += (data.w < 0)? -noffset_leaf: noffset;
+ data.x += (data.x < 0)? -noffset_leaf: noffset;
+ data.y += (data.y < 0)? -noffset_leaf: noffset;
}
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
@@ -366,6 +398,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
sizeof(int4) * (nsize - (nsize_bbox+1)));
pack_nodes_offset += nsize;
+ i += nsize;
}
}
@@ -377,12 +410,20 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* Regular BVH */
+static bool node_bvh_is_unaligned(const BVHNode *node)
+{
+ const BVHNode *node0 = node->get_child(0),
+ *node1 = node->get_child(1);
+ return node0->is_unaligned() || node1->is_unaligned();
+}
+
RegularBVH::RegularBVH(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
{
}
-void RegularBVH::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
+void RegularBVH::pack_leaf(const BVHStackEntry& e,
+ const LeafNode *leaf)
{
float4 data[BVH_NODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
@@ -401,54 +442,130 @@ void RegularBVH::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
}
- memcpy(&pack.leaf_nodes[e.idx * BVH_NODE_LEAF_SIZE], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
+ memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
+}
+
+void RegularBVH::pack_inner(const BVHStackEntry& e,
+ const BVHStackEntry& e0,
+ const BVHStackEntry& e1)
+{
+ if (e0.node->is_unaligned() || e1.node->is_unaligned()) {
+ pack_unaligned_inner(e, e0, e1);
+ } else {
+ pack_aligned_inner(e, e0, e1);
+ }
}
-void RegularBVH::pack_inner(const BVHStackEntry& e, const BVHStackEntry& e0, const BVHStackEntry& e1)
+void RegularBVH::pack_aligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry& e0,
+ const BVHStackEntry& e1)
{
- pack_node(e.idx, e0.node->m_bounds, e1.node->m_bounds, e0.encodeIdx(), e1.encodeIdx(), e0.node->m_visibility, e1.node->m_visibility);
+ pack_aligned_node(e.idx,
+ e0.node->m_bounds, e1.node->m_bounds,
+ e0.encodeIdx(), e1.encodeIdx(),
+ e0.node->m_visibility & ~PATH_RAY_NODE_UNALIGNED,
+ e1.node->m_visibility & ~PATH_RAY_NODE_UNALIGNED);
}
-void RegularBVH::pack_node(int idx, const BoundBox& b0, const BoundBox& b1, int c0, int c1, uint visibility0, uint visibility1)
+void RegularBVH::pack_aligned_node(int idx,
+ const BoundBox& b0,
+ const BoundBox& b1,
+ int c0, int c1,
+ uint visibility0, uint visibility1)
{
int4 data[BVH_NODE_SIZE] =
{
+ make_int4(visibility0, visibility1, c0, c1),
make_int4(__float_as_int(b0.min.x), __float_as_int(b1.min.x), __float_as_int(b0.max.x), __float_as_int(b1.max.x)),
make_int4(__float_as_int(b0.min.y), __float_as_int(b1.min.y), __float_as_int(b0.max.y), __float_as_int(b1.max.y)),
make_int4(__float_as_int(b0.min.z), __float_as_int(b1.min.z), __float_as_int(b0.max.z), __float_as_int(b1.max.z)),
- make_int4(c0, c1, visibility0, visibility1)
};
- memcpy(&pack.nodes[idx * BVH_NODE_SIZE], data, sizeof(int4)*BVH_NODE_SIZE);
+ memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE);
}
-void RegularBVH::pack_nodes(const BVHNode *root)
+void RegularBVH::pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry& e0,
+ const BVHStackEntry& e1)
{
- size_t tot_node_size = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
- size_t leaf_node_size = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- size_t node_size = tot_node_size - leaf_node_size;
+ pack_unaligned_node(e.idx,
+ e0.node->get_aligned_space(),
+ e1.node->get_aligned_space(),
+ e0.node->m_bounds,
+ e1.node->m_bounds,
+ e0.encodeIdx(), e1.encodeIdx(),
+ e0.node->m_visibility, e1.node->m_visibility);
+}
- /* resize arrays */
- pack.nodes.clear();
+void RegularBVH::pack_unaligned_node(int idx,
+ const Transform& aligned_space0,
+ const Transform& aligned_space1,
+ const BoundBox& bounds0,
+ const BoundBox& bounds1,
+ int c0, int c1,
+ uint visibility0, uint visibility1)
+{
+ float4 data[BVH_UNALIGNED_NODE_SIZE];
+ Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
+ aligned_space0);
+ Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
+ aligned_space1);
+ data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
+ __int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
+ __int_as_float(c0),
+ __int_as_float(c1));
+
+ data[1] = space0.x;
+ data[2] = space0.y;
+ data[3] = space0.z;
+ data[4] = space1.x;
+ data[5] = space1.y;
+ data[6] = space1.z;
+
+ memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
+}
- /* for top level BVH, first merge existing BVH's so we know the offsets */
+void RegularBVH::pack_nodes(const BVHNode *root)
+{
+ const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
+ const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+ assert(num_leaf_nodes <= num_nodes);
+ const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+ size_t node_size;
+ if(params.use_unaligned_nodes) {
+ const size_t num_unaligned_nodes =
+ root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
+ node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
+ (num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
+ }
+ else {
+ node_size = num_inner_nodes * BVH_NODE_SIZE;
+ }
+ /* Resize arrays */
+ pack.nodes.clear();
+ pack.leaf_nodes.clear();
+ /* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
- pack_instances(node_size*BVH_NODE_SIZE,
- leaf_node_size*BVH_NODE_LEAF_SIZE);
+ pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
}
else {
- pack.nodes.resize(node_size*BVH_NODE_SIZE);
- pack.leaf_nodes.resize(leaf_node_size*BVH_NODE_LEAF_SIZE);
+ pack.nodes.resize(node_size);
+ pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
- if(root->is_leaf())
+ if(root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
- else
- stack.push_back(BVHStackEntry(root, nextNodeIdx++));
+ }
+ else {
+ stack.push_back(BVHStackEntry(root, nextNodeIdx));
+ nextNodeIdx += node_bvh_is_unaligned(root)
+ ? BVH_UNALIGNED_NODE_SIZE
+ : BVH_NODE_SIZE;
+ }
while(stack.size()) {
BVHStackEntry e = stack.back();
@@ -456,20 +573,31 @@ void RegularBVH::pack_nodes(const BVHNode *root)
if(e.node->is_leaf()) {
/* leaf node */
- const LeafNode* leaf = reinterpret_cast<const LeafNode*>(e.node);
+ const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
pack_leaf(e, leaf);
}
else {
/* inner node */
- int idx0 = (e.node->get_child(0)->is_leaf())? (nextLeafNodeIdx++) : (nextNodeIdx++);
- int idx1 = (e.node->get_child(1)->is_leaf())? (nextLeafNodeIdx++) : (nextNodeIdx++);
- stack.push_back(BVHStackEntry(e.node->get_child(0), idx0));
- stack.push_back(BVHStackEntry(e.node->get_child(1), idx1));
+ int idx[2];
+ for (int i = 0; i < 2; ++i) {
+ if (e.node->get_child(i)->is_leaf()) {
+ idx[i] = nextLeafNodeIdx++;
+ }
+ else {
+ idx[i] = nextNodeIdx;
+ nextNodeIdx += node_bvh_is_unaligned(e.node->get_child(i))
+ ? BVH_UNALIGNED_NODE_SIZE
+ : BVH_NODE_SIZE;
+ }
+ }
+
+ stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
+ stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]);
}
}
-
+ assert(node_size == nextNodeIdx);
/* root index to start traversal at, to handle case of single leaf node */
pack.root_index = (root->is_leaf())? -1: 0;
}
@@ -486,7 +614,7 @@ void RegularBVH::refit_nodes()
void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
{
if(leaf) {
- int4 *data = &pack.leaf_nodes[idx*BVH_NODE_LEAF_SIZE];
+ int4 *data = &pack.leaf_nodes[idx];
int c0 = data[0].x;
int c1 = data[0].y;
/* refit leaf node */
@@ -565,9 +693,9 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
sizeof(float4)*BVH_NODE_LEAF_SIZE);
}
else {
- int4 *data = &pack.nodes[idx*BVH_NODE_SIZE];
- int c0 = data[3].x;
- int c1 = data[3].y;
+ int4 *data = &pack.nodes[idx];
+ int c0 = data[0].z;
+ int c1 = data[0].w;
/* refit inner node, set bbox from children */
BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
uint visibility0 = 0, visibility1 = 0;
@@ -575,7 +703,7 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0);
refit_node((c1 < 0)? -c1-1: c1, (c1 < 0), bbox1, visibility1);
- pack_node(idx, bbox0, bbox1, c0, c1, visibility0, visibility1);
+ pack_aligned_node(idx, bbox0, bbox1, c0, c1, visibility0, visibility1);
bbox.grow(bbox0);
bbox.grow(bbox1);
@@ -585,6 +713,33 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
/* QBVH */
+/* Can we avoid this somehow, or make it more generic?
+ *
+ * Perhaps we can merge nodes in the actual tree and make our
+ * life easier all over the place.
+ */
+static bool node_qbvh_is_unaligned(const BVHNode *node)
+{
+ const BVHNode *node0 = node->get_child(0),
+ *node1 = node->get_child(1);
+ bool has_unaligned = false;
+ if(node0->is_leaf()) {
+ has_unaligned |= node0->is_unaligned();
+ }
+ else {
+ has_unaligned |= node0->get_child(0)->is_unaligned();
+ has_unaligned |= node0->get_child(1)->is_unaligned();
+ }
+ if(node1->is_leaf()) {
+ has_unaligned |= node1->is_unaligned();
+ }
+ else {
+ has_unaligned |= node1->get_child(0)->is_unaligned();
+ has_unaligned |= node1->get_child(1)->is_unaligned();
+ }
+ return has_unaligned;
+}
+
QBVH::QBVH(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
{
@@ -610,66 +765,153 @@ void QBVH::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
}
- memcpy(&pack.leaf_nodes[e.idx * BVH_QNODE_LEAF_SIZE], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
+ memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
+}
+
+void QBVH::pack_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
+{
+ bool has_unaligned = false;
+ /* Check whether we have to create an unaligned node, or whether all
+ * children are aligned and we can cut some corners here.
+ */
+ if(params.use_unaligned_nodes) {
+ for(int i = 0; i < num; i++) {
+ if(en[i].node->is_unaligned()) {
+ has_unaligned = true;
+ break;
+ }
+ }
+ }
+ if(has_unaligned) {
+ /* Create unaligned node with orientation transform for each of the children. */
+ pack_unaligned_inner(e, en, num);
+ }
+ else {
+ /* There are no unaligned children, pack into a plain AABB node. */
+ pack_aligned_inner(e, en, num);
+ }
}
-void QBVH::pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num)
+void QBVH::pack_aligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
{
float4 data[BVH_QNODE_SIZE];
+ memset(data, 0, sizeof(data));
+ data[0].x = __uint_as_float(e.node->m_visibility & ~PATH_RAY_NODE_UNALIGNED);
for(int i = 0; i < num; i++) {
float3 bb_min = en[i].node->m_bounds.min;
float3 bb_max = en[i].node->m_bounds.max;
- data[0][i] = bb_min.x;
- data[1][i] = bb_max.x;
- data[2][i] = bb_min.y;
- data[3][i] = bb_max.y;
- data[4][i] = bb_min.z;
- data[5][i] = bb_max.z;
+ data[1][i] = bb_min.x;
+ data[2][i] = bb_max.x;
+ data[3][i] = bb_min.y;
+ data[4][i] = bb_max.y;
+ data[5][i] = bb_min.z;
+ data[6][i] = bb_max.z;
- data[6][i] = __int_as_float(en[i].encodeIdx());
+ data[7][i] = __int_as_float(en[i].encodeIdx());
}
for(int i = num; i < 4; i++) {
/* We store a BB which would never be recorded as an intersection,
* so the kernel can safely assume there are always 4 child nodes.
*/
- data[0][i] = FLT_MAX;
- data[1][i] = -FLT_MAX;
+ data[1][i] = FLT_MAX;
+ data[2][i] = -FLT_MAX;
+
+ data[3][i] = FLT_MAX;
+ data[4][i] = -FLT_MAX;
+
+ data[5][i] = FLT_MAX;
+ data[6][i] = -FLT_MAX;
+
+ data[7][i] = __int_as_float(0);
+ }
+
+ memcpy(&pack.nodes[e.idx], data, sizeof(float4)*BVH_QNODE_SIZE);
+}
+
+void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
+{
+ float4 data[BVH_UNALIGNED_QNODE_SIZE];
+ memset(data, 0, sizeof(data));
+
+ data[0].x = __uint_as_float(e.node->m_visibility | PATH_RAY_NODE_UNALIGNED);
+
+ for(int i = 0; i < num; i++) {
+ Transform space = BVHUnaligned::compute_node_transform(
+ en[i].node->m_bounds,
+ en[i].node->get_aligned_space());
+
+ data[1][i] = space.x.x;
+ data[2][i] = space.x.y;
+ data[3][i] = space.x.z;
+
+ data[4][i] = space.y.x;
+ data[5][i] = space.y.y;
+ data[6][i] = space.y.z;
+
+ data[7][i] = space.z.x;
+ data[8][i] = space.z.y;
+ data[9][i] = space.z.z;
- data[2][i] = FLT_MAX;
- data[3][i] = -FLT_MAX;
+ data[10][i] = space.x.w;
+ data[11][i] = space.y.w;
+ data[12][i] = space.z.w;
- data[4][i] = FLT_MAX;
- data[5][i] = -FLT_MAX;
+ data[13][i] = __int_as_float(en[i].encodeIdx());
+ }
- data[6][i] = __int_as_float(0);
+ for(int i = num; i < 4; i++) {
+ /* We store a BB which would never be recorded as an intersection,
+ * so the kernel can safely assume there are always 4 child nodes.
+ */
+ for(int j = 1; j < 13; ++j) {
+ data[j][i] = 0.0f;
+ }
+ data[13][i] = __int_as_float(0);
}
- memcpy(&pack.nodes[e.idx * BVH_QNODE_SIZE], data, sizeof(float4)*BVH_QNODE_SIZE);
+ memcpy(&pack.nodes[e.idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
}
/* Quad SIMD Nodes */
void QBVH::pack_nodes(const BVHNode *root)
{
- size_t tot_node_size = root->getSubtreeSize(BVH_STAT_QNODE_COUNT);
- size_t leaf_node_size = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- size_t node_size = tot_node_size - leaf_node_size;
-
- /* resize arrays */
+ /* Calculate size of the arrays required. */
+ const size_t num_nodes = root->getSubtreeSize(BVH_STAT_QNODE_COUNT);
+ const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+ assert(num_leaf_nodes <= num_nodes);
+ const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+ size_t node_size;
+ if(params.use_unaligned_nodes) {
+ const size_t num_unaligned_nodes =
+ root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_QNODE_COUNT);
+ node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
+ (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
+ }
+ else {
+ node_size = num_inner_nodes * BVH_QNODE_SIZE;
+ }
+ /* Resize arrays. */
pack.nodes.clear();
pack.leaf_nodes.clear();
-
- /* for top level BVH, first merge existing BVH's so we know the offsets */
+ /* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
- pack_instances(node_size*BVH_QNODE_SIZE,
- leaf_node_size*BVH_QNODE_LEAF_SIZE);
+ pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
}
else {
- pack.nodes.resize(node_size*BVH_QNODE_SIZE);
- pack.leaf_nodes.resize(leaf_node_size*BVH_QNODE_LEAF_SIZE);
+ pack.nodes.resize(node_size);
+ pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
@@ -680,7 +922,10 @@ void QBVH::pack_nodes(const BVHNode *root)
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
- stack.push_back(BVHStackEntry(root, nextNodeIdx++));
+ stack.push_back(BVHStackEntry(root, nextNodeIdx));
+ nextNodeIdx += node_qbvh_is_unaligned(root)
+ ? BVH_UNALIGNED_QNODE_SIZE
+ : BVH_QNODE_SIZE;
}
while(stack.size()) {
@@ -689,19 +934,17 @@ void QBVH::pack_nodes(const BVHNode *root)
if(e.node->is_leaf()) {
/* leaf node */
- const LeafNode* leaf = reinterpret_cast<const LeafNode*>(e.node);
+ const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
pack_leaf(e, leaf);
}
else {
- /* inner node */
+ /* Inner node. */
const BVHNode *node = e.node;
const BVHNode *node0 = node->get_child(0);
const BVHNode *node1 = node->get_child(1);
-
- /* collect nodes */
+ /* Collect nodes. */
const BVHNode *nodes[4];
int numnodes = 0;
-
if(node0->is_leaf()) {
nodes[numnodes++] = node0;
}
@@ -709,7 +952,6 @@ void QBVH::pack_nodes(const BVHNode *root)
nodes[numnodes++] = node0->get_child(0);
nodes[numnodes++] = node0->get_child(1);
}
-
if(node1->is_leaf()) {
nodes[numnodes++] = node1;
}
@@ -717,25 +959,26 @@ void QBVH::pack_nodes(const BVHNode *root)
nodes[numnodes++] = node1->get_child(0);
nodes[numnodes++] = node1->get_child(1);
}
-
- /* push entries on the stack */
- for(int i = 0; i < numnodes; i++) {
+ /* Push entries on the stack. */
+ for(int i = 0; i < numnodes; ++i) {
int idx;
if(nodes[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
- idx = nextNodeIdx++;
+ idx = nextNodeIdx;
+ nextNodeIdx += node_qbvh_is_unaligned(nodes[i])
+ ? BVH_UNALIGNED_QNODE_SIZE
+ : BVH_QNODE_SIZE;
}
stack.push_back(BVHStackEntry(nodes[i], idx));
}
-
- /* set node */
+ /* Set node. */
pack_inner(e, &stack[stack.size()-numnodes], numnodes);
}
}
-
- /* root index to start traversal at, to handle case of single leaf node */
+ assert(node_size == nextNodeIdx);
+ /* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf())? -1: 0;
}
@@ -751,7 +994,7 @@ void QBVH::refit_nodes()
void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
{
if(leaf) {
- int4 *data = &pack.leaf_nodes[idx*BVH_QNODE_LEAF_SIZE];
+ int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
/* Refit leaf node. */
for(int prim = c.x; prim < c.y; prim++) {
@@ -833,13 +1076,18 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
- memcpy(&pack.leaf_nodes[idx * BVH_QNODE_LEAF_SIZE],
- leaf_data,
- sizeof(float4)*BVH_QNODE_LEAF_SIZE);
+ memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
}
else {
- int4 *data = &pack.nodes[idx*BVH_QNODE_SIZE];
- int4 c = data[6];
+ int4 *data = &pack.nodes[idx];
+ bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
+ int4 c;
+ if(is_unaligned) {
+ c = data[13];
+ }
+ else {
+ c = data[7];
+ }
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[4] = {BoundBox::empty,
BoundBox::empty,
@@ -858,21 +1106,62 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
}
}
- float4 inner_data[BVH_QNODE_SIZE];
- for(int i = 0; i < 4; ++i) {
- float3 bb_min = child_bbox[i].min;
- float3 bb_max = child_bbox[i].max;
- inner_data[0][i] = bb_min.x;
- inner_data[1][i] = bb_max.x;
- inner_data[2][i] = bb_min.y;
- inner_data[3][i] = bb_max.y;
- inner_data[4][i] = bb_min.z;
- inner_data[5][i] = bb_max.z;
- inner_data[6][i] = __int_as_float(c[i]);
+ /* TODO(sergey): To be de-duplicated with pack_inner(),
+ * but for that we need some sort of pack_node() which operates on
+ * direct data, not a stack element.
+ */
+ if(is_unaligned) {
+ Transform aligned_space = transform_identity();
+ float4 inner_data[BVH_UNALIGNED_QNODE_SIZE];
+ inner_data[0] = make_float4(
+ __int_as_float(visibility | PATH_RAY_NODE_UNALIGNED),
+ 0.0f,
+ 0.0f,
+ 0.0f);
+ for(int i = 0; i < 4; ++i) {
+ Transform space = BVHUnaligned::compute_node_transform(
+ child_bbox[i],
+ aligned_space);
+ inner_data[1][i] = space.x.x;
+ inner_data[2][i] = space.x.y;
+ inner_data[3][i] = space.x.z;
+
+ inner_data[4][i] = space.y.x;
+ inner_data[5][i] = space.y.y;
+ inner_data[6][i] = space.y.z;
+
+ inner_data[7][i] = space.z.x;
+ inner_data[8][i] = space.z.y;
+ inner_data[9][i] = space.z.z;
+
+ inner_data[10][i] = space.x.w;
+ inner_data[11][i] = space.y.w;
+ inner_data[12][i] = space.z.w;
+
+ inner_data[13][i] = __int_as_float(c[i]);
+ }
+ memcpy(&pack.nodes[idx], inner_data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
+ }
+ else {
+ float4 inner_data[BVH_QNODE_SIZE];
+ inner_data[0] = make_float4(
+ __int_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED),
+ 0.0f,
+ 0.0f,
+ 0.0f);
+ for(int i = 0; i < 4; ++i) {
+ float3 bb_min = child_bbox[i].min;
+ float3 bb_max = child_bbox[i].max;
+ inner_data[1][i] = bb_min.x;
+ inner_data[2][i] = bb_max.x;
+ inner_data[3][i] = bb_min.y;
+ inner_data[4][i] = bb_max.y;
+ inner_data[5][i] = bb_min.z;
+ inner_data[6][i] = bb_max.z;
+ inner_data[7][i] = __int_as_float(c[i]);
+ }
+ memcpy(&pack.nodes[idx], inner_data, sizeof(float4)*BVH_QNODE_SIZE);
}
- memcpy(&pack.nodes[idx * BVH_QNODE_SIZE],
- inner_data,
- sizeof(float4)*BVH_QNODE_SIZE);
}
}
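
Both pack_unaligned_node() and the unaligned refit path above lean on BVHUnaligned::compute_node_transform(), which lives in the new bvh_unaligned.cpp (listed in the diffstat but not shown here). A hedged sketch of what the packed transform has to achieve: take the child's aligned space, shift it so the bounds' minimum lands at the origin, then scale each axis so the bounds map onto the unit cube, letting the kernel intersect every unaligned child against the same canonical box. Assuming a row-major 3x4 affine Transform, not the shipped implementation:

    #include <algorithm>

    struct float3 { float x, y, z; };
    /* One row of a 3x4 affine transform: xyz = linear part, w = translation. */
    struct float4 { float x, y, z, w; };
    struct Transform { float4 x, y, z; };

    static Transform node_transform_sketch(const float3& bmin,
                                           const float3& bmax,
                                           Transform space)
    {
        /* Shift so that bounds.min maps to the origin. */
        space.x.w -= bmin.x;
        space.y.w -= bmin.y;
        space.z.w -= bmin.z;
        /* Scale each axis so that bounds.max maps to (1, 1, 1). */
        const float sx = 1.0f / std::max(1e-18f, bmax.x - bmin.x);
        const float sy = 1.0f / std::max(1e-18f, bmax.y - bmin.y);
        const float sz = 1.0f / std::max(1e-18f, bmax.z - bmin.z);
        space.x.x *= sx; space.x.y *= sx; space.x.z *= sx; space.x.w *= sx;
        space.y.x *= sy; space.y.y *= sy; space.y.z *= sy; space.y.w *= sy;
        space.z.x *= sz; space.z.y *= sz; space.z.z *= sz; space.z.w *= sz;
        return space;
    }
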
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 6076c25ca31..16752076f6a 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -35,11 +35,14 @@ class Progress;
#define BVH_NODE_SIZE 4
#define BVH_NODE_LEAF_SIZE 1
-#define BVH_QNODE_SIZE 7
+#define BVH_QNODE_SIZE 8
#define BVH_QNODE_LEAF_SIZE 1
#define BVH_ALIGN 4096
#define TRI_NODE_SIZE 3
+#define BVH_UNALIGNED_NODE_SIZE 7
+#define BVH_UNALIGNED_QNODE_SIZE 14
+
/* Packed BVH
*
* BVH stored as it will be used for traversal on the rendering device. */
@@ -52,8 +55,10 @@ struct PackedBVH {
array<int4> leaf_nodes;
/* object index to BVH node index mapping for instances */
array<int> object_node;
- /* Aligned triangle storage for faster lookup in the kernel. */
- array<float4> tri_storage;
+ /* Mapping from primitive index to index in triangle array. */
+ array<uint> prim_tri_index;
+ /* Continuous storage of triangle vertices. */
+ array<float4> prim_tri_verts;
/* primitive type - triangle or strand */
array<int> prim_type;
/* visibility flags for primitives */
@@ -91,7 +96,7 @@ public:
protected:
BVH(const BVHParams& params, const vector<Object*>& objects);
- /* triangles and strands*/
+ /* triangles and strands */
void pack_primitives();
void pack_triangle(int idx, float4 storage[3]);
@@ -115,9 +120,32 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root);
- void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
- void pack_inner(const BVHStackEntry& e, const BVHStackEntry& e0, const BVHStackEntry& e1);
- void pack_node(int idx, const BoundBox& b0, const BoundBox& b1, int c0, int c1, uint visibility0, uint visibility1);
+
+ void pack_leaf(const BVHStackEntry& e,
+ const LeafNode *leaf);
+ void pack_inner(const BVHStackEntry& e,
+ const BVHStackEntry& e0,
+ const BVHStackEntry& e1);
+
+ void pack_aligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry& e0,
+ const BVHStackEntry& e1);
+ void pack_aligned_node(int idx,
+ const BoundBox& b0,
+ const BoundBox& b1,
+ int c0, int c1,
+ uint visibility0, uint visibility1);
+
+ void pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry& e0,
+ const BVHStackEntry& e1);
+ void pack_unaligned_node(int idx,
+ const Transform& aligned_space0,
+ const Transform& aligned_space1,
+ const BoundBox& b0,
+ const BoundBox& b1,
+ int c0, int c1,
+ uint visibility0, uint visibility1);
/* refit */
void refit_nodes();
@@ -136,9 +164,17 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root);
+
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
+ void pack_aligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num);
+ void pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num);
+
/* refit */
void refit_nodes();
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
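
With BVH_QNODE_SIZE growing from 7 to 8 and two new unaligned sizes added, inner nodes are no longer uniform in size. That is why the packing code in bvh.cpp switched from node indices (idx * BVH_NODE_SIZE) to raw float4 offsets: nextNodeIdx now advances by the size of the node just allocated. A compile-time sketch of the four layouts (struct names are illustrative; Cycles keeps these as raw float4/int4 arrays):

    struct float4 { float x, y, z, w; };

    struct AlignedNode    { float4 data[4];  };  /* BVH_NODE_SIZE            */
    struct UnalignedNode  { float4 data[7];  };  /* BVH_UNALIGNED_NODE_SIZE  */
    struct AlignedQNode   { float4 data[8];  };  /* BVH_QNODE_SIZE           */
    struct UnalignedQNode { float4 data[14]; };  /* BVH_UNALIGNED_QNODE_SIZE */

    /* data[0] always carries the visibility flags (plus, for 2-wide nodes,
     * the two child indices), so traversal can test PATH_RAY_NODE_UNALIGNED
     * before it knows how many float4's the node occupies. */
    static_assert(sizeof(UnalignedQNode) == 14 * sizeof(float4),
                  "header + one 3x4 transform per child (12) + child indices");
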
diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp
index b07e870d759..5ddd7349f7b 100644
--- a/intern/cycles/bvh/bvh_binning.cpp
+++ b/intern/cycles/bvh/bvh_binning.cpp
@@ -52,12 +52,35 @@ __forceinline int get_best_dimension(const float4& bestSAH)
/* BVH Object Binning */
-BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims)
-: BVHRange(job), splitSAH(FLT_MAX), dim(0), pos(0)
+BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
+ BVHReference *prims,
+ const BVHUnaligned *unaligned_heuristic,
+ const Transform *aligned_space)
+: BVHRange(job),
+ splitSAH(FLT_MAX),
+ dim(0),
+ pos(0),
+ unaligned_heuristic_(unaligned_heuristic),
+ aligned_space_(aligned_space)
{
+ if(aligned_space_ == NULL) {
+ bounds_ = bounds();
+ cent_bounds_ = cent_bounds();
+ }
+ else {
+ /* TODO(sergey): With some additional storage we could avoid
+ * the need to re-calculate this.
+ */
+ bounds_ = unaligned_heuristic->compute_aligned_boundbox(
+ *this,
+ prims,
+ *aligned_space,
+ &cent_bounds_);
+ }
+
/* compute number of bins to use and precompute scaling factor for binning */
num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f*size()));
- scale = rcp(cent_bounds().size()) * make_float3((float)num_bins);
+ scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
/* initialize binning counter and bounds */
BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
@@ -79,30 +102,34 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims)
const BVHReference& prim0 = prims[start() + i + 0];
const BVHReference& prim1 = prims[start() + i + 1];
- int4 bin0 = get_bin(prim0.bounds());
- int4 bin1 = get_bin(prim1.bounds());
+ BoundBox bounds0 = get_prim_bounds(prim0);
+ BoundBox bounds1 = get_prim_bounds(prim1);
+
+ int4 bin0 = get_bin(bounds0);
+ int4 bin1 = get_bin(bounds1);
/* increase bounds for bins for even primitive */
- int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
- int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
- int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
+ int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
+ int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
+ int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
/* increase bounds of bins for odd primitive */
- int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds());
- int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds());
- int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds());
+ int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(bounds1);
+ int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(bounds1);
+ int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(bounds1);
}
/* for uneven number of primitives */
if(i < ssize_t(size())) {
/* map primitive to bin */
const BVHReference& prim0 = prims[start() + i];
- int4 bin0 = get_bin(prim0.bounds());
+ BoundBox bounds0 = get_prim_bounds(prim0);
+ int4 bin0 = get_bin(bounds0);
/* increase bounds of bins */
- int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
- int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
- int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
+ int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
+ int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
+ int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
}
}
@@ -151,17 +178,19 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims)
bestSAH = min(sah,bestSAH);
}
- int4 mask = float3_to_float4(cent_bounds().size()) <= make_float4(0.0f);
+ int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f);
bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);
/* find best dimension */
dim = get_best_dimension(bestSAH);
splitSAH = bestSAH[dim];
pos = bestSplit[dim];
- leafSAH = bounds().half_area() * blocks(size());
+ leafSAH = bounds_.half_area() * blocks(size());
}
-void BVHObjectBinning::split(BVHReference* prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const
+void BVHObjectBinning::split(BVHReference* prims,
+ BVHObjectBinning& left_o,
+ BVHObjectBinning& right_o) const
{
size_t N = size();
@@ -176,10 +205,12 @@ void BVHObjectBinning::split(BVHReference* prims, BVHObjectBinning& left_o, BVHO
prefetch_L2(&prims[start() + l + 8]);
prefetch_L2(&prims[start() + r - 8]);
- const BVHReference& prim = prims[start() + l];
+ BVHReference prim = prims[start() + l];
+ BoundBox unaligned_bounds = get_prim_bounds(prim);
+ float3 unaligned_center = unaligned_bounds.center2();
float3 center = prim.bounds().center2();
- if(get_bin(center)[dim] < pos) {
+ if(get_bin(unaligned_center)[dim] < pos) {
lgeom_bounds.grow(prim.bounds());
lcent_bounds.grow(center);
l++;
@@ -191,7 +222,6 @@ void BVHObjectBinning::split(BVHReference* prims, BVHObjectBinning& left_o, BVHO
r--;
}
}
-
/* finish */
if(l != 0 && N-1-r != 0) {
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims);
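
The binner above now caches bounds_ and cent_bounds_ so the same code can bin either in world space or in a caller-provided aligned space (via get_prim_bounds()), which is what lets unaligned splits reuse this machinery along a hair's dominant direction. The per-axis bin mapping itself is unchanged; a standalone sketch of it, where center2 is the doubled bounding-box center (min + max), following Cycles' center2() convention:

    #include <algorithm>

    /* One axis of get_bin(): map a primitive's (doubled) center into
     * [0, num_bins - 1] over the (doubled) centroid bounds. */
    static int bin_index(float center2, float cent_min2, float cent_max2,
                         int num_bins)
    {
        const float scale = (cent_max2 > cent_min2)
                ? (float)num_bins / (cent_max2 - cent_min2)
                : 0.0f;
        const int bin = (int)((center2 - cent_min2) * scale - 0.5f);
        return std::min(std::max(bin, 0), num_bins - 1);
    }
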
diff --git a/intern/cycles/bvh/bvh_binning.h b/intern/cycles/bvh/bvh_binning.h
index 60742157055..52955f70151 100644
--- a/intern/cycles/bvh/bvh_binning.h
+++ b/intern/cycles/bvh/bvh_binning.h
@@ -19,11 +19,14 @@
#define __BVH_BINNING_H__
#include "bvh_params.h"
+#include "bvh_unaligned.h"
#include "util_types.h"
CCL_NAMESPACE_BEGIN
+class BVHBuild;
+
/* Single threaded object binner. Finds the split with the best SAH heuristic
* by testing for each dimension multiple partitionings for regular spaced
* partition locations. A partitioning for a partition location is computed,
@@ -34,10 +37,18 @@ CCL_NAMESPACE_BEGIN
class BVHObjectBinning : public BVHRange
{
public:
- __forceinline BVHObjectBinning() {}
- BVHObjectBinning(const BVHRange& job, BVHReference *prims);
+ __forceinline BVHObjectBinning() : leafSAH(FLT_MAX) {}
+
+ BVHObjectBinning(const BVHRange& job,
+ BVHReference *prims,
+ const BVHUnaligned *unaligned_heuristic = NULL,
+ const Transform *aligned_space = NULL);
- void split(BVHReference *prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const;
+ void split(BVHReference *prims,
+ BVHObjectBinning& left_o,
+ BVHObjectBinning& right_o) const;
+
+ __forceinline const BoundBox& unaligned_bounds() { return bounds_; }
float splitSAH; /* SAH cost of the best split */
float leafSAH; /* SAH cost of creating a leaf */
@@ -48,13 +59,20 @@ protected:
size_t num_bins; /* actual number of bins to use */
float3 scale; /* scaling factor to compute bin */
+ /* Effective bounds and centroid bounds. */
+ BoundBox bounds_;
+ BoundBox cent_bounds_;
+
+ const BVHUnaligned *unaligned_heuristic_;
+ const Transform *aligned_space_;
+
enum { MAX_BINS = 32 };
enum { LOG_BLOCK_SIZE = 2 };
/* computes the bin numbers for each dimension for a box. */
__forceinline int4 get_bin(const BoundBox& box) const
{
- int4 a = make_int4((box.center2() - cent_bounds().min)*scale - make_float3(0.5f));
+ int4 a = make_int4((box.center2() - cent_bounds_.min)*scale - make_float3(0.5f));
int4 mn = make_int4(0);
int4 mx = make_int4((int)num_bins-1);
@@ -64,7 +82,7 @@ protected:
/* computes the bin numbers for each dimension for a point. */
__forceinline int4 get_bin(const float3& c) const
{
- return make_int4((c - cent_bounds().min)*scale - make_float3(0.5f));
+ return make_int4((c - cent_bounds_.min)*scale - make_float3(0.5f));
}
/* compute the number of blocks occupied for each dimension. */
@@ -78,6 +96,17 @@ protected:
{
return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
}
+
+ __forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
+ {
+ if(aligned_space_ == NULL) {
+ return prim.bounds();
+ }
+ else {
+ return unaligned_heuristic_->compute_aligned_prim_boundbox(
+ prim, *aligned_space_);
+ }
+ }
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 3f687224eee..67ffb6853d6 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -33,6 +33,7 @@
#include "util_stack_allocator.h"
#include "util_simd.h"
#include "util_time.h"
+#include "util_queue.h"
CCL_NAMESPACE_BEGIN
@@ -99,7 +100,8 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
prim_object(prim_object_),
params(params_),
progress(progress_),
- progress_start_time(0.0)
+ progress_start_time(0.0),
+ unaligned_heuristic(objects_)
{
spatial_min_overlap = 0.0f;
}
@@ -112,70 +114,74 @@ BVHBuild::~BVHBuild()
void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
- Attribute *attr_mP = NULL;
-
- if(mesh->has_motion_blur())
- attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+ Attribute *attr_mP = NULL;
- size_t num_triangles = mesh->num_triangles();
- for(uint j = 0; j < num_triangles; j++) {
- Mesh::Triangle t = mesh->get_triangle(j);
- BoundBox bounds = BoundBox::empty;
- PrimitiveType type = PRIMITIVE_TRIANGLE;
+ if(mesh->has_motion_blur())
+ attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ size_t num_triangles = mesh->num_triangles();
+ for(uint j = 0; j < num_triangles; j++) {
+ Mesh::Triangle t = mesh->get_triangle(j);
+ BoundBox bounds = BoundBox::empty;
+ PrimitiveType type = PRIMITIVE_TRIANGLE;
- t.bounds_grow(&mesh->verts[0], bounds);
+ t.bounds_grow(&mesh->verts[0], bounds);
- /* motion triangles */
- if(attr_mP) {
- size_t mesh_size = mesh->verts.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *vert_steps = attr_mP->data_float3();
+ /* motion triangles */
+ if(attr_mP) {
+ size_t mesh_size = mesh->verts.size();
+ size_t steps = mesh->motion_steps - 1;
+ float3 *vert_steps = attr_mP->data_float3();
- for(size_t i = 0; i < steps; i++)
- t.bounds_grow(vert_steps + i*mesh_size, bounds);
+ for(size_t i = 0; i < steps; i++)
+ t.bounds_grow(vert_steps + i*mesh_size, bounds);
- type = PRIMITIVE_MOTION_TRIANGLE;
- }
+ type = PRIMITIVE_MOTION_TRIANGLE;
+ }
- if(bounds.valid()) {
- references.push_back(BVHReference(bounds, j, i, type));
- root.grow(bounds);
- center.grow(bounds.center2());
+ if(bounds.valid()) {
+ references.push_back(BVHReference(bounds, j, i, type));
+ root.grow(bounds);
+ center.grow(bounds.center2());
+ }
}
}
- Attribute *curve_attr_mP = NULL;
+ if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+ Attribute *curve_attr_mP = NULL;
- if(mesh->has_motion_blur())
- curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(mesh->has_motion_blur())
+ curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- size_t num_curves = mesh->num_curves();
- for(uint j = 0; j < num_curves; j++) {
- Mesh::Curve curve = mesh->get_curve(j);
- PrimitiveType type = PRIMITIVE_CURVE;
+ size_t num_curves = mesh->num_curves();
+ for(uint j = 0; j < num_curves; j++) {
+ Mesh::Curve curve = mesh->get_curve(j);
+ PrimitiveType type = PRIMITIVE_CURVE;
- for(int k = 0; k < curve.num_keys - 1; k++) {
- BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
+ for(int k = 0; k < curve.num_keys - 1; k++) {
+ BoundBox bounds = BoundBox::empty;
+ curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
- /* motion curve */
- if(curve_attr_mP) {
- size_t mesh_size = mesh->curve_keys.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *key_steps = curve_attr_mP->data_float3();
+ /* motion curve */
+ if(curve_attr_mP) {
+ size_t mesh_size = mesh->curve_keys.size();
+ size_t steps = mesh->motion_steps - 1;
+ float3 *key_steps = curve_attr_mP->data_float3();
- for(size_t i = 0; i < steps; i++)
- curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
+ for(size_t i = 0; i < steps; i++)
+ curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
- type = PRIMITIVE_MOTION_CURVE;
- }
+ type = PRIMITIVE_MOTION_CURVE;
+ }
- if(bounds.valid()) {
- int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
-
- references.push_back(BVHReference(bounds, j, i, packed_type));
- root.grow(bounds);
- center.grow(bounds.center2());
+ if(bounds.valid()) {
+ int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
+
+ references.push_back(BVHReference(bounds, j, i, packed_type));
+ root.grow(bounds);
+ center.grow(bounds.center2());
+ }
}
}
}
@@ -209,15 +215,23 @@ void BVHBuild::add_references(BVHRange& root)
continue;
}
if(!ob->mesh->is_instanced()) {
- num_alloc_references += ob->mesh->num_triangles();
- num_alloc_references += count_curve_segments(ob->mesh);
+ if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+ num_alloc_references += ob->mesh->num_triangles();
+ }
+ if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+ num_alloc_references += count_curve_segments(ob->mesh);
+ }
}
else
num_alloc_references++;
}
else {
- num_alloc_references += ob->mesh->num_triangles();
- num_alloc_references += count_curve_segments(ob->mesh);
+ if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+ num_alloc_references += ob->mesh->num_triangles();
+ }
+ if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+ num_alloc_references += count_curve_segments(ob->mesh);
+ }
}
}
@@ -340,6 +354,8 @@ BVHNode* BVHBuild::run()
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
<< " Number of leaf nodes: "
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
+ << " Number of unaligned nodes: "
+ << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT)) << "\n"
<< " Allocation slop factor: "
<< ((prim_type.capacity() != 0)
? (float)prim_type.size() / prim_type.capacity()
@@ -445,10 +461,11 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
float leafSAH = params.sah_primitive_cost * range.leafSAH;
float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
- /* have at least one inner node on top level, for performance and correct
- * visibility tests, since object instances do not check visibility flag */
+ /* Have at least one inner node on top level, for performance and correct
+ * visibility tests, since object instances do not check visibility flag.
+ */
if(!(range.size() > 0 && params.top_level && level == 0)) {
- /* make leaf node when threshold reached or SAH tells us */
+ /* Make leaf node when threshold reached or SAH tells us. */
if((params.small_enough_for_leaf(size, level)) ||
(range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
{
@@ -456,28 +473,70 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
}
}
- /* perform split */
+ BVHObjectBinning unaligned_range;
+ float unalignedSplitSAH = FLT_MAX;
+ float unalignedLeafSAH = FLT_MAX;
+ Transform aligned_space;
+ if(params.use_unaligned_nodes &&
+ splitSAH > params.unaligned_split_threshold*leafSAH)
+ {
+ aligned_space = unaligned_heuristic.compute_aligned_space(
+ range, &references[0]);
+ unaligned_range = BVHObjectBinning(range,
+ &references[0],
+ &unaligned_heuristic,
+ &aligned_space);
+ unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
+ params.sah_primitive_cost * unaligned_range.splitSAH;
+ unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
+ if(!(range.size() > 0 && params.top_level && level == 0)) {
+ if(unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
+ range_within_max_leaf_size(range, references))
+ {
+ return create_leaf_node(range, references);
+ }
+ }
+ }
+
+ /* Perform split. */
BVHObjectBinning left, right;
- range.split(&references[0], left, right);
+ if(unalignedSplitSAH < splitSAH) {
+ unaligned_range.split(&references[0], left, right);
+ }
+ else {
+ range.split(&references[0], left, right);
+ }
- /* create inner node. */
- InnerNode *inner;
+ BoundBox bounds;
+ if(unalignedSplitSAH < splitSAH) {
+ bounds = unaligned_heuristic.compute_aligned_boundbox(
+ range, &references[0], aligned_space);
+ }
+ else {
+ bounds = range.bounds();
+ }
+ /* Create inner node. */
+ InnerNode *inner;
if(range.size() < THREAD_TASK_SIZE) {
/* local build */
BVHNode *leftnode = build_node(left, level + 1);
BVHNode *rightnode = build_node(right, level + 1);
- inner = new InnerNode(range.bounds(), leftnode, rightnode);
+ inner = new InnerNode(bounds, leftnode, rightnode);
}
else {
- /* threaded build */
- inner = new InnerNode(range.bounds());
+ /* Threaded build. */
+ inner = new InnerNode(bounds);
task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
}
+ if(unalignedSplitSAH < splitSAH) {
+ inner->set_aligned_space(aligned_space);
+ }
+
return inner;
}
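The logic above only evaluates oriented bounds when the aligned split is expensive relative to a leaf (splitSAH > unaligned_split_threshold * leafSAH), then uses whichever split has the lower SAH. A minimal standalone sketch of that decision with made-up costs; the real numbers come from BVHParams and the binning pass:

    #include <cfloat>
    #include <cstdio>

    int main()
    {
        const float sah_node_cost = 1.0f;
        const float sah_primitive_cost = 1.0f;
        const float unaligned_split_threshold = 0.7f;  /* default in bvh_params.h */

        /* Pretend binning results. */
        const float leafSAH  = sah_primitive_cost * 10.0f;
        const float splitSAH = sah_node_cost * 4.0f + sah_primitive_cost * 6.0f;

        float unalignedSplitSAH = FLT_MAX;
        /* Oriented bounds are only evaluated when the aligned split is
         * expensive relative to a leaf. */
        if(splitSAH > unaligned_split_threshold * leafSAH) {
            unalignedSplitSAH = sah_node_cost * 2.5f + sah_primitive_cost * 6.0f;
        }

        printf("%s split wins\n",
               (unalignedSplitSAH < splitSAH) ? "unaligned" : "aligned");
        return 0;
    }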
@@ -516,16 +575,54 @@ BVHNode* BVHBuild::build_node(const BVHRange& range,
return create_leaf_node(range, *references);
}
}
+ float leafSAH = params.sah_primitive_cost * split.leafSAH;
+ float splitSAH = params.sah_node_cost * range.bounds().half_area() +
+ params.sah_primitive_cost * split.nodeSAH;
+
+ BVHMixedSplit unaligned_split;
+ float unalignedSplitSAH = FLT_MAX;
+ /* float unalignedLeafSAH = FLT_MAX; */
+ Transform aligned_space;
+ if(params.use_unaligned_nodes &&
+ splitSAH > params.unaligned_split_threshold*leafSAH)
+ {
+ aligned_space =
+ unaligned_heuristic.compute_aligned_space(range, &references->at(0));
+ unaligned_split = BVHMixedSplit(this,
+ storage,
+ range,
+ references,
+ level,
+ &unaligned_heuristic,
+ &aligned_space);
+ /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
+ unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
+ params.sah_primitive_cost * unaligned_split.nodeSAH;
+ /* TODO(sergey): Check we can create leaf already. */
+ }
/* Do split. */
BVHRange left, right;
- split.split(this, left, right, range);
+ if(unalignedSplitSAH < splitSAH) {
+ unaligned_split.split(this, left, right, range);
+ }
+ else {
+ split.split(this, left, right, range);
+ }
progress_total += left.size() + right.size() - range.size();
+ BoundBox bounds;
+ if(unalignedSplitSAH < splitSAH) {
+ bounds = unaligned_heuristic.compute_aligned_boundbox(
+ range, &references->at(0), aligned_space);
+ }
+ else {
+ bounds = range.bounds();
+ }
+
/* Create inner node. */
InnerNode *inner;
-
if(range.size() < THREAD_TASK_SIZE) {
/* Local build. */
@@ -539,11 +636,11 @@ BVHNode* BVHBuild::build_node(const BVHRange& range,
/* Build right node. */
BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
- inner = new InnerNode(range.bounds(), leftnode, rightnode);
+ inner = new InnerNode(bounds, leftnode, rightnode);
}
else {
/* Threaded build. */
- inner = new InnerNode(range.bounds());
+ inner = new InnerNode(bounds);
task_pool.push(new BVHSpatialSplitBuildTask(this,
inner,
0,
@@ -560,6 +657,10 @@ BVHNode* BVHBuild::build_node(const BVHRange& range,
true);
}
+ if(unalignedSplitSAH < splitSAH) {
+ inner->set_aligned_space(aligned_space);
+ }
+
return inner;
}
@@ -616,6 +717,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
+ vector<BVHReference, LeafStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
/* TODO(sergey): In theory we should be able to store references. */
typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
@@ -634,6 +736,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
const BVHReference& ref = references[range.start() + i];
if(ref.prim_index() != -1) {
int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
+ p_ref[type_index].push_back(ref);
p_type[type_index].push_back(ref.prim_type());
p_index[type_index].push_back(ref.prim_index());
p_object[type_index].push_back(ref.prim_object());
@@ -674,16 +777,38 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
if(num != 0) {
assert(p_type[i].size() == p_index[i].size());
assert(p_type[i].size() == p_object[i].size());
+ Transform aligned_space;
+ bool alignment_found = false;
for(int j = 0; j < num; ++j) {
const int index = start_index + j;
local_prim_type[index] = p_type[i][j];
local_prim_index[index] = p_index[i][j];
local_prim_object[index] = p_object[i][j];
+ if(params.use_unaligned_nodes && !alignment_found) {
+ alignment_found =
+ unaligned_heuristic.compute_aligned_space(p_ref[i][j],
+ &aligned_space);
+ }
+ }
+ LeafNode *leaf_node = new LeafNode(bounds[i],
+ visibility[i],
+ start_index,
+ start_index + num);
+ if(alignment_found) {
+ /* Need to recalculate leaf bounds with new alignment. */
+ leaf_node->m_bounds = BoundBox::empty;
+ for(int j = 0; j < num; ++j) {
+ const BVHReference &ref = p_ref[i][j];
+ BoundBox ref_bounds =
+ unaligned_heuristic.compute_aligned_prim_boundbox(
+ ref,
+ aligned_space);
+ leaf_node->m_bounds.grow(ref_bounds);
+ }
+ /* Set alignment space. */
+ leaf_node->set_aligned_space(aligned_space);
}
- leaves[num_leaves++] = new LeafNode(bounds[i],
- visibility[i],
- start_index,
- start_index + num);
+ leaves[num_leaves++] = leaf_node;
start_index += num;
}
}
@@ -765,6 +890,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
++num_leaves;
}
+ /* TODO(sergey): Need to take care of alignment when number of leaves
+ * is more than 1.
+ */
if(num_leaves == 1) {
/* Simplest case: single leaf, just return it.
* In all the rest cases we'll be creating intermediate inner node with
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index a015b89d72f..64180349935 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -22,6 +22,7 @@
#include "bvh.h"
#include "bvh_binning.h"
+#include "bvh_unaligned.h"
#include "util_boundbox.h"
#include "util_task.h"
@@ -59,13 +60,14 @@ protected:
friend class BVHSpatialSplit;
friend class BVHBuildTask;
friend class BVHSpatialSplitBuildTask;
+ friend class BVHObjectBinning;
- /* adding references */
+ /* Adding references. */
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
void add_references(BVHRange& root);
- /* building */
+ /* Building. */
BVHNode *build_node(const BVHRange& range,
vector<BVHReference> *references,
int level,
@@ -78,7 +80,7 @@ protected:
bool range_within_max_leaf_size(const BVHRange& range,
const vector<BVHReference>& references) const;
- /* threads */
+ /* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node,
int child,
@@ -92,41 +94,44 @@ protected:
int thread_id);
thread_mutex build_mutex;
- /* progress */
+ /* Progress. */
void progress_update();
- /* tree rotations */
+ /* Tree rotations. */
void rotate(BVHNode *node, int max_depth);
void rotate(BVHNode *node, int max_depth, int iterations);
- /* objects and primitive references */
+ /* Objects and primitive references. */
vector<Object*> objects;
vector<BVHReference> references;
int num_original_references;
- /* output primitive indexes and objects */
+ /* Output primitive indexes and objects. */
array<int>& prim_type;
array<int>& prim_index;
array<int>& prim_object;
- /* build parameters */
+ /* Build parameters. */
BVHParams params;
- /* progress reporting */
+ /* Progress reporting. */
Progress& progress;
double progress_start_time;
size_t progress_count;
size_t progress_total;
size_t progress_original_total;
- /* spatial splitting */
+ /* Spatial splitting. */
float spatial_min_overlap;
vector<BVHSpatialStorage> spatial_storage;
size_t spatial_free_index;
thread_spin_lock spatial_spin_lock;
- /* threads */
+ /* Threads. */
TaskPool task_pool;
+
+ /* Unaligned building. */
+ BVHUnaligned unaligned_heuristic;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 8294690da7d..f5cd699bdf4 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -61,6 +61,76 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
}
}
return cnt;
+ case BVH_STAT_ALIGNED_COUNT:
+ if(!is_unaligned()) {
+ cnt = 1;
+ }
+ break;
+ case BVH_STAT_UNALIGNED_COUNT:
+ if(is_unaligned()) {
+ cnt = 1;
+ }
+ break;
+ case BVH_STAT_ALIGNED_INNER_COUNT:
+ if(!is_leaf()) {
+ bool has_unaligned = false;
+ for(int j = 0; j < num_children(); j++) {
+ has_unaligned |= get_child(j)->is_unaligned();
+ }
+ cnt += has_unaligned? 0: 1;
+ }
+ break;
+ case BVH_STAT_UNALIGNED_INNER_COUNT:
+ if(!is_leaf()) {
+ bool has_unaligned = false;
+ for(int j = 0; j < num_children(); j++) {
+ has_unaligned |= get_child(j)->is_unaligned();
+ }
+ cnt += has_unaligned? 1: 0;
+ }
+ break;
+ case BVH_STAT_ALIGNED_INNER_QNODE_COUNT:
+ {
+ bool has_unaligned = false;
+ for(int i = 0; i < num_children(); i++) {
+ BVHNode *node = get_child(i);
+ if(node->is_leaf()) {
+ has_unaligned |= node->is_unaligned();
+ }
+ else {
+ for(int j = 0; j < node->num_children(); j++) {
+ cnt += node->get_child(j)->getSubtreeSize(stat);
+ has_unaligned |= node->get_child(j)->is_unaligned();
+ }
+ }
+ }
+ cnt += has_unaligned? 0: 1;
+ }
+ return cnt;
+ case BVH_STAT_UNALIGNED_INNER_QNODE_COUNT:
+ {
+ bool has_unaligned = false;
+ for(int i = 0; i < num_children(); i++) {
+ BVHNode *node = get_child(i);
+ if(node->is_leaf()) {
+ has_unaligned |= node->is_unaligned();
+ }
+ else {
+ for(int j = 0; j < node->num_children(); j++) {
+ cnt += node->get_child(j)->getSubtreeSize(stat);
+ has_unaligned |= node->get_child(j)->is_unaligned();
+ }
+ }
+ }
+ cnt += has_unaligned? 1: 0;
+ }
+ return cnt;
+ case BVH_STAT_ALIGNED_LEAF_COUNT:
+ cnt = (is_leaf() && !is_unaligned()) ? 1 : 0;
+ break;
+ case BVH_STAT_UNALIGNED_LEAF_COUNT:
+ cnt = (is_leaf() && is_unaligned()) ? 1 : 0;
+ break;
default:
assert(0); /* unknown mode */
}
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index d476fb917ed..f2965a785e6 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -31,6 +31,14 @@ enum BVH_STAT {
BVH_STAT_TRIANGLE_COUNT,
BVH_STAT_CHILDNODE_COUNT,
BVH_STAT_QNODE_COUNT,
+ BVH_STAT_ALIGNED_COUNT,
+ BVH_STAT_UNALIGNED_COUNT,
+ BVH_STAT_ALIGNED_INNER_COUNT,
+ BVH_STAT_UNALIGNED_INNER_COUNT,
+ BVH_STAT_ALIGNED_INNER_QNODE_COUNT,
+ BVH_STAT_UNALIGNED_INNER_QNODE_COUNT,
+ BVH_STAT_ALIGNED_LEAF_COUNT,
+ BVH_STAT_UNALIGNED_LEAF_COUNT,
};
class BVHParams;
@@ -38,16 +46,41 @@ class BVHParams;
class BVHNode
{
public:
- BVHNode()
+ BVHNode() : m_is_unaligned(false),
+ m_aligned_space(NULL)
{
}
- virtual ~BVHNode() {}
+ virtual ~BVHNode()
+ {
+ delete m_aligned_space;
+ }
+
virtual bool is_leaf() const = 0;
virtual int num_children() const = 0;
virtual BVHNode *get_child(int i) const = 0;
virtual int num_triangles() const { return 0; }
virtual void print(int depth = 0) const = 0;
+ bool is_unaligned() const { return m_is_unaligned; }
+
+ inline void set_aligned_space(const Transform& aligned_space)
+ {
+ m_is_unaligned = true;
+ if(m_aligned_space == NULL) {
+ m_aligned_space = new Transform(aligned_space);
+ }
+ else {
+ *m_aligned_space = aligned_space;
+ }
+ }
+
+ inline Transform get_aligned_space() const
+ {
+ if(m_aligned_space == NULL) {
+ return transform_identity();
+ }
+ return *m_aligned_space;
+ }
BoundBox m_bounds;
uint m_visibility;
@@ -58,12 +91,20 @@ public:
void deleteSubtree();
uint update_visibility();
+
+ bool m_is_unaligned;
+
+ // TODO(sergey): Can be stored as 3x3 matrix, but better to have some
+ // utilities and type defines in util_transform first.
+ Transform *m_aligned_space;
};
class InnerNode : public BVHNode
{
public:
- InnerNode(const BoundBox& bounds, BVHNode* child0, BVHNode* child1)
+ InnerNode(const BoundBox& bounds,
+ BVHNode* child0,
+ BVHNode* child1)
{
m_bounds = bounds;
children[0] = child0;
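The BVHNode changes above keep the alignment transform behind a lazily allocated pointer, so the common aligned node pays only for a NULL pointer and get_aligned_space() falls back to identity. A standalone sketch of that pattern with simplified stand-in types, not the Cycles Transform:

    #include <cstddef>
    #include <cstdio>

    struct ExampleTransform { float m[12]; };  /* stand-in, not ccl::Transform */

    static ExampleTransform example_identity()
    {
        ExampleTransform t = {{1,0,0,0, 0,1,0,0, 0,0,1,0}};
        return t;
    }

    struct ExampleNode {
        ExampleTransform *aligned_space;
        bool is_unaligned;

        ExampleNode() : aligned_space(NULL), is_unaligned(false) {}
        ~ExampleNode() { delete aligned_space; }

        void set_aligned_space(const ExampleTransform& t)
        {
            is_unaligned = true;
            if(aligned_space == NULL)
                aligned_space = new ExampleTransform(t);  /* allocate on first use */
            else
                *aligned_space = t;
        }

        ExampleTransform get_aligned_space() const
        {
            /* Aligned nodes fall back to identity without allocating. */
            return (aligned_space != NULL) ? *aligned_space : example_identity();
        }
    };

    int main()
    {
        ExampleNode node;
        printf("unaligned: %d\n", node.is_unaligned);  /* 0, identity fallback */
        node.set_aligned_space(example_identity());
        printf("unaligned: %d\n", node.is_unaligned);  /* 1 */
        return 0;
    }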
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index cf683df1b31..2e698a80742 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -20,6 +20,8 @@
#include "util_boundbox.h"
+#include "kernel_types.h"
+
CCL_NAMESPACE_BEGIN
/* BVH Parameters */
@@ -31,6 +33,9 @@ public:
bool use_spatial_split;
float spatial_split_alpha;
+ /* Unaligned nodes creation threshold */
+ float unaligned_split_threshold;
+
/* SAH costs */
float sah_node_cost;
float sah_primitive_cost;
@@ -46,6 +51,14 @@ public:
/* QBVH */
bool use_qbvh;
+ /* Mask of primitives to be included into the BVH. */
+ int primitive_mask;
+
+ /* Use unaligned bounding boxes.
+ * Only used for curves BVH.
+ */
+ bool use_unaligned_nodes;
+
/* fixed parameters */
enum {
MAX_DEPTH = 64,
@@ -58,6 +71,8 @@ public:
use_spatial_split = true;
spatial_split_alpha = 1e-5f;
+ unaligned_split_threshold = 0.7f;
+
/* todo: see if splitting up primitive cost to be separate for triangles
* and curves can help. so far in tests it doesn't help, but why? */
sah_node_cost = 1.0f;
@@ -69,6 +84,9 @@ public:
top_level = false;
use_qbvh = false;
+ use_unaligned_nodes = false;
+
+ primitive_mask = PRIMITIVE_ALL;
}
/* SAH costs */
diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp
index e9032c61c3b..e5bcf9995bf 100644
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -26,23 +26,27 @@ CCL_NAMESPACE_BEGIN
static const int BVH_SORT_THRESHOLD = 4096;
-/* Silly workaround for float extended precision that happens when compiling
- * on x86, due to one float staying in 80 bit precision register and the other
- * not, which causes the strictly weak ordering to break.
- */
-#if !defined(__i386__)
-# define NO_EXTENDED_PRECISION
-#else
-# define NO_EXTENDED_PRECISION volatile
-#endif
-
struct BVHReferenceCompare {
public:
int dim;
+ const BVHUnaligned *unaligned_heuristic;
+ const Transform *aligned_space;
+
+ BVHReferenceCompare(int dim,
+ const BVHUnaligned *unaligned_heuristic,
+ const Transform *aligned_space)
+ : dim(dim),
+ unaligned_heuristic(unaligned_heuristic),
+ aligned_space(aligned_space)
+ {
+ }
- explicit BVHReferenceCompare(int dim_)
+ __forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
{
- dim = dim_;
+ return (aligned_space != NULL)
+ ? unaligned_heuristic->compute_aligned_prim_boundbox(
+ prim, *aligned_space)
+ : prim.bounds();
}
/* Compare two references.
@@ -52,8 +56,10 @@ public:
__forceinline int compare(const BVHReference& ra,
const BVHReference& rb) const
{
- NO_EXTENDED_PRECISION float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
- NO_EXTENDED_PRECISION float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
+ BoundBox ra_bounds = get_prim_bounds(ra),
+ rb_bounds = get_prim_bounds(rb);
+ float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
+ float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
if(ca < cb) return -1;
else if(ca > cb) return 1;
@@ -171,10 +177,15 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
}
}
-void bvh_reference_sort(int start, int end, BVHReference *data, int dim)
+void bvh_reference_sort(int start,
+ int end,
+ BVHReference *data,
+ int dim,
+ const BVHUnaligned *unaligned_heuristic,
+ const Transform *aligned_space)
{
const int count = end - start;
- BVHReferenceCompare compare(dim);
+ BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space);
if(count < BVH_SORT_THRESHOLD) {
/* It is important to not use any mutex if array is small enough,
* otherwise we end up in situation when we're going to sleep far
diff --git a/intern/cycles/bvh/bvh_sort.h b/intern/cycles/bvh/bvh_sort.h
index 18aafb5f1ff..b49ca02eb60 100644
--- a/intern/cycles/bvh/bvh_sort.h
+++ b/intern/cycles/bvh/bvh_sort.h
@@ -20,7 +20,15 @@
CCL_NAMESPACE_BEGIN
-void bvh_reference_sort(int start, int end, BVHReference *data, int dim);
+class BVHUnaligned;
+struct Transform;
+
+void bvh_reference_sort(int start,
+ int end,
+ BVHReference *data,
+ int dim,
+ const BVHUnaligned *unaligned_heuristic = NULL,
+ const Transform *aligned_space = NULL);
CCL_NAMESPACE_END
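bvh_reference_sort() above orders references by centroid along one axis, optionally measured in the aligned space. A standalone sketch of the same strict weak ordering with simplified types; min + max is twice the centroid, which sorts identically and avoids a multiply:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Ref { float bmin[3], bmax[3]; };  /* simplified BVHReference */

    struct CompareDim {
        int dim;
        explicit CompareDim(int dim_) : dim(dim_) {}
        bool operator()(const Ref& a, const Ref& b) const
        {
            /* min + max is twice the centroid: same ordering. */
            return (a.bmin[dim] + a.bmax[dim]) < (b.bmin[dim] + b.bmax[dim]);
        }
    };

    int main()
    {
        std::vector<Ref> refs(2);
        refs[0].bmin[0] = 4.0f; refs[0].bmax[0] = 6.0f;
        refs[1].bmin[0] = 0.0f; refs[1].bmax[0] = 2.0f;
        std::sort(refs.begin(), refs.end(), CompareDim(0));
        printf("first centroid*2 = %f\n", refs[0].bmin[0] + refs[0].bmax[0]);  /* 2.0 */
        return 0;
    }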
diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp
index bf68b41021f..d0d5fbe5a7a 100644
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -32,14 +32,18 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
- float nodeSAH)
+ float nodeSAH,
+ const BVHUnaligned *unaligned_heuristic,
+ const Transform *aligned_space)
: sah(FLT_MAX),
dim(0),
num_left(0),
left_bounds(BoundBox::empty),
right_bounds(BoundBox::empty),
storage_(storage),
- references_(references)
+ references_(references),
+ unaligned_heuristic_(unaligned_heuristic),
+ aligned_space_(aligned_space)
{
const BVHReference *ref_ptr = &references_->at(range.start());
float min_sah = FLT_MAX;
@@ -51,12 +55,15 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
bvh_reference_sort(range.start(),
range.end(),
&references_->at(0),
- dim);
+ dim,
+ unaligned_heuristic_,
+ aligned_space_);
/* sweep right to left and determine bounds. */
BoundBox right_bounds = BoundBox::empty;
for(int i = range.size() - 1; i > 0; i--) {
- right_bounds.grow(ref_ptr[i].bounds());
+ BoundBox prim_bounds = get_prim_bounds(ref_ptr[i]);
+ right_bounds.grow(prim_bounds);
storage_->right_bounds[i - 1] = right_bounds;
}
@@ -64,7 +71,8 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
BoundBox left_bounds = BoundBox::empty;
for(int i = 1; i < range.size(); i++) {
- left_bounds.grow(ref_ptr[i - 1].bounds());
+ BoundBox prim_bounds = get_prim_bounds(ref_ptr[i - 1]);
+ left_bounds.grow(prim_bounds);
right_bounds = storage_->right_bounds[i - 1];
float sah = nodeSAH +
@@ -88,16 +96,37 @@ void BVHObjectSplit::split(BVHRange& left,
BVHRange& right,
const BVHRange& range)
{
+ assert(references_->size() > 0);
/* sort references according to split */
bvh_reference_sort(range.start(),
range.end(),
&references_->at(0),
- this->dim);
+ this->dim,
+ unaligned_heuristic_,
+ aligned_space_);
+
+ BoundBox effective_left_bounds, effective_right_bounds;
+ const int num_right = range.size() - this->num_left;
+ if(aligned_space_ == NULL) {
+ effective_left_bounds = left_bounds;
+ effective_right_bounds = right_bounds;
+ }
+ else {
+ effective_left_bounds = BoundBox::empty;
+ effective_right_bounds = BoundBox::empty;
+ for(int i = 0; i < this->num_left; ++i) {
+ BoundBox prim_boundbox = references_->at(range.start() + i).bounds();
+ effective_left_bounds.grow(prim_boundbox);
+ }
+ for(int i = 0; i < num_right; ++i) {
+ BoundBox prim_boundbox = references_->at(range.start() + this->num_left + i).bounds();
+ effective_right_bounds.grow(prim_boundbox);
+ }
+ }
/* split node ranges */
- left = BVHRange(this->left_bounds, range.start(), this->num_left);
- right = BVHRange(this->right_bounds, left.end(), range.size() - this->num_left);
-
+ left = BVHRange(effective_left_bounds, range.start(), this->num_left);
+ right = BVHRange(effective_right_bounds, left.end(), num_right);
}
/* Spatial Split */
@@ -106,16 +135,31 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
- float nodeSAH)
+ float nodeSAH,
+ const BVHUnaligned *unaligned_heuristic,
+ const Transform *aligned_space)
: sah(FLT_MAX),
dim(0),
pos(0.0f),
storage_(storage),
- references_(references)
+ references_(references),
+ unaligned_heuristic_(unaligned_heuristic),
+ aligned_space_(aligned_space)
{
/* initialize bins. */
- float3 origin = range.bounds().min;
- float3 binSize = (range.bounds().max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
+ BoundBox range_bounds;
+ if(aligned_space == NULL) {
+ range_bounds = range.bounds();
+ }
+ else {
+ range_bounds = unaligned_heuristic->compute_aligned_boundbox(
+ range,
+ &references->at(0),
+ *aligned_space);
+ }
+
+ float3 origin = range_bounds.min;
+ float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
float3 invBinSize = 1.0f / binSize;
for(int dim = 0; dim < 3; dim++) {
@@ -131,8 +175,9 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
/* chop references into bins. */
for(unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) {
const BVHReference& ref = references_->at(refIdx);
- float3 firstBinf = (ref.bounds().min - origin) * invBinSize;
- float3 lastBinf = (ref.bounds().max - origin) * invBinSize;
+ BoundBox prim_bounds = get_prim_bounds(ref);
+ float3 firstBinf = (prim_bounds.min - origin) * invBinSize;
+ float3 lastBinf = (prim_bounds.max - origin) * invBinSize;
int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z);
int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z);
@@ -140,7 +185,10 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1);
for(int dim = 0; dim < 3; dim++) {
- BVHReference currRef = ref;
+ BVHReference currRef(get_prim_bounds(ref),
+ ref.prim_index(),
+ ref.prim_object(),
+ ref.prim_type());
for(int i = firstBin[dim]; i < lastBin[dim]; i++) {
BVHReference leftRef, rightRef;
@@ -209,14 +257,15 @@ void BVHSpatialSplit::split(BVHBuild *builder,
BoundBox right_bounds = BoundBox::empty;
for(int i = left_end; i < right_start; i++) {
- if(refs[i].bounds().max[this->dim] <= this->pos) {
+ BoundBox prim_bounds = get_prim_bounds(refs[i]);
+ if(prim_bounds.max[this->dim] <= this->pos) {
/* entirely on the left-hand side */
- left_bounds.grow(refs[i].bounds());
+ left_bounds.grow(prim_bounds);
swap(refs[i], refs[left_end++]);
}
- else if(refs[i].bounds().min[this->dim] >= this->pos) {
+ else if(prim_bounds.min[this->dim] >= this->pos) {
/* entirely on the right-hand side */
- right_bounds.grow(refs[i].bounds());
+ right_bounds.grow(prim_bounds);
swap(refs[i--], refs[--right_start]);
}
}
@@ -231,8 +280,12 @@ void BVHSpatialSplit::split(BVHBuild *builder,
new_refs.reserve(right_start - left_end);
while(left_end < right_start) {
/* split reference. */
+ BVHReference curr_ref(get_prim_bounds(refs[left_end]),
+ refs[left_end].prim_index(),
+ refs[left_end].prim_object(),
+ refs[left_end].prim_type());
BVHReference lref, rref;
- split_reference(*builder, lref, rref, refs[left_end], this->dim, this->pos);
+ split_reference(*builder, lref, rref, curr_ref, this->dim, this->pos);
/* compute SAH for duplicate/unsplit candidates. */
BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds.
@@ -240,8 +293,8 @@ void BVHSpatialSplit::split(BVHBuild *builder,
BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds.
BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds.
- lub.grow(refs[left_end].bounds());
- rub.grow(refs[left_end].bounds());
+ lub.grow(curr_ref.bounds());
+ rub.grow(curr_ref.bounds());
ldb.grow(lref.bounds());
rdb.grow(rref.bounds());
@@ -280,6 +333,17 @@ void BVHSpatialSplit::split(BVHBuild *builder,
new_refs.begin(),
new_refs.end());
}
+ if(aligned_space_ != NULL) {
+ left_bounds = right_bounds = BoundBox::empty;
+ for(int i = left_start; i < left_end; ++i) {
+ BoundBox prim_boundbox = references_->at(i).bounds();
+ left_bounds.grow(prim_boundbox);
+ }
+ for(int i = right_start; i < right_end; ++i) {
+ BoundBox prim_boundbox = references_->at(i).bounds();
+ right_bounds.grow(prim_boundbox);
+ }
+ }
left = BVHRange(left_bounds, left_start, left_end - left_start);
right = BVHRange(right_bounds, right_start, right_end - right_start);
}
@@ -295,11 +359,13 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
Mesh::Triangle t = mesh->get_triangle(prim_index);
const float3 *verts = &mesh->verts[0];
float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]];
+ v1 = get_unaligned_point(v1);
for(int i = 0; i < 3; i++) {
float3 v0 = v1;
int vindex = t.v[i];
v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex];
+ v1 = get_unaligned_point(v1);
float v0p = v0[dim];
float v1p = v1[dim];
@@ -339,6 +405,8 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
v0 = transform_point(tfm, v0);
v1 = transform_point(tfm, v1);
}
+ v0 = get_unaligned_point(v0);
+ v1 = get_unaligned_point(v1);
float v0p = v0[dim];
float v1p = v1[dim];
@@ -473,6 +541,7 @@ void BVHSpatialSplit::split_reference(const BVHBuild& builder,
/* intersect with original bounds. */
left_bounds.max[dim] = pos;
right_bounds.min[dim] = pos;
+
left_bounds.intersect(ref.bounds());
right_bounds.intersect(ref.bounds());
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
index aea8b2565e0..dbdb51f1a5b 100644
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -24,6 +24,7 @@
CCL_NAMESPACE_BEGIN
class BVHBuild;
+struct Transform;
/* Object Split */
@@ -41,7 +42,9 @@ public:
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
- float nodeSAH);
+ float nodeSAH,
+ const BVHUnaligned *unaligned_heuristic = NULL,
+ const Transform *aligned_space = NULL);
void split(BVHRange& left,
BVHRange& right,
@@ -50,6 +53,19 @@ public:
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
+ const BVHUnaligned *unaligned_heuristic_;
+ const Transform *aligned_space_;
+
+ __forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
+ {
+ if(aligned_space_ == NULL) {
+ return prim.bounds();
+ }
+ else {
+ return unaligned_heuristic_->compute_aligned_prim_boundbox(
+ prim, *aligned_space_);
+ }
+ }
};
/* Spatial Split */
@@ -70,7 +86,9 @@ public:
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
- float nodeSAH);
+ float nodeSAH,
+ const BVHUnaligned *unaligned_heuristic = NULL,
+ const Transform *aligned_space = NULL);
void split(BVHBuild *builder,
BVHRange& left,
@@ -87,6 +105,8 @@ public:
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
+ const BVHUnaligned *unaligned_heuristic_;
+ const Transform *aligned_space_;
/* Lower-level functions which calculates boundaries of left and right nodes
* needed for spatial split.
@@ -132,6 +152,27 @@ protected:
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
+
+ __forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
+ {
+ if(aligned_space_ == NULL) {
+ return prim.bounds();
+ }
+ else {
+ return unaligned_heuristic_->compute_aligned_prim_boundbox(
+ prim, *aligned_space_);
+ }
+ }
+
+ __forceinline float3 get_unaligned_point(const float3& point) const
+ {
+ if(aligned_space_ == NULL) {
+ return point;
+ }
+ else {
+ return transform_point(aligned_space_, point);
+ }
+ }
};
/* Mixed Object-Spatial Split */
@@ -148,19 +189,40 @@ public:
bool no_split;
+ BoundBox bounds;
+
+ BVHMixedSplit() {}
+
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
vector<BVHReference> *references,
- int level)
+ int level,
+ const BVHUnaligned *unaligned_heuristic = NULL,
+ const Transform *aligned_space = NULL)
{
+ if(aligned_space == NULL) {
+ bounds = range.bounds();
+ }
+ else {
+ bounds = unaligned_heuristic->compute_aligned_boundbox(
+ range,
+ &references->at(0),
+ *aligned_space);
+ }
/* find split candidates. */
- float area = range.bounds().safe_area();
+ float area = bounds.safe_area();
leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2);
- object = BVHObjectSplit(builder, storage, range, references, nodeSAH);
+ object = BVHObjectSplit(builder,
+ storage,
+ range,
+ references,
+ nodeSAH,
+ unaligned_heuristic,
+ aligned_space);
if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
BoundBox overlap = object.left_bounds;
@@ -171,7 +233,9 @@ public:
storage,
range,
references,
- nodeSAH);
+ nodeSAH,
+ unaligned_heuristic,
+ aligned_space);
}
}
@@ -181,7 +245,10 @@ public:
builder->range_within_max_leaf_size(range, *references));
}
- __forceinline void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range)
+ __forceinline void split(BVHBuild *builder,
+ BVHRange& left,
+ BVHRange& right,
+ const BVHRange& range)
{
if(builder->params.use_spatial_split && minSAH == spatial.sah)
spatial.split(builder, left, right, range);
@@ -193,4 +260,3 @@ public:
CCL_NAMESPACE_END
#endif /* __BVH_SPLIT_H__ */
-
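BVHMixedSplit above keeps both an object-split candidate and, below the spatial-split depth limit, a spatial-split candidate, then takes whichever SAH is lower. A minimal numeric sketch of that selection, with illustrative values:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const float object_sah = 3.2f, spatial_sah = 2.9f;  /* pretend costs */
        const float minSAH = std::min(object_sah, spatial_sah);
        const bool use_spatial = (minSAH == spatial_sah);
        printf("%s split wins\n", use_spatial ? "spatial" : "object");
        return 0;
    }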
diff --git a/intern/cycles/bvh/bvh_unaligned.cpp b/intern/cycles/bvh/bvh_unaligned.cpp
new file mode 100644
index 00000000000..a876c670914
--- /dev/null
+++ b/intern/cycles/bvh/bvh_unaligned.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "bvh_unaligned.h"
+
+#include "mesh.h"
+#include "object.h"
+
+#include "bvh_binning.h"
+#include "bvh_params.h"
+
+#include "util_boundbox.h"
+#include "util_debug.h"
+#include "util_transform.h"
+
+CCL_NAMESPACE_BEGIN
+
+
+BVHUnaligned::BVHUnaligned(const vector<Object*>& objects)
+ : objects_(objects)
+{
+}
+
+Transform BVHUnaligned::compute_aligned_space(
+ const BVHObjectBinning& range,
+ const BVHReference *references) const
+{
+ for(int i = range.start(); i < range.end(); ++i) {
+ const BVHReference& ref = references[i];
+ Transform aligned_space;
+ /* Use the first primitive that defines a valid direction to set up
+ * the orientation space.
+ */
+ if(compute_aligned_space(ref, &aligned_space)) {
+ return aligned_space;
+ }
+ }
+ return transform_identity();
+}
+
+Transform BVHUnaligned::compute_aligned_space(
+ const BVHRange& range,
+ const BVHReference *references) const
+{
+ for(int i = range.start(); i < range.end(); ++i) {
+ const BVHReference& ref = references[i];
+ Transform aligned_space;
+ /* Use the first primitive that defines a valid direction to set up
+ * the orientation space.
+ */
+ if(compute_aligned_space(ref, &aligned_space)) {
+ return aligned_space;
+ }
+ }
+ return transform_identity();
+}
+
+bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
+ Transform *aligned_space) const
+{
+ const Object *object = objects_[ref.prim_object()];
+ const int packed_type = ref.prim_type();
+ const int type = (packed_type & PRIMITIVE_ALL);
+ if(type & PRIMITIVE_CURVE) {
+ const int curve_index = ref.prim_index();
+ const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
+ const Mesh *mesh = object->mesh;
+ const Mesh::Curve& curve = mesh->get_curve(curve_index);
+ const int key = curve.first_key + segment;
+ const float3 v1 = mesh->curve_keys[key],
+ v2 = mesh->curve_keys[key + 1];
+ float length;
+ const float3 axis = normalize_len(v2 - v1, &length);
+ if(length > 1e-6f) {
+ *aligned_space = make_transform_frame(axis);
+ return true;
+ }
+ }
+ *aligned_space = transform_identity();
+ return false;
+}
+
+BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
+ const BVHReference& prim,
+ const Transform& aligned_space) const
+{
+ BoundBox bounds = BoundBox::empty;
+ const Object *object = objects_[prim.prim_object()];
+ const int packed_type = prim.prim_type();
+ const int type = (packed_type & PRIMITIVE_ALL);
+ if(type & PRIMITIVE_CURVE) {
+ const int curve_index = prim.prim_index();
+ const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
+ const Mesh *mesh = object->mesh;
+ const Mesh::Curve& curve = mesh->get_curve(curve_index);
+ curve.bounds_grow(segment,
+ &mesh->curve_keys[0],
+ &mesh->curve_radius[0],
+ aligned_space,
+ bounds);
+ }
+ else {
+ bounds = prim.bounds().transformed(&aligned_space);
+ }
+ return bounds;
+}
+
+BoundBox BVHUnaligned::compute_aligned_boundbox(
+ const BVHObjectBinning& range,
+ const BVHReference *references,
+ const Transform& aligned_space,
+ BoundBox *cent_bounds) const
+{
+ BoundBox bounds = BoundBox::empty;
+ if(cent_bounds != NULL) {
+ *cent_bounds = BoundBox::empty;
+ }
+ for(int i = range.start(); i < range.end(); ++i) {
+ const BVHReference& ref = references[i];
+ BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
+ bounds.grow(ref_bounds);
+ if(cent_bounds != NULL) {
+ cent_bounds->grow(ref_bounds.center2());
+ }
+ }
+ return bounds;
+}
+
+BoundBox BVHUnaligned::compute_aligned_boundbox(
+ const BVHRange& range,
+ const BVHReference *references,
+ const Transform& aligned_space,
+ BoundBox *cent_bounds) const
+{
+ BoundBox bounds = BoundBox::empty;
+ if(cent_bounds != NULL) {
+ *cent_bounds = BoundBox::empty;
+ }
+ for(int i = range.start(); i < range.end(); ++i) {
+ const BVHReference& ref = references[i];
+ BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
+ bounds.grow(ref_bounds);
+ if(cent_bounds != NULL) {
+ cent_bounds->grow(ref_bounds.center2());
+ }
+ }
+ return bounds;
+}
+
+Transform BVHUnaligned::compute_node_transform(
+ const BoundBox& bounds,
+ const Transform& aligned_space)
+{
+ Transform space = aligned_space;
+ space.x.w -= bounds.min.x;
+ space.y.w -= bounds.min.y;
+ space.z.w -= bounds.min.z;
+ float3 dim = bounds.max - bounds.min;
+ return transform_scale(1.0f / max(1e-18f, dim.x),
+ 1.0f / max(1e-18f, dim.y),
+ 1.0f / max(1e-18f, dim.z)) * space;
+}
+
+CCL_NAMESPACE_END
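compute_node_transform() above composes a translation by -bounds.min with a per-axis scale of 1/extent, so the node's bounds land in 0..1 for packing. A standalone per-axis sketch of that mapping, reduced to plain floats:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const float bounds_min = -2.0f, bounds_max = 6.0f;
        const float extent = std::max(1e-18f, bounds_max - bounds_min);
        const float scale = 1.0f / extent;
        /* x' = (x - min) * scale */
        printf("%f %f\n",
               (bounds_min - bounds_min) * scale,   /* 0.0 */
               (bounds_max - bounds_min) * scale);  /* 1.0 */
        return 0;
    }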
diff --git a/intern/cycles/bvh/bvh_unaligned.h b/intern/cycles/bvh/bvh_unaligned.h
new file mode 100644
index 00000000000..4d0872f4a39
--- /dev/null
+++ b/intern/cycles/bvh/bvh_unaligned.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH_UNALIGNED_H__
+#define __BVH_UNALIGNED_H__
+
+#include "util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BoundBox;
+class BVHObjectBinning;
+class BVHRange;
+class BVHReference;
+struct Transform;
+class Object;
+
+/* Helper class to perform calculations needed for unaligned nodes. */
+class BVHUnaligned {
+public:
+ BVHUnaligned(const vector<Object*>& objects);
+
+ /* Calculate alignment for the oriented node for a given range. */
+ Transform compute_aligned_space(
+ const BVHObjectBinning& range,
+ const BVHReference *references) const;
+ Transform compute_aligned_space(
+ const BVHRange& range,
+ const BVHReference *references) const;
+
+ /* Calculate alignment for the oriented node for a given reference.
+ *
+ * Return true when space was calculated successfully.
+ */
+ bool compute_aligned_space(const BVHReference& ref,
+ Transform *aligned_space) const;
+
+ /* Calculate primitive's bounding box in given space. */
+ BoundBox compute_aligned_prim_boundbox(
+ const BVHReference& prim,
+ const Transform& aligned_space) const;
+
+ /* Calculate bounding box in given space. */
+ BoundBox compute_aligned_boundbox(
+ const BVHObjectBinning& range,
+ const BVHReference *references,
+ const Transform& aligned_space,
+ BoundBox *cent_bounds = NULL) const;
+ BoundBox compute_aligned_boundbox(
+ const BVHRange& range,
+ const BVHReference *references,
+ const Transform& aligned_space,
+ BoundBox *cent_bounds = NULL) const;
+
+ /* Calculate affine transform for node packing.
+ * Bounds will be in the range of 0..1.
+ */
+ static Transform compute_node_transform(const BoundBox& bounds,
+ const Transform& aligned_space);
+protected:
+ /* List of objects BVH is being created for. */
+ const vector<Object*>& objects_;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BVH_UNALIGNED_H__ */
+
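compute_aligned_space() orients the node space along the first curve segment with a usable direction, via make_transform_frame(). A standalone sketch of deriving such an orthonormal frame from a direction; the vector type is simplified and the helper-vector choice is an assumption for illustration, not the exact make_transform_frame() construction:

    #include <cmath>
    #include <cstdio>

    struct V3 { float x, y, z; };

    static V3 make_v3(float x, float y, float z) { V3 r = {x, y, z}; return r; }

    static V3 cross(const V3& a, const V3& b)
    {
        return make_v3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
    }

    static V3 normalize(const V3& v)
    {
        const float len = std::sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
        return make_v3(v.x/len, v.y/len, v.z/len);
    }

    int main()
    {
        /* Direction of the curve segment (v2 - v1), normalized. */
        const V3 axis = normalize(make_v3(1.0f, 2.0f, 2.0f));
        /* Any helper not parallel to the axis completes the basis. */
        const V3 helper = (std::fabs(axis.x) < 0.9f) ? make_v3(1, 0, 0)
                                                     : make_v3(0, 1, 0);
        const V3 t = normalize(cross(axis, helper));
        const V3 b = cross(axis, t);
        printf("t.axis = %f, b.axis = %f\n",            /* both ~0: orthonormal */
               t.x*axis.x + t.y*axis.y + t.z*axis.z,
               b.x*axis.x + b.y*axis.y + b.z*axis.z);
        return 0;
    }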
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f0adbc03e22..bd3969b2889 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -28,6 +28,22 @@ set(SRC
kernels/cuda/kernel.cu
)
+set(SRC_BVH_HEADERS
+ bvh/bvh.h
+ bvh/bvh_nodes.h
+ bvh/bvh_shadow_all.h
+ bvh/bvh_subsurface.h
+ bvh/bvh_traversal.h
+ bvh/bvh_volume.h
+ bvh/bvh_volume_all.h
+ bvh/qbvh_nodes.h
+ bvh/qbvh_shadow_all.h
+ bvh/qbvh_subsurface.h
+ bvh/qbvh_traversal.h
+ bvh/qbvh_volume.h
+ bvh/qbvh_volume_all.h
+)
+
set(SRC_HEADERS
kernel_accumulate.h
kernel_bake.h
@@ -140,23 +156,11 @@ set(SRC_SVM_HEADERS
set(SRC_GEOM_HEADERS
geom/geom.h
geom/geom_attribute.h
- geom/geom_bvh.h
- geom/geom_bvh_shadow.h
- geom/geom_bvh_subsurface.h
- geom/geom_bvh_traversal.h
- geom/geom_bvh_volume.h
- geom/geom_bvh_volume_all.h
geom/geom_curve.h
geom/geom_motion_curve.h
geom/geom_motion_triangle.h
geom/geom_object.h
geom/geom_primitive.h
- geom/geom_qbvh.h
- geom/geom_qbvh_shadow.h
- geom/geom_qbvh_subsurface.h
- geom/geom_qbvh_traversal.h
- geom/geom_qbvh_volume.h
- geom/geom_qbvh_volume_all.h
geom/geom_triangle.h
geom/geom_triangle_intersect.h
geom/geom_volume.h
@@ -212,7 +216,14 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
# build for each arch
- set(cuda_sources kernels/cuda/kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
+ set(cuda_sources kernels/cuda/kernel.cu
+ ${SRC_HEADERS}
+ ${SRC_BVH_HEADERS}
+ ${SRC_SVM_HEADERS}
+ ${SRC_GEOM_HEADERS}
+ ${SRC_CLOSURE_HEADERS}
+ ${SRC_UTIL_HEADERS}
+ )
set(cuda_cubins)
macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
@@ -312,6 +323,7 @@ add_library(cycles_kernel
${SRC}
${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS}
+ ${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
${SRC_SVM_HEADERS}
${SRC_GEOM_HEADERS}
@@ -346,6 +358,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteratio
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom)
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/bvh/bvh.h
index d0eedd3396a..59881738195 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -35,6 +35,13 @@ CCL_NAMESPACE_BEGIN
# define ccl_device_intersect ccl_device_inline
#endif
+/* bottom-most stack entry, indicating the end of traversal */
+#define ENTRYPOINT_SENTINEL 0x76543210
+
+/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
+#define BVH_STACK_SIZE 192
+#define BVH_QSTACK_SIZE 384
+
/* BVH intersection function variations */
#define BVH_INSTANCING 1
@@ -72,71 +79,73 @@ CCL_NAMESPACE_BEGIN
/* Common QBVH functions. */
#ifdef __QBVH__
-# include "geom_qbvh.h"
+# include "qbvh_nodes.h"
#endif
/* Regular BVH traversal */
+#include "bvh_nodes.h"
+
#define BVH_FUNCTION_NAME bvh_intersect
#define BVH_FUNCTION_FEATURES 0
-#include "geom_bvh_traversal.h"
+#include "bvh_traversal.h"
#if defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "geom_bvh_traversal.h"
+# include "bvh_traversal.h"
#endif
#if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-# include "geom_bvh_traversal.h"
+# include "bvh_traversal.h"
#endif
#if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "geom_bvh_traversal.h"
+# include "bvh_traversal.h"
#endif
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-# include "geom_bvh_traversal.h"
+# include "bvh_traversal.h"
#endif
/* Subsurface scattering BVH traversal */
#if defined(__SUBSURFACE__)
# define BVH_FUNCTION_NAME bvh_intersect_subsurface
-# define BVH_FUNCTION_FEATURES 0
-# include "geom_bvh_subsurface.h"
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION
-# include "geom_bvh_subsurface.h"
+# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
+# include "bvh_subsurface.h"
#endif
/* Volume BVH traversal */
#if defined(__VOLUME__)
# define BVH_FUNCTION_NAME bvh_intersect_volume
-# define BVH_FUNCTION_FEATURES 0
-# include "geom_bvh_volume.h"
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "bvh_volume.h"
#endif
#if defined(__VOLUME__) && defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "geom_bvh_volume.h"
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# include "bvh_volume.h"
#endif
#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "geom_bvh_volume.h"
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+# include "bvh_volume.h"
#endif
/* Record all intersections - Shadow BVH traversal */
@@ -144,51 +153,51 @@ CCL_NAMESPACE_BEGIN
#if defined(__SHADOW_RECORD_ALL__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
# define BVH_FUNCTION_FEATURES 0
-# include "geom_bvh_shadow.h"
+# include "bvh_shadow_all.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "geom_bvh_shadow.h"
+# include "bvh_shadow_all.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "geom_bvh_shadow.h"
+# include "bvh_shadow_all.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "geom_bvh_shadow.h"
+# include "bvh_shadow_all.h"
#endif
#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
-# include "geom_bvh_shadow.h"
+# include "bvh_shadow_all.h"
#endif
/* Record all intersections - Volume BVH traversal */
#if defined(__VOLUME_RECORD_ALL__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
-# define BVH_FUNCTION_FEATURES 0
-# include "geom_bvh_volume_all.h"
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "bvh_volume_all.h"
#endif
#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "geom_bvh_volume_all.h"
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# include "bvh_volume_all.h"
#endif
#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "geom_bvh_volume_all.h"
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+# include "bvh_volume_all.h"
#endif
#undef BVH_FEATURE
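In bvh_nodes.h below, an unaligned child is intersected by transforming the ray into the node's own space, where the bounds are the unit cube after compute_node_transform(), and then running a standard slab test. A standalone sketch of that slab test against the unit box, with made-up ray values:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        /* Ray already transformed into node space. */
        const float P[3]   = { 0.4f, 0.2f, -1.0f };
        const float dir[3] = { 0.2f, 0.3f,  1.0f };
        const float t_max  = 10.0f;

        float tnear = 0.0f, tfar = t_max;
        for(int k = 0; k < 3; ++k) {
            const float inv = 1.0f / dir[k];  /* nonzero components assumed */
            const float t0 = (0.0f - P[k]) * inv;  /* slab at 0 */
            const float t1 = (1.0f - P[k]) * inv;  /* slab at 1 */
            tnear = std::max(tnear, std::min(t0, t1));
            tfar  = std::min(tfar,  std::max(t0, t1));
        }
        printf("hit=%d tnear=%f\n", tnear <= tfar, tnear);  /* hit=1 tnear=1.0 */
        return 0;
    }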
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
new file mode 100644
index 00000000000..db2275b0ff8
--- /dev/null
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -0,0 +1,656 @@
+/*
+ * Copyright 2011-2016, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
+// 3-vector which might be faster.
+ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
+ int node_addr,
+ int child)
+{
+ Transform space;
+ const int child_addr = node_addr + child * 3;
+ space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
+ space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
+ space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
+ space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+ return space;
+}
+
+#if !defined(__KERNEL_SSE2__)
+ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
+ const float3 P,
+ const float3 idir,
+ const float t,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+ float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+ float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+#else
+ return ((c0max >= c0min)? 1: 0) |
+ ((c1max >= c1min)? 2: 0);
+#endif
+}
+
+ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 idir,
+ const float t,
+ const float difl,
+ const float extmax,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+ float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+ float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ if(difl != 0.0f) {
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
+ c0min = max(ldiff * c0min, c0min - extmax);
+ c0max = min(hdiff * c0max, c0max + extmax);
+ }
+ if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
+ c1min = max(ldiff * c1min, c1min - extmax);
+ c1max = min(hdiff * c1max, c1max + extmax);
+ }
+ }
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+#else
+ return ((c0max >= c0min)? 1: 0) |
+ ((c1max >= c1min)? 2: 0);
+#endif
+}
+
+ccl_device_inline bool bvh_unaligned_node_intersect_child(
+ KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float t,
+ int node_addr,
+ int child,
+ float dist[2])
+{
+ Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+ float3 aligned_dir = transform_direction(&space, dir);
+ float3 aligned_P = transform_point(&space, P);
+ float3 nrdir = -bvh_inverse_direction(aligned_dir);
+ float3 lower_xyz = aligned_P * nrdir;
+ float3 upper_xyz = lower_xyz - nrdir;
+ const float near_x = min(lower_xyz.x, upper_xyz.x);
+ const float near_y = min(lower_xyz.y, upper_xyz.y);
+ const float near_z = min(lower_xyz.z, upper_xyz.z);
+ const float far_x = max(lower_xyz.x, upper_xyz.x);
+ const float far_y = max(lower_xyz.y, upper_xyz.y);
+ const float far_z = max(lower_xyz.z, upper_xyz.z);
+ const float tnear = max4(0.0f, near_x, near_y, near_z);
+ const float tfar = min4(t, far_x, far_y, far_z);
+ *dist = tnear;
+ return tnear <= tfar;
+}
+
+ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
+ KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float t,
+ const float difl,
+ int node_addr,
+ int child,
+ float dist[2])
+{
+ Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+ float3 aligned_dir = transform_direction(&space, dir);
+ float3 aligned_P = transform_point(&space, P);
+ float3 nrdir = -bvh_inverse_direction(aligned_dir);
+ float3 tLowerXYZ = aligned_P * nrdir;
+ float3 tUpperXYZ = tLowerXYZ - nrdir;
+ const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
+ const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
+ const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
+ const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
+ const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
+ const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
+ const float tnear = max4(0.0f, near_x, near_y, near_z);
+ const float tfar = min4(t, far_x, far_y, far_z);
+ *dist = tnear;
+ if(difl != 0.0f) {
+ /* TODO(sergey): Same as for QBVH, needs a proper use. */
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ return round_down*tnear <= round_up*tfar;
+ }
+ else {
+ return tnear <= tfar;
+ }
+}
+
+ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float3 idir,
+ const float t,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ int mask = 0;
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
+#ifdef __VISIBILITY_FLAG__
+ if((__float_as_uint(cnodes.x) & visibility))
+#endif
+ {
+ mask |= 1;
+ }
+ }
+ if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
+#ifdef __VISIBILITY_FLAG__
+ if((__float_as_uint(cnodes.y) & visibility))
+#endif
+ {
+ mask |= 2;
+ }
+ }
+ return mask;
+}
+
+ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float3 idir,
+ const float t,
+ const float difl,
+ const float extmax,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ int mask = 0;
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
+#ifdef __VISIBILITY_FLAG__
+ if((__float_as_uint(cnodes.x) & visibility))
+#endif
+ {
+ mask |= 1;
+ }
+ }
+ if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
+#ifdef __VISIBILITY_FLAG__
+ if((__float_as_uint(cnodes.y) & visibility))
+#endif
+ {
+ mask |= 2;
+ }
+ }
+ return mask;
+}
+
+ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float3 idir,
+ const float t,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect(kg,
+ P,
+ dir,
+ idir,
+ t,
+ node_addr,
+ visibility,
+ dist);
+ }
+ else {
+ return bvh_aligned_node_intersect(kg,
+ P,
+ idir,
+ t,
+ node_addr,
+ visibility,
+ dist);
+ }
+}
+
+ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const float3 idir,
+ const float t,
+ const float difl,
+ const float extmax,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect_robust(kg,
+ P,
+ dir,
+ idir,
+ t,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
+ else {
+ return bvh_aligned_node_intersect_robust(kg,
+ P,
+ idir,
+ t,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
+}
+#else /* !defined(__KERNEL_SSE2__) */
+
+int ccl_device_inline bvh_aligned_node_intersect(
+ KernelGlobals *kg,
+ const float3& P,
+ const float3& dir,
+ const ssef& tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ dist[0] = tminmax[0];
+ dist[1] = tminmax[1];
+
+ int mask = movemask(lrhit);
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+ return cmask;
+# else
+ return mask & 3;
+# endif
+}
+
+int ccl_device_inline bvh_aligned_node_intersect_robust(
+ KernelGlobals *kg,
+ const float3& P,
+ const float3& dir,
+ const ssef& tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const float difl,
+ const float extmax,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ ssef tminmax = minmax ^ pn;
+
+ if(difl != 0.0f) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ float4 *tminmaxview = (float4*)&tminmax;
+ float& c0min = tminmaxview->x, &c1min = tminmaxview->y;
+ float& c0max = tminmaxview->z, &c1max = tminmaxview->w;
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
+ c0min = max(ldiff * c0min, c0min - extmax);
+ c0max = min(hdiff * c0max, c0max + extmax);
+ }
+ if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
+ c1min = max(ldiff * c1min, c1min - extmax);
+ c1max = min(hdiff * c1max, c1max + extmax);
+ }
+ }
+
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ dist[0] = tminmax[0];
+ dist[1] = tminmax[1];
+
+ int mask = movemask(lrhit);
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+ return cmask;
+# else
+ return mask & 3;
+# endif
+}
+
+int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const ssef& isect_near,
+ const ssef& isect_far,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+ Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+ float3 aligned_dir0 = transform_direction(&space0, dir),
+ aligned_dir1 = transform_direction(&space1, dir);
+ float3 aligned_P0 = transform_point(&space0, P),
+ aligned_P1 = transform_point(&space1, P);
+ float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+ nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+ ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
+ aligned_P1.x * nrdir1.x,
+ 0.0f, 0.0f),
+ lower_y = ssef(aligned_P0.y * nrdir0.y,
+ aligned_P1.y * nrdir1.y,
+ 0.0f,
+ 0.0f),
+ lower_z = ssef(aligned_P0.z * nrdir0.z,
+ aligned_P1.z * nrdir1.z,
+ 0.0f,
+ 0.0f);
+
+ ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+ upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+ upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+ ssef tnear_x = min(lower_x, upper_x);
+ ssef tnear_y = min(lower_y, upper_y);
+ ssef tnear_z = min(lower_z, upper_z);
+ ssef tfar_x = max(lower_x, upper_x);
+ ssef tfar_y = max(lower_y, upper_y);
+ ssef tfar_z = max(lower_z, upper_z);
+
+ const ssef tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+ const ssef tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+ sseb vmask = tnear <= tfar;
+ dist[0] = tnear.f[0];
+ dist[1] = tnear.f[1];
+
+ int mask = (int)movemask(vmask);
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+ return cmask;
+# else
+ return mask & 3;
+# endif
+}
+
+int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 dir,
+ const ssef& isect_near,
+ const ssef& isect_far,
+ const float difl,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+ Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+ float3 aligned_dir0 = transform_direction(&space0, dir),
+ aligned_dir1 = transform_direction(&space1, dir);
+ float3 aligned_P0 = transform_point(&space0, P),
+ aligned_P1 = transform_point(&space1, P);
+ float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+ nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+ ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
+ aligned_P1.x * nrdir1.x,
+ 0.0f, 0.0f),
+ lower_y = ssef(aligned_P0.y * nrdir0.y,
+ aligned_P1.y * nrdir1.y,
+ 0.0f,
+ 0.0f),
+ lower_z = ssef(aligned_P0.z * nrdir0.z,
+ aligned_P1.z * nrdir1.z,
+ 0.0f,
+ 0.0f);
+
+ ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+ upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+ upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+ ssef tnear_x = min(lower_x, upper_x);
+ ssef tnear_y = min(lower_y, upper_y);
+ ssef tnear_z = min(lower_z, upper_z);
+ ssef tfar_x = max(lower_x, upper_x);
+ ssef tfar_y = max(lower_y, upper_y);
+ ssef tfar_z = max(lower_z, upper_z);
+
+ const ssef tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+ const ssef tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+ sseb vmask;
+ if(difl != 0.0f) {
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ vmask = round_down*tnear <= round_up*tfar;
+ }
+ else {
+ vmask = tnear <= tfar;
+ }
+
+ dist[0] = tnear.f[0];
+ dist[1] = tnear.f[1];
+
+ int mask = (int)movemask(vmask);
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+ return cmask;
+# else
+ return mask & 3;
+# endif
+}
+
+ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
+ const float3& P,
+ const float3& dir,
+ const ssef& isect_near,
+ const ssef& isect_far,
+ const ssef& tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect(kg,
+ P,
+ dir,
+ isect_near,
+ isect_far,
+ node_addr,
+ visibility,
+ dist);
+ }
+ else {
+ return bvh_aligned_node_intersect(kg,
+ P,
+ dir,
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
+ }
+}
+
+ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
+ const float3& P,
+ const float3& dir,
+ const ssef& isect_near,
+ const ssef& isect_far,
+ const ssef& tsplat,
+ const ssef Psplat[3],
+ const ssef idirsplat[3],
+ const shuffle_swap_t shufflexyz[3],
+ const float difl,
+ const float extmax,
+ const int node_addr,
+ const uint visibility,
+ float dist[2])
+{
+ float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+ if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return bvh_unaligned_node_intersect_robust(kg,
+ P,
+ dir,
+ isect_near,
+ isect_far,
+ difl,
+ node_addr,
+ visibility,
+ dist);
+ }
+ else {
+ return bvh_aligned_node_intersect_robust(kg,
+ P,
+ dir,
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
+ }
+}
+#endif /* !defined(__KERNEL_SSE2__) */
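A note on the SSE variants above, which are easy to misread: the per-axis products are formed with the sign of the two upper lanes pre-flipped (via the pn-adjusted idirsplat), so a single 4-wide max yields minmax = { c0min, c1min, -c0max, -c1max }, since the max of negated values is a negated min. XOR-ing with pn restores the far lanes, and comparing against the half-swapped shuffle tests near <= far for both children at once. A self-contained illustration with raw SSE intrinsics and made-up interval values:

    #include <emmintrin.h>
    #include <cstdio>

    int main()
    {
        /* Suppose the axis reduction produced: child0 in [0.5, 1.5]
         * (a hit), child1 in [2.0, 1.0] (empty interval, a miss), with
         * the far values carried negated as in the kernel. */
        const __m128 minmax = _mm_setr_ps(0.5f, 2.0f, -1.5f, -1.0f);

        /* pn flips only the sign bits of lanes 2 and 3. */
        const __m128 pn = _mm_castsi128_ps(
            _mm_setr_epi32(0, 0, (int)0x80000000u, (int)0x80000000u));
        const __m128 tminmax = _mm_xor_ps(minmax, pn);  /* {0.5, 2, 1.5, 1} */

        /* shuffle<2,3,0,1>: swap the near and far halves. */
        const __m128 swapped =
            _mm_shuffle_ps(tminmax, tminmax, _MM_SHUFFLE(1, 0, 3, 2));

        /* Lane 0 now tests c0min <= c0max, lane 1 tests c1min <= c1max. */
        const int mask = _mm_movemask_ps(_mm_cmple_ps(tminmax, swapped)) & 3;
        std::printf("child hit mask = %d\n", mask);  /* = 1: only child0 hit */
        return 0;
    }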
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 4005489f77d..1869457f0c3 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -18,7 +18,13 @@
*/
#ifdef __QBVH__
-# include "geom_qbvh_shadow.h"
+# include "qbvh_shadow_all.h"
+#endif
+
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
#endif
/* This is a template BVH traversal function, where various features can be
@@ -41,14 +47,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
* - likely and unlikely for if() statements
* - test restrict attribute for pointers
*/
-
+
/* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
const float tmax = ray->t;
@@ -72,9 +78,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
+
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
+# if BVH_FEATURE(BVH_HAIR)
+ ssef tnear(0.0f), tfar(isect_t);
+# endif
shuffle_swap_t shufflexyz[3];
Psplat[0] = ssef(P.x);
@@ -93,130 +102,87 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- bool traverseChild0, traverseChild1;
- int nodeAddrChild1;
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
- /* Intersect two child bounding boxes, non-SSE version */
- float t = isect_t;
-
- /* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
-
- /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
- /* decide which nodes to traverse next */
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
- traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
-# else
- traverseChild0 = (c0max >= c0min);
- traverseChild1 = (c1max >= c1min);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
# endif
-
+ idir,
+ isect_t,
+ node_addr,
+ PATH_RAY_SHADOW,
+ dist);
#else // __KERNEL_SSE2__
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
- const float4 cnodes = ((float4*)bvh_nodes)[3];
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- /* decide which nodes to traverse next */
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
- traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
-# else
- traverseChild0 = (movemask(lrhit) & 1);
- traverseChild1 = (movemask(lrhit) & 2);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ PATH_RAY_SHADOW,
+ dist);
#endif // __KERNEL_SSE2__
- nodeAddr = __float_as_int(cnodes.x);
- nodeAddrChild1 = __float_as_int(cnodes.y);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
- if(traverseChild0 && traverseChild1) {
- /* both children were intersected, push the farther one */
-#if !defined(__KERNEL_SSE2__)
- bool closestChild1 = (c1min < c0min);
-#else
- bool closestChild1 = tminmax[1] < tminmax[0];
-#endif
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
+ if(traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if(is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
}
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
}
else {
- /* one child was intersected */
- if(traverseChild1) {
- nodeAddr = nodeAddrChild1;
+ /* One child was intersected. */
+ if(traverse_mask == 2) {
+ node_addr = node_addr_child1;
}
- else if(!traverseChild0) {
- /* neither child was intersected */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ else if(traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- const int primAddr2 = __float_as_int(leaf.y);
+ const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
/* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
/* primitive intersection */
- while(primAddr < primAddr2) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ while(prim_addr < prim_addr2) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
@@ -226,22 +192,57 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg,
+ &isect_precalc,
+ isect_array,
+ P,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
+ hit = motion_triangle_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ ray->time,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr);
break;
}
#endif
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
- else
- hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = bvh_cardinal_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ NULL,
+ 0, 0);
+ }
+ else {
+ hit = bvh_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ NULL,
+ 0, 0);
+ }
break;
}
#endif
@@ -253,6 +254,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* shadow ray early termination */
if(hit) {
+ /* Update number of hits now, so we do proper check on max bounces. */
+ (*num_hits)++;
+
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
@@ -283,23 +287,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
return true;
}
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
-
- isect_array->t = isect_t;
+ /* Move on to next entry in intersections array */
+ isect_array++;
}
- primAddr++;
+ prim_addr++;
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
@@ -317,21 +318,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect_t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
if(num_hits_in_instance) {
@@ -369,15 +373,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect_t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return false;
}
@@ -410,3 +417,4 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
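All traversal loops touched by this patch share one stack discipline: intersect both children, descend the nearer, push the farther, and pop on a miss or after a leaf, with ENTRYPOINT_SENTINEL marking the stack bottom. The sketch below isolates that discipline in plain C++; the ChildHit record and the two callbacks are stand-ins for NODE_INTERSECT and the leaf handling, not kernel API:

    #include <cassert>
    #include <cstdio>

    enum { ENTRYPOINT_SENTINEL = 0x76543210, STACK_SIZE = 64 };

    struct ChildHit {
        int mask;      /* bit 0: child0 hit, bit 1: child1 hit */
        float dist[2]; /* entry distances, used to pick the nearer child */
        int child[2];  /* child addresses; negative would mean "leaf" */
    };

    /* Stub standing in for NODE_INTERSECT: misses everything, so the
     * traversal below terminates immediately on the sentinel pop. */
    static ChildHit intersect_children(int /*node_addr*/)
    {
        return ChildHit{0, {0.0f, 0.0f}, {-1, -1}};
    }

    static void intersect_leaf_prims(int /*node_addr*/) {}

    static void traverse(int root)
    {
        int traversal_stack[STACK_SIZE];
        traversal_stack[0] = ENTRYPOINT_SENTINEL;
        int stack_ptr = 0;
        int node_addr = root;
        do {
            while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
                const ChildHit h = intersect_children(node_addr);
                node_addr = h.child[0];
                int node_addr_child1 = h.child[1];
                if(h.mask == 3) {
                    /* Both children hit: descend the nearer, push the farther. */
                    if(h.dist[1] < h.dist[0]) {
                        const int tmp = node_addr;
                        node_addr = node_addr_child1;
                        node_addr_child1 = tmp;
                    }
                    ++stack_ptr;
                    assert(stack_ptr < STACK_SIZE);
                    traversal_stack[stack_ptr] = node_addr_child1;
                }
                else if(h.mask == 2) {
                    node_addr = node_addr_child1;
                }
                else if(h.mask == 0) {
                    node_addr = traversal_stack[stack_ptr--];
                }
                /* mask == 1: node_addr already points at child0. */
            }
            if(node_addr < 0) {  /* negative addresses encode leaves */
                intersect_leaf_prims(node_addr);
                node_addr = traversal_stack[stack_ptr--];
            }
        } while(node_addr != ENTRYPOINT_SENTINEL);
    }

    int main()
    {
        traverse(0);
        std::printf("traversal done\n");
        return 0;
    }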
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h
index 915e9415c93..18978efcfa3 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/bvh_subsurface.h
@@ -18,7 +18,13 @@
*/
#ifdef __QBVH__
-# include "geom_qbvh_subsurface.h"
+# include "qbvh_subsurface.h"
+#endif
+
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
#endif
/* This is a template BVH traversal function for subsurface scattering, where
@@ -44,12 +50,12 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
+ int stack_ptr = 0;
+ int node_addr = kernel_tex_fetch(__object_node, subsurface_object);
/* ray parameters in registers */
float3 P = ray->P;
@@ -84,6 +90,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
+# if BVH_FEATURE(BVH_HAIR)
+ ssef tnear(0.0f), tfar(isect_t);
+# endif
shuffle_swap_t shufflexyz[3];
Psplat[0] = ssef(P.x);
@@ -100,127 +109,94 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* traversal loop */
do {
- do
- {
+ do {
/* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
- {
- bool traverseChild0, traverseChild1;
- int nodeAddrChild1;
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
- /* Intersect two child bounding boxes, non-SSE version */
- float t = isect_t;
-
- /* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
-
- /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
- /* decide which nodes to traverse next */
- traverseChild0 = (c0max >= c0min);
- traverseChild1 = (c1max >= c1min);
-
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
+# endif
+ idir,
+ isect_t,
+ node_addr,
+ PATH_RAY_ALL_VISIBILITY,
+ dist);
#else // __KERNEL_SSE2__
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
- const float4 cnodes = ((float4*)bvh_nodes)[3];
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- /* decide which nodes to traverse next */
- traverseChild0 = (movemask(lrhit) & 1);
- traverseChild1 = (movemask(lrhit) & 2);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ PATH_RAY_ALL_VISIBILITY,
+ dist);
#endif // __KERNEL_SSE2__
- nodeAddr = __float_as_int(cnodes.x);
- nodeAddrChild1 = __float_as_int(cnodes.y);
-
- if(traverseChild0 && traverseChild1) {
- /* both children were intersected, push the farther one */
-#if !defined(__KERNEL_SSE2__)
- bool closestChild1 = (c1min < c0min);
-#else
- bool closestChild1 = tminmax[1] < tminmax[0];
-#endif
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
+ if(traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if(is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
}
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
}
else {
- /* one child was intersected */
- if(traverseChild1) {
- nodeAddr = nodeAddrChild1;
+ /* One child was intersected. */
+ if(traverse_mask == 2) {
+ node_addr = node_addr_child1;
}
- else if(!traverseChild0) {
- /* neither child was intersected */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ else if(traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
- const int primAddr2 = __float_as_int(leaf.y);
+ const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
- primAddr,
+ prim_addr,
isect_t,
lcg_state,
max_hits);
@@ -230,15 +206,15 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
- primAddr,
+ prim_addr,
isect_t,
lcg_state,
max_hits);
@@ -251,8 +227,8 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
}
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
}
ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -286,3 +262,4 @@ ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
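In these loops the sign of an address doubles as a tag: internal nodes have node_addr >= 0, a negative node_addr is a leaf stored at index -node_addr-1 in __bvh_leaf_nodes, and within a leaf a negative prim_addr marks an instance whose object index is -prim_addr-1 (see the instance-push branches). A tiny sketch of the encoding, with hypothetical helper names:

    #include <cstdio>

    /* Index i is stored as -(i + 1): index 0 stays representable and the
     * sign bit doubles as a cheap leaf/instance tag. */
    static int pack_negative(int index)  { return -index - 1; }
    static int unpack_negative(int addr) { return -addr - 1; }

    int main()
    {
        for(int i = 0; i < 3; i++) {
            const int addr = pack_negative(i);
            std::printf("index %d <-> addr %d <-> %d\n",
                        i, addr, unpack_negative(addr));
        }
        return 0;
    }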
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index ae919ef3f86..68a11b65ad7 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -18,7 +18,15 @@
*/
#ifdef __QBVH__
-# include "geom_qbvh_traversal.h"
+# include "qbvh_traversal.h"
+#endif
+
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
+# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
#endif
/* This is a template BVH traversal function, where various features can be
@@ -49,14 +57,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
* - likely and unlikely for if() statements
* - test restrict attribute for pointers
*/
-
+
/* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
float3 P = ray->P;
@@ -79,9 +87,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
+
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
+# if BVH_FEATURE(BVH_HAIR)
+ ssef tnear(0.0f), tfar(isect->t);
+# endif
shuffle_swap_t shufflexyz[3];
Psplat[0] = ssef(P.x);
@@ -100,174 +111,148 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- bool traverseChild0, traverseChild1;
- int nodeAddrChild1;
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
- /* Intersect two child bounding boxes, non-SSE version */
- float t = isect->t;
-
- /* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
-
- /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
+ traverse_mask = NODE_INTERSECT_ROBUST(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
+# endif
+ idir,
+ isect->t,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
}
+ else
# endif
-
- /* decide which nodes to traverse next */
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
-# else
- traverseChild0 = (c0max >= c0min);
- traverseChild1 = (c1max >= c1min);
-# endif
-
+ {
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
+# endif
+ idir,
+ isect->t,
+ node_addr,
+ visibility,
+ dist);
+ }
#else // __KERNEL_SSE2__
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
- const float4 cnodes = ((float4*)bvh_nodes)[3];
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
- float4 *tminmaxview = (float4*)&tminmax;
- float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
- float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
-
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min, c0min - extmax);
- c0max = min(hdiff * c0max, c0max + extmax);
- }
- if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min, c1min - extmax);
- c1max = min(hdiff * c1max, c1max + extmax);
- }
+ traverse_mask = NODE_INTERSECT_ROBUST(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ difl,
+ extmax,
+ node_addr,
+ visibility,
+ dist);
}
+ else
# endif
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- /* decide which nodes to traverse next */
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
-# else
- traverseChild0 = (movemask(lrhit) & 1);
- traverseChild1 = (movemask(lrhit) & 2);
-# endif
+ {
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
+ }
#endif // __KERNEL_SSE2__
- nodeAddr = __float_as_int(cnodes.x);
- nodeAddrChild1 = __float_as_int(cnodes.y);
-
- if(traverseChild0 && traverseChild1) {
- /* both children were intersected, push the farther one */
-#if !defined(__KERNEL_SSE2__)
- bool closestChild1 = (c1min < c0min);
-#else
- bool closestChild1 = tminmax[1] < tminmax[0];
-#endif
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
+ if(traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if(is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
}
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
}
else {
- /* one child was intersected */
- if(traverseChild1) {
- nodeAddr = nodeAddrChild1;
+ /* One child was intersected. */
+ if(traverse_mask == 2) {
+ node_addr = node_addr_child1;
}
- else if(!traverseChild0) {
- /* neither child was intersected */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ else if(traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
BVH_DEBUG_NEXT_STEP();
}
/* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- const int primAddr2 = __float_as_int(leaf.y);
+ const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
+ for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if(triangle_intersect(kg,
+ &isect_precalc,
+ isect,
+ P,
+ visibility,
+ object,
+ prim_addr))
+ {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
#else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -278,15 +263,26 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
+ for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if(motion_triangle_intersect(kg,
+ isect,
+ P,
+ dir,
+ ray->time,
+ visibility,
+ object,
+ prim_addr))
+ {
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
# else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -299,20 +295,47 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
- for(; primAddr < primAddr2; primAddr++) {
+ for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = bvh_cardinal_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ else {
+ hit = bvh_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ lcg_state,
+ difl,
+ extmax);
+ }
if(hit) {
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
# else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -327,7 +350,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
@@ -342,24 +365,27 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
BVH_DEBUG_NEXT_INSTANCE();
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
@@ -376,16 +402,19 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
@@ -433,3 +462,5 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
+#undef NODE_INTERSECT_ROBUST
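On the robust path, difl and extmax implement the hair minimum-width heuristic: a curve node's [cmin, cmax] slab interval grows relatively by a factor (1 ± difl), but for positive distances neither end is allowed to move by more than the absolute margin extmax, which is what the max/min pairs in the rewritten hunks compute. A standalone sketch with made-up values:

    #include <algorithm>
    #include <cstdio>

    /* Widen a (positive) slab interval for hair: relative growth by difl,
     * clamped so neither end moves by more than extmax. Values here are
     * made up; in the kernel difl/extmax derive from the pixel footprint. */
    static void widen_for_curve(float &cmin, float &cmax,
                                float difl, float extmax)
    {
        cmin = std::max((1.0f - difl) * cmin, cmin - extmax);
        cmax = std::min((1.0f + difl) * cmax, cmax + extmax);
    }

    int main()
    {
        float cmin = 10.0f, cmax = 12.0f;
        widen_for_curve(cmin, cmax, 0.01f, 0.05f);
        std::printf("[%g, %g]\n", cmin, cmax);  /* [9.95, 12.05]: extmax clamps */
        return 0;
    }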
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index f3edf85d723..03499e94347 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -18,7 +18,13 @@
*/
#ifdef __QBVH__
-#include "geom_qbvh_volume.h"
+# include "qbvh_volume.h"
+#endif
+
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
#endif
/* This is a template BVH traversal function for volumes, where
@@ -43,12 +49,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
float3 P = ray->P;
@@ -69,9 +75,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
+
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
+# if BVH_FEATURE(BVH_HAIR)
+ ssef tnear(0.0f), tfar(isect->t);
+# endif
shuffle_swap_t shufflexyz[3];
Psplat[0] = ssef(P.x);
@@ -90,143 +99,124 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- bool traverseChild0, traverseChild1;
- int nodeAddrChild1;
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
- /* Intersect two child bounding boxes, non-SSE version */
- float t = isect->t;
-
- /* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
-
- /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
- /* decide which nodes to traverse next */
- traverseChild0 = (c0max >= c0min);
- traverseChild1 = (c1max >= c1min);
-
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
+# endif
+ idir,
+ isect->t,
+ node_addr,
+ visibility,
+ dist);
#else // __KERNEL_SSE2__
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
- const float4 cnodes = ((float4*)bvh_nodes)[3];
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- /* decide which nodes to traverse next */
- traverseChild0 = (movemask(lrhit) & 1);
- traverseChild1 = (movemask(lrhit) & 2);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
#endif // __KERNEL_SSE2__
- nodeAddr = __float_as_int(cnodes.x);
- nodeAddrChild1 = __float_as_int(cnodes.y);
-
- if(traverseChild0 && traverseChild1) {
- /* both children were intersected, push the farther one */
-#if !defined(__KERNEL_SSE2__)
- bool closestChild1 = (c1min < c0min);
-#else
- bool closestChild1 = tminmax[1] < tminmax[0];
-#endif
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
+ if(traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if(is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
}
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
}
else {
- /* one child was intersected */
- if(traverseChild1) {
- nodeAddr = nodeAddrChild1;
+ /* One child was intersected. */
+ if(traverse_mask == 2) {
+ node_addr = node_addr_child1;
}
- else if(!traverseChild0) {
- /* neither child was intersected */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ else if(traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- const int primAddr2 = __float_as_int(leaf.y);
+ const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
+ triangle_intersect(kg,
+ &isect_precalc,
+ isect,
+ P,
+ visibility,
+ object,
+ prim_addr);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+ motion_triangle_intersect(kg,
+ isect,
+ P,
+ dir,
+ ray->time,
+ visibility,
+ object,
+ prim_addr);
}
break;
}
@@ -239,7 +229,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@@ -258,29 +248,32 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* pop */
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
@@ -298,16 +291,19 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect->t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
@@ -337,3 +333,4 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
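Unlike the generic traversal, the volume variants prune whole instances up front: the instance-push branch only descends when the object's flags include SD_OBJECT_HAS_VOLUME, and pops immediately otherwise. A minimal sketch of that pruning shape, assuming a made-up flag value:

    #include <cstdio>

    enum { SD_OBJECT_HAS_VOLUME = 1 << 3 };  /* hypothetical bit value */

    /* Decide whether an instance is worth descending into for volume
     * rays: objects without volume shaders are skipped wholesale. */
    static bool should_push_instance_for_volume(int object_flag)
    {
        return (object_flag & SD_OBJECT_HAS_VOLUME) != 0;
    }

    int main()
    {
        std::printf("%d %d\n",
                    should_push_instance_for_volume(SD_OBJECT_HAS_VOLUME | 1),
                    should_push_instance_for_volume(1));  /* 1 0 */
        return 0;
    }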
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index ec837212471..b5405e8e57b 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -18,7 +18,13 @@
*/
#ifdef __QBVH__
-#include "geom_qbvh_volume_all.h"
+# include "qbvh_volume_all.h"
+#endif
+
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
#endif
/* This is a template BVH traversal function for volumes, where
@@ -44,12 +50,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
- int traversalStack[BVH_STACK_SIZE];
- traversalStack[0] = ENTRYPOINT_SENTINEL;
+ int traversal_stack[BVH_STACK_SIZE];
+ traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
const float tmax = ray->t;
@@ -73,9 +79,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
+
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
+# if BVH_FEATURE(BVH_HAIR)
+ ssef tnear(0.0f), tfar(isect_t);
+# endif
shuffle_swap_t shufflexyz[3];
Psplat[0] = ssef(P.x);
@@ -94,137 +103,109 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- bool traverseChild0, traverseChild1;
- int nodeAddrChild1;
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ int node_addr_child1, traverse_mask;
+ float dist[2];
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
- /* Intersect two child bounding boxes, non-SSE version */
- float t = isect_array->t;
-
- /* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
-
- /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-
- /* decide which nodes to traverse next */
- traverseChild0 = (c0max >= c0min);
- traverseChild1 = (c1max >= c1min);
-
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+# if BVH_FEATURE(BVH_HAIR)
+ dir,
+# endif
+ idir,
+ isect_t,
+ node_addr,
+ visibility,
+ dist);
#else // __KERNEL_SSE2__
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
- const float4 cnodes = ((float4*)bvh_nodes)[3];
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- /* decide which nodes to traverse next */
- traverseChild0 = (movemask(lrhit) & 1);
- traverseChild1 = (movemask(lrhit) & 2);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ node_addr,
+ visibility,
+ dist);
#endif // __KERNEL_SSE2__
- nodeAddr = __float_as_int(cnodes.x);
- nodeAddrChild1 = __float_as_int(cnodes.y);
+ node_addr = __float_as_int(cnodes.z);
+ node_addr_child1 = __float_as_int(cnodes.w);
- if(traverseChild0 && traverseChild1) {
- /* both children were intersected, push the farther one */
-#if !defined(__KERNEL_SSE2__)
- bool closestChild1 = (c1min < c0min);
-#else
- bool closestChild1 = tminmax[1] < tminmax[0];
-#endif
-
- if(closestChild1) {
- int tmp = nodeAddr;
- nodeAddr = nodeAddrChild1;
- nodeAddrChild1 = tmp;
+ if(traverse_mask == 3) {
+ /* Both children were intersected, push the farther one. */
+ bool is_closest_child1 = (dist[1] < dist[0]);
+ if(is_closest_child1) {
+ int tmp = node_addr;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
}
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = nodeAddrChild1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = node_addr_child1;
}
else {
- /* one child was intersected */
- if(traverseChild1) {
- nodeAddr = nodeAddrChild1;
+ /* One child was intersected. */
+ if(traverse_mask == 2) {
+ node_addr = node_addr_child1;
}
- else if(!traverseChild0) {
- /* neither child was intersected */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ else if(traverse_mask == 0) {
+ /* Neither child was intersected. */
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- const int primAddr2 = __float_as_int(leaf.y);
+ const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
bool hit;
/* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+ hit = triangle_intersect(kg,
+ &isect_precalc,
+ isect_array,
+ P,
+ visibility,
+ object,
+ prim_addr);
if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
+ /* Update the number of hits now, so we do a proper check on max bounces. */
num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
@@ -239,6 +220,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
+ /* Move on to the next entry in the intersections array. */
+ isect_array++;
+ isect_array->t = isect_t;
}
}
break;
@@ -246,23 +230,28 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+ hit = motion_triangle_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ ray->time,
+ visibility,
+ object,
+ prim_addr);
if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
+ /* Update the number of hits now, so we do a proper check on max bounces. */
num_hits++;
# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
# endif
- isect_array->t = isect_t;
if(num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
@@ -277,6 +266,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
+ /* Move on to the next entry in the intersections array. */
+ isect_array++;
+ isect_array->t = isect_t;
}
}
break;
@@ -290,7 +282,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@@ -311,29 +303,32 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect_t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
- ++stackPtr;
- kernel_assert(stackPtr < BVH_STACK_SIZE);
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_STACK_SIZE);
+ traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* pop */
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
if(num_hits_in_instance) {
@@ -368,16 +363,19 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ tfar = ssef(isect_t);
+# endif
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr];
+ --stack_ptr;
}
#endif /* FEATURE(BVH_MOTION) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
}
@@ -410,3 +408,4 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
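
The reordering in the two triangle cases above is deliberate: the hit count is incremented before the max_hits comparison, and the cursor into the intersections array only advances after that check passes, so the final slot is never stepped past. A standalone sketch of the same recording order, with hypothetical names:

/* Record up to max_hits t-values into hits[]; returns the number recorded. */
static int record_hits(float *hits, int max_hits, float isect_t,
                       const float *candidates, int num_candidates)
{
    float *isect_array = hits;
    int num_hits = 0;
    for(int i = 0; i < num_candidates; i++) {
        *isect_array = candidates[i];
        /* Update the count first, so the cap check sees this hit. */
        num_hits++;
        if(num_hits == max_hits) {
            return num_hits;
        }
        /* Only now move on to the next entry and reset its t. */
        isect_array++;
        *isect_array = isect_t;
    }
    return num_hits;
}
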
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
new file mode 100644
index 00000000000..4d8695bedec
--- /dev/null
+++ b/intern/cycles/kernel/bvh/qbvh_nodes.h
@@ -0,0 +1,433 @@
+/*
+ * Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+struct QBVHStackItem {
+ int addr;
+ float dist;
+};
+
+/* TODO(sergey): Investigate if using intrinsics helps for both
+ * stack item swap and float comparison.
+ */
+ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
+ QBVHStackItem *ccl_restrict b)
+{
+ QBVHStackItem tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
+ QBVHStackItem *ccl_restrict s2,
+ QBVHStackItem *ccl_restrict s3)
+{
+ if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
+ if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
+ if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
+}
+
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
+ QBVHStackItem *ccl_restrict s2,
+ QBVHStackItem *ccl_restrict s3,
+ QBVHStackItem *ccl_restrict s4)
+{
+ if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
+ if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
+ if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
+ if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
+ if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
+}
+
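
The two overloads above are small sorting networks: three compare-and-swaps fully order three stack items by dist, and five fully order four (the classic optimal four-element network). In the callers s1 is the stack top, so after sorting the nearest child is popped first. A quick standalone check of the three-item network, using a hypothetical Item type:

#include <assert.h>

typedef struct { int addr; float dist; } Item;

static void item_swap(Item *a, Item *b) { Item t = *a; *a = *b; *b = t; }

static void sort3(Item *s1, Item *s2, Item *s3)
{
    if(s2->dist < s1->dist) { item_swap(s2, s1); }
    if(s3->dist < s2->dist) { item_swap(s3, s2); }
    if(s2->dist < s1->dist) { item_swap(s2, s1); }
    /* All six input orderings end up ascending. */
    assert(s1->dist <= s2->dist && s2->dist <= s3->dist);
}
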
+/* Axis-aligned node intersection. */
+
+ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
+ const ssef& isect_near,
+ const ssef& isect_far,
+#ifdef __KERNEL_AVX2__
+ const sse3f& org_idir,
+#else
+ const sse3f& org,
+#endif
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ ssef *ccl_restrict dist)
+{
+ const int offset = node_addr + 1;
+#ifdef __KERNEL_AVX2__
+ const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
+ const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
+ const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
+ const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
+ const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
+ const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
+#else
+ const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
+ const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
+ const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
+ const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
+ const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
+ const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
+#endif
+
+#ifdef __KERNEL_SSE41__
+ const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
+ const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
+ const sseb vmask = cast(tnear) > cast(tfar);
+ int mask = (int)movemask(vmask)^0xf;
+#else
+ const ssef tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+ const ssef tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+ const sseb vmask = tnear <= tfar;
+ int mask = (int)movemask(vmask);
+#endif
+ *dist = tnear;
+ return mask;
+}
+
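
qbvh_aligned_node_intersect above is a four-wide slab test: each ssef lane holds one child's bounds, and movemask packs the per-child interval comparisons into the returned hit mask. A scalar equivalent for a single child, assuming the caller has already swapped the near/far rows by ray-direction sign the way the near_*/far_* offsets do:

/* Slab test for one child box; returns 1 if the ray interval hits it. */
static int slab_hit(const float bb_near[3], const float bb_far[3],
                    const float P[3], const float idir[3],
                    float t_near, float t_far, float *dist)
{
    for(int k = 0; k < 3; k++) {
        const float tn = (bb_near[k] - P[k]) * idir[k];
        const float tf = (bb_far[k] - P[k]) * idir[k];
        if(tn > t_near) t_near = tn;  /* max of the slab entry points */
        if(tf < t_far) t_far = tf;    /* min of the slab exit points */
    }
    *dist = t_near;
    return t_near <= t_far;  /* one bit of the SIMD movemask */
}
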
+ccl_device_inline int qbvh_aligned_node_intersect_robust(
+ KernelGlobals *ccl_restrict kg,
+ const ssef& isect_near,
+ const ssef& isect_far,
+#ifdef __KERNEL_AVX2__
+ const sse3f& P_idir,
+#else
+ const sse3f& P,
+#endif
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ ssef *ccl_restrict dist)
+{
+ const int offset = node_addr + 1;
+#ifdef __KERNEL_AVX2__
+ const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
+ const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
+ const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
+ const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
+ const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
+ const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
+#else
+ const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
+ const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
+ const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
+ const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
+ const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
+ const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
+#endif
+
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+ const ssef tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+ const ssef tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+ const sseb vmask = round_down*tnear <= round_up*tfar;
+ *dist = tnear;
+ return (int)movemask(vmask);
+}
+
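
The robust variant widens the acceptance test for the hair minimum-width feature: instead of tnear <= tfar it checks (1 - difl)*tnear <= (1 + difl)*tfar, so a box that a slightly fattened ray would graze is still traversed. The scalar form of that final comparison:

/* difl is a small relative slack; difl == 0 reduces to the exact test. */
static int slab_hit_robust(float tnear, float tfar, float difl)
{
    const float round_down = 1.0f - difl;
    const float round_up = 1.0f + difl;
    return round_down*tnear <= round_up*tfar;
}
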
+/* Unaligned node intersection. */
+
+ccl_device_inline int qbvh_unaligned_node_intersect(
+ KernelGlobals *ccl_restrict kg,
+ const ssef& isect_near,
+ const ssef& isect_far,
+#ifdef __KERNEL_AVX2__
+ const sse3f& org_idir,
+#endif
+ const sse3f& org,
+ const sse3f& dir,
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ ssef *ccl_restrict dist)
+{
+ const int offset = node_addr;
+ const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
+ const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
+ const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+
+ const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
+ const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
+ const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+
+ const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
+ const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
+ const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+
+ const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
+ const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
+ const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+
+ const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
+ aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
+ aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+
+ const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
+ aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
+ aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
+
+ const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
+ const ssef nrdir_x = neg_one / aligned_dir_x,
+ nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
+
+ const ssef tlower_x = aligned_P_x * nrdir_x,
+ tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
+
+ const ssef tupper_x = tlower_x - nrdir_x,
+ tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
+
+#ifdef __KERNEL_SSE41__
+ const ssef tnear_x = mini(tlower_x, tupper_x);
+ const ssef tnear_y = mini(tlower_y, tupper_y);
+ const ssef tnear_z = mini(tlower_z, tupper_z);
+ const ssef tfar_x = maxi(tlower_x, tupper_x);
+ const ssef tfar_y = maxi(tlower_y, tupper_y);
+ const ssef tfar_z = maxi(tlower_z, tupper_z);
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = tnear <= tfar;
+ *dist = tnear;
+ return movemask(vmask);
+#else
+ const ssef tnear_x = min(tlower_x, tupper_x);
+ const ssef tnear_y = min(tlower_y, tupper_y);
+ const ssef tnear_z = min(tlower_z, tupper_z);
+ const ssef tfar_x = max(tlower_x, tupper_x);
+ const ssef tfar_y = max(tlower_y, tupper_y);
+ const ssef tfar_z = max(tlower_z, tupper_z);
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = tnear <= tfar;
+ *dist = tnear;
+ return movemask(vmask);
+#endif
+}
+
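
The unaligned path above first transforms the ray into the node's local space, where each child's bounding box becomes the unit cube, leaving only the planes 0 and 1 per axis. The nrdir trick then follows from nrdir = -1/d: the slab t at plane x = 0 is (0 - P)/d = P*nrdir, and at x = 1 it is (1 - P)/d = P*nrdir + 1/d = tlower - nrdir. A per-axis scalar sketch:

static void unit_cube_slab(float P, float d, float *tlower, float *tupper)
{
    const float nrdir = -1.0f/d;
    *tlower = P*nrdir;          /* t at the x = 0 plane */
    *tupper = *tlower - nrdir;  /* t at the x = 1 plane, since -nrdir == 1/d */
}
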
+ccl_device_inline int qbvh_unaligned_node_intersect_robust(
+ KernelGlobals *ccl_restrict kg,
+ const ssef& isect_near,
+ const ssef& isect_far,
+#ifdef __KERNEL_AVX2__
+ const sse3f& P_idir,
+#endif
+ const sse3f& P,
+ const sse3f& dir,
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ ssef *ccl_restrict dist)
+{
+ const int offset = node_addr;
+ const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
+ const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
+ const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+
+ const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
+ const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
+ const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+
+ const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
+ const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
+ const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+
+ const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
+ const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
+ const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+
+ const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
+ aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
+ aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+
+ const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
+ aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
+ aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
+
+ const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
+ const ssef nrdir_x = neg_one / aligned_dir_x,
+ nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
+
+ const ssef tlower_x = aligned_P_x * nrdir_x,
+ tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
+
+ const ssef tupper_x = tlower_x - nrdir_x,
+ tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
+
+ const float round_down = 1.0f - difl;
+ const float round_up = 1.0f + difl;
+
+#ifdef __KERNEL_SSE41__
+ const ssef tnear_x = mini(tlower_x, tupper_x);
+ const ssef tnear_y = mini(tlower_y, tupper_y);
+ const ssef tnear_z = mini(tlower_z, tupper_z);
+ const ssef tfar_x = maxi(tlower_x, tupper_x);
+ const ssef tfar_y = maxi(tlower_y, tupper_y);
+ const ssef tfar_z = maxi(tlower_z, tupper_z);
+#else
+ const ssef tnear_x = min(tlower_x, tupper_x);
+ const ssef tnear_y = min(tlower_y, tupper_y);
+ const ssef tnear_z = min(tlower_z, tupper_z);
+ const ssef tfar_x = max(tlower_x, tupper_x);
+ const ssef tfar_y = max(tlower_y, tupper_y);
+ const ssef tfar_z = max(tlower_z, tupper_z);
+#endif
+ const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+ const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+ const sseb vmask = round_down*tnear <= round_up*tfar;
+ *dist = tnear;
+ return movemask(vmask);
+}
+
+/* Intersector wrappers.
+ *
+ * They check the node type and call the appropriate intersection code.
+ */
+
+ccl_device_inline int qbvh_node_intersect(
+ KernelGlobals *ccl_restrict kg,
+ const ssef& isect_near,
+ const ssef& isect_far,
+#ifdef __KERNEL_AVX2__
+ const sse3f& org_idir,
+#endif
+ const sse3f& org,
+ const sse3f& dir,
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ ssef *ccl_restrict dist)
+{
+ const int offset = node_addr;
+ const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+ if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return qbvh_unaligned_node_intersect(kg,
+ isect_near,
+ isect_far,
+#ifdef __KERNEL_AVX2__
+ org_idir,
+#endif
+ org,
+ dir,
+ idir,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ dist);
+ }
+ else {
+ return qbvh_aligned_node_intersect(kg,
+ isect_near,
+ isect_far,
+#ifdef __KERNEL_AVX2__
+ org_idir,
+#else
+ org,
+#endif
+ idir,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ dist);
+ }
+}
+
+ccl_device_inline int qbvh_node_intersect_robust(
+ KernelGlobals *ccl_restrict kg,
+ const ssef& isect_near,
+ const ssef& isect_far,
+#ifdef __KERNEL_AVX2__
+ const sse3f& P_idir,
+#endif
+ const sse3f& P,
+ const sse3f& dir,
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int node_addr,
+ const float difl,
+ ssef *ccl_restrict dist)
+{
+ const int offset = node_addr;
+ const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+ if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+ return qbvh_unaligned_node_intersect_robust(kg,
+ isect_near,
+ isect_far,
+#ifdef __KERNEL_AVX2__
+ P_idir,
+#endif
+ P,
+ dir,
+ idir,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ difl,
+ dist);
+ }
+ else {
+ return qbvh_aligned_node_intersect_robust(kg,
+ isect_near,
+ isect_far,
+#ifdef __KERNEL_AVX2__
+ P_idir,
+#else
+ P,
+#endif
+ idir,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ difl,
+ dist);
+ }
+}
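
Two details of this new file are worth noting. The wrappers dispatch on the PATH_RAY_NODE_UNALIGNED bit stored in the node's first float, so trees without unaligned nodes pay a single predictable branch per node. And on AVX2, precomputing org_idir = P*idir lets the slab term collapse into one fused multiply-subtract, because (node - P)*idir == node*idir - P*idir. A scalar identity check of that fold:

/* org_idir is the precomputed P*idir for this axis. */
static float slab_term_fma(float node_bound, float idir, float org_idir)
{
    return node_bound*idir - org_idir;  /* == (node_bound - P)*idir */
}
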
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index edb5b5c78c3..34753ff067d 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -27,6 +27,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+#endif
+
ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
@@ -39,12 +45,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* Ray parameters in registers. */
const float tmax = ray->t;
@@ -72,13 +78,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef tnear(0.0f), tfar(tmax);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#else
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
/* Offsets to select the side that becomes the lower or upper bound. */
@@ -96,29 +106,53 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+
+#ifdef __VISIBILITY_FLAG__
+ if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+#endif
+
ssef dist;
- int traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
-#else
- org,
+ P_idir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+# endif
+# if BVH_FEATURE(BVH_HAIR)
+ dir4,
+# endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ &dist);
+
+ if(child_mask != 0) {
+ float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
+ }
/* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
+ int r = __bscf(child_mask);
+ if(child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -127,24 +161,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
+ if(child_mask == 0) {
if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@@ -152,86 +186,86 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ if(child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
}
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
/* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
continue;
}
#endif
- int primAddr = __float_as_int(leaf.x);
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- int primAddr2 = __float_as_int(leaf.y);
+ int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
/* Primitive intersection. */
- while(primAddr < primAddr2) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ while(prim_addr < prim_addr2) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
@@ -241,22 +275,57 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg,
+ &isect_precalc,
+ isect_array,
+ P,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
+ hit = motion_triangle_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ ray->time,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr);
break;
}
#endif
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
- else
- hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = bvh_cardinal_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ NULL,
+ 0, 0);
+ }
+ else {
+ hit = bvh_curve_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ PATH_RAY_SHADOW,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ NULL,
+ 0, 0);
+ }
break;
}
#endif
@@ -268,6 +337,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Shadow ray early termination. */
if(hit) {
+ /* Update the number of hits now, so we do a proper check on max bounces. */
+ (*num_hits)++;
+
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
@@ -298,23 +370,21 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
return true;
}
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
-
+ /* Move on to the next entry in the intersections array. */
+ isect_array++;
isect_array->t = isect_t;
}
- primAddr++;
+ prim_addr++;
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
@@ -329,28 +399,33 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
if(num_hits_in_instance) {
@@ -383,21 +458,28 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(tmax);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return false;
}
+
+#undef NODE_INTERSECT
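
The child_mask loops above lean on __bscf, a bit-scan-and-clear: return the index of the lowest set bit, then clear it. A portable sketch of the same iteration using a GCC/Clang builtin (the kernel's own helper may be implemented differently):

static int bscf(int *mask)
{
    const int r = __builtin_ctz((unsigned)*mask);  /* lowest set bit index */
    *mask &= *mask - 1;                            /* clear that bit */
    return r;
}

static void visit_hit_children(int child_mask, const int child_addr[4])
{
    while(child_mask != 0) {
        const int r = bscf(&child_mask);
        /* child_addr[r] was intersected: continue with it or push it. */
        (void)child_addr[r];
    }
}
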
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h
index 84512a8783c..03794e3a882 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h
@@ -25,6 +25,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+#endif
+
ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
SubsurfaceIntersection *ss_isect,
@@ -41,12 +47,12 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
+ int stack_ptr = 0;
+ int node_addr = kernel_tex_fetch(__object_node, subsurface_object);
/* Ray parameters in registers. */
float3 P = ray->P;
@@ -82,13 +88,17 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef tnear(0.0f), tfar(isect_t);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#else
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
/* Offsets to select the side that becomes the lower or upper bound. */
@@ -106,29 +116,43 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
ssef dist;
- int traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
-#else
- org,
+ P_idir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ dir4,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ &dist);
- if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+ if(child_mask != 0) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
+ }
/* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
+ int r = __bscf(child_mask);
+ if(child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -137,24 +161,24 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
+ if(child_mask == 0) {
if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@@ -162,82 +186,82 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ if(child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
}
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
/* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
- int primAddr2 = __float_as_int(leaf.y);
+ int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* Intersect ray against primitive. */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
- primAddr,
+ prim_addr,
isect_t,
lcg_state,
max_hits);
@@ -247,15 +271,15 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* Intersect ray against primitive. */
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
- primAddr,
+ prim_addr,
isect_t,
lcg_state,
max_hits);
@@ -267,6 +291,8 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
break;
}
}
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
}
+
+#undef NODE_INTERSECT
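
The doubled while(node_addr != ENTRYPOINT_SENTINEL) closing the function above reflects the two-level loop all of these traversals share: the inner loop walks one (possibly instanced) BVH until its sentinel is popped, and the outer loop restores the ray after an instance pop and resumes. A compile-clean skeleton of that shape, with the real work elided:

#define ENTRYPOINT_SENTINEL 0x76543210

static void traverse_two_level(int root)
{
    int node_addr = root;
    do {
        do {
            /* ...descend inner nodes and intersect leaves; entering an
             * instance pushes ENTRYPOINT_SENTINEL and jumps to the
             * instance root... */
            node_addr = ENTRYPOINT_SENTINEL;  /* placeholder: subtree done */
        } while(node_addr != ENTRYPOINT_SENTINEL);
        /* ...instance pop, if any: restore the ray and pop the resume
         * address from the stack... */
    } while(node_addr != ENTRYPOINT_SENTINEL);
}
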
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
new file mode 100644
index 00000000000..f82ff661495
--- /dev/null
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -0,0 +1,505 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function, where various features can be
+ * enabled/disabled. This way we can compile optimized versions for each case
+ * without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust
+#endif
+
+ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect,
+ const uint visibility
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ ,uint *lcg_state,
+ float difl,
+ float extmax
+#endif
+ )
+{
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps (for non shadow rays).
+ * - Separate version for shadow rays.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[0].dist = -FLT_MAX;
+
+ /* Traversal variables in registers. */
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
+ float node_dist = -FLT_MAX;
+
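
Unlike the int-only stacks in the volume kernels earlier in this diff, this traversal keeps node_dist alongside node_addr and pushes a distance with every stack entry, so an entry whose recorded entry distance already exceeds the current isect->t can be discarded on pop instead of revisited. A hedged sketch of that culling pop, with hypothetical names:

typedef struct { int addr; float dist; } StackItem;

/* Pop entries until one is still closer than the best hit, or a sentinel. */
static int pop_live(const StackItem *stack, int *stack_ptr,
                    float isect_t, int sentinel)
{
    for(;;) {
        const StackItem it = stack[(*stack_ptr)--];
        if(it.addr == sentinel || it.dist <= isect_t) {
            return it.addr;
        }
    }
}
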
+ /* Ray parameters in registers. */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_itfm;
+#endif
+
+#ifndef __KERNEL_SSE41__
+ if(!isfinite(P.x)) {
+ return false;
+ }
+#endif
+
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
+
+ BVH_DEBUG_INIT();
+
+ ssef tnear(0.0f), tfar(ray->t);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+ float3 P_idir = P*idir;
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
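
The six offsets above exploit the node layout: after the first (flags) row, the four-wide node stores six rows holding the children's min.x, max.x, min.y, max.y, min.z, max.z. Selecting the near/far row per axis once per ray, by the sign of each idir component, removes the per-node min/max between the two slab planes. A scalar sketch of the same selection:

static void pick_near_far(const float idir[3], int near_i[3], int far_i[3])
{
    for(int k = 0; k < 3; k++) {
        if(idir[k] >= 0.0f) { near_i[k] = 2*k; far_i[k] = 2*k + 1; }
        else { near_i[k] = 2*k + 1; far_i[k] = 2*k; }
    }
}
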
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+
+ if(UNLIKELY(node_dist > isect->t)
+#ifdef __VISIBILITY_FLAG__
+ || (__float_as_uint(inodes.x) & visibility) == 0
+#endif
+ )
+ {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ int child_mask;
+ ssef dist;
+
+ BVH_DEBUG_NEXT_STEP();
+
+#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
+ if(difl != 0.0f) {
+ /* NOTE: We extend all the child bounding boxes instead of fetching
+ * and checking visibility flags for each of them.
+ *
+ * Need to test if doing the opposite would be any faster.
+ */
+ child_mask = NODE_INTERSECT_ROBUST(kg,
+ tnear,
+ tfar,
+# ifdef __KERNEL_AVX2__
+ P_idir4,
+# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+# endif
+# if BVH_FEATURE(BVH_HAIR)
+ dir4,
+# endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ difl,
+ &dist);
+ }
+ else
+#endif /* BVH_HAIR_MINIMUM_WIDTH */
+ {
+ child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ dir4,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ &dist);
+ }
+
+ if(child_mask != 0) {
+ float4 cnodes;
+ /* TODO(sergey): Investigate whether moving cnodes upwards
+ * gives a speedup (it will be a different cache pattern, but will
+ * avoid the extra check here).
+ */
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
+ }
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(child_mask);
+ float d0 = ((float*)&dist)[r];
+ if(child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
+ node_dist = d0;
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ r = __bscf(child_mask);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float*)&dist)[r];
+ if(child_mask == 0) {
+ if(d1 < d0) {
+ node_addr = c1;
+ node_dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+ continue;
+ }
+ else {
+ node_addr = c0;
+ node_dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float*)&dist)[r];
+ if(child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(child_mask);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float*)&dist)[r];
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
+ }
+
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+
+#ifdef __VISIBILITY_FLAG__
+ if(UNLIKELY((node_dist > isect->t) ||
+ ((__float_as_uint(leaf.z) & visibility) == 0)))
+#else
+ if(UNLIKELY((node_dist > isect->t)))
+#endif
+ {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ continue;
+ }
+
+ int prim_addr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(prim_addr >= 0) {
+#endif
+ int prim_addr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+
+ /* Primitive intersection. */
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_STEP();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if(triangle_intersect(kg,
+ &isect_precalc,
+ isect,
+ P,
+ visibility,
+ object,
+ prim_addr)) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_STEP();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ if(motion_triangle_intersect(kg,
+ isect,
+ P,
+ dir,
+ ray->time,
+ visibility,
+ object,
+ prim_addr)) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_MOTION) */
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ BVH_DEBUG_NEXT_STEP();
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ bool hit;
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+ hit = bvh_cardinal_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ else {
+ hit = bvh_curve_intersect(kg,
+ isect,
+ P,
+ dir,
+ visibility,
+ object,
+ prim_addr,
+ ray->time,
+ type,
+ lcg_state,
+ difl,
+ extmax);
+ }
+ if(hit) {
+ tfar = ssef(isect->t);
+ /* Shadow ray early termination. */
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
+ return true;
+ }
+ }
+ }
+ break;
+ }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+
+# if BVH_FEATURE(BVH_MOTION)
+ qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
+# else
+ qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
+# endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+# ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+ traversal_stack[stack_ptr].dist = -FLT_MAX;
+
+ node_addr = kernel_tex_fetch(__object_node, object);
+
+ BVH_DEBUG_NEXT_INSTANCE();
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(node_addr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stack_ptr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ /* Instance pop. */
+# if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+# else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+# endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+# ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ object = OBJECT_NONE;
+ node_addr = traversal_stack[stack_ptr].addr;
+ node_dist = traversal_stack[stack_ptr].dist;
+ --stack_ptr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(node_addr != ENTRYPOINT_SENTINEL);
+
+ return (isect->prim != PRIM_NONE);
+}
+
+#undef NODE_INTERSECT
+#undef NODE_INTERSECT_ROBUST
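
For reference, the 3- and 4-child slow paths above rely on qbvh_stack_sort(), a tiny sorting network over stack items. A minimal standalone sketch (plain C++, without the kernel's ccl_device qualifiers; the real helper is visible in the deleted geom_qbvh.h further down):

struct StackItem {
	int addr;
	float dist;
};

static inline void item_swap(StackItem *a, StackItem *b)
{
	StackItem tmp = *a;
	*a = *b;
	*b = tmp;
}

/* Three compare-swaps leave s1 with the smallest distance; since s1 is the
 * stack top, the closest child is popped and traversed first. */
static inline void stack_sort3(StackItem *s1, StackItem *s2, StackItem *s3)
{
	if(s2->dist < s1->dist) item_swap(s2, s1);
	if(s3->dist < s2->dist) item_swap(s3, s2);
	if(s2->dist < s1->dist) item_swap(s2, s1);
}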
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index ab2e530dd20..b4f334eb842 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -26,6 +26,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+#endif
+
ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect,
@@ -38,12 +44,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* Ray parameters in registers. */
float3 P = ray->P;
@@ -68,13 +74,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect->object = OBJECT_NONE;
ssef tnear(0.0f), tfar(ray->t);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#else
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
/* Offsets to select the side that becomes the lower or upper bound. */
@@ -92,29 +102,52 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+#ifdef __VISIBILITY_FLAG__
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ if((__float_as_uint(inodes.x) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+#endif
+
ssef dist;
- int traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
-#else
- org,
+ P_idir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ dir4,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ &dist);
+
+ if(child_mask != 0) {
+ float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
+ }
/* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
+ int r = __bscf(child_mask);
+ if(child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -123,24 +156,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
+ if(child_mask == 0) {
if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@@ -148,102 +181,102 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ if(child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
}
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
/* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- int primAddr2 = __float_as_int(leaf.y);
+ int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
/* Primitive intersection. */
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
- triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
+ triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, prim_addr);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+ motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -253,7 +286,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@@ -268,34 +301,39 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@@ -309,21 +347,28 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect->t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
+
+#undef NODE_INTERSECT
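
The near_x/far_x offsets set up above implement the classic slab test without per-axis min/max: the packed node layout is {lo.x, hi.x, lo.y, hi.y, lo.z, hi.z}, and the sign of the inverse ray direction picks which plane of each pair is the entry plane. A scalar sketch of the idea (illustrative only; the kernel code above does the same thing four nodes at a time in SSE):

#include <math.h>

static inline bool slab_hit(const float bounds[6],
                            const float P[3],
                            const float idir[3],
                            float tnear, float tfar)
{
	for(int axis = 0; axis < 3; axis++) {
		/* Offsets 0/1, 2/3, 4/5 match near_x/far_x etc. above. */
		const int near_ofs = (idir[axis] >= 0.0f)? 2*axis: 2*axis + 1;
		const int far_ofs  = (idir[axis] >= 0.0f)? 2*axis + 1: 2*axis;
		tnear = fmaxf(tnear, (bounds[near_ofs] - P[axis])*idir[axis]);
		tfar  = fminf(tfar,  (bounds[far_ofs]  - P[axis])*idir[axis]);
	}
	return tnear <= tfar;
}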
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index 5546471b0e3..a877e5bb341 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -26,6 +26,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+#endif
+
ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
@@ -39,12 +45,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+ QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+ traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
+ int stack_ptr = 0;
+ int node_addr = kernel_data.bvh.root;
/* Ray parameters in registers. */
const float tmax = ray->t;
@@ -72,13 +78,17 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef tnear(0.0f), tfar(isect_t);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#else
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
/* Offsets to select the side that becomes the lower or upper bound. */
@@ -96,29 +106,52 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+#ifdef __VISIBILITY_FLAG__
+ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ if((__float_as_uint(inodes.x) & visibility) == 0) {
+ /* Pop. */
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
+ continue;
+ }
+#endif
+
ssef dist;
- int traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int child_mask = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
-#else
- org,
+ P_idir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
-
- if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ dir4,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ node_addr,
+ &dist);
+
+ if(child_mask != 0) {
+ float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
+ }
/* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
+ int r = __bscf(child_mask);
+ if(child_mask == 0) {
+ node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -127,24 +160,24 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
+ if(child_mask == 0) {
if(d1 < d0) {
- nodeAddr = c1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ node_addr = c1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
- nodeAddr = c0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
+ node_addr = c0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@@ -152,96 +185,94 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c1;
+ traversal_stack[stack_ptr].dist = d1;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c0;
+ traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ if(child_mask == 0) {
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2]);
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
- r = __bscf(traverseChild);
+ r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c3;
+ traversal_stack[stack_ptr].dist = d3;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = c2;
+ traversal_stack[stack_ptr].dist = d2;
+ qbvh_stack_sort(&traversal_stack[stack_ptr],
+ &traversal_stack[stack_ptr - 1],
+ &traversal_stack[stack_ptr - 2],
+ &traversal_stack[stack_ptr - 3]);
}
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
/* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
- int primAddr = __float_as_int(leaf.x);
+ if(node_addr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+ int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
+ if(prim_addr >= 0) {
#endif
- int primAddr2 = __float_as_int(leaf.y);
+ int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
bool hit;
/* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
/* Primitive intersection. */
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, prim_addr);
if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
+ /* Update number of hits now, so we do proper check on max bounces. */
num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
#endif
- isect_array->t = isect_t;
if(num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
@@ -256,30 +287,31 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ isect_array->t = isect_t;
}
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ for(; prim_addr < prim_addr2; prim_addr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+ hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if(hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
+ /* Update number of hits now, so we do proper check on max bounces. */
num_hits++;
# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
# endif
- isect_array->t = isect_t;
if(num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
@@ -294,6 +326,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ isect_array->t = isect_t;
}
}
break;
@@ -304,7 +339,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@@ -320,35 +355,40 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+ ++stack_ptr;
+ kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+ traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
+ node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
+ if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@@ -379,23 +419,30 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
isect_t = tmax;
isect_array->t = isect_t;
object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- --stackPtr;
+ node_addr = traversal_stack[stack_ptr].addr;
+ --stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
+ } while(node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
}
+
+#undef NODE_INTERSECT
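
The reordered bookkeeping in the hunks above matters for the full-buffer case: the hit count is bumped before the max_hits check, and the array cursor only advances once it is known that another slot will be written, so the last entry is never skipped. A hedged sketch of that order (VolumeHit and record_hit are illustrative names, not the kernel's):

struct VolumeHit { float t; };  /* Hypothetical stand-in for Intersection. */

static inline bool record_hit(int *num_hits, int max_hits,
                              VolumeHit **cursor, float t_max)
{
	(*num_hits)++;               /* Count first, so the max check is exact. */
	if(*num_hits == max_hits) {
		return false;            /* Buffer full: caller stops traversal. */
	}
	(*cursor)++;                 /* Only now move to the next free slot. */
	(*cursor)->t = t_max;        /* Reset ray length for the next test. */
	return true;
}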
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index c94a5384d1f..d2c7edb11ea 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -15,27 +15,6 @@
* limitations under the License.
*/
-/* bottom-most stack entry, indicating the end of traversal */
-#define ENTRYPOINT_SENTINEL 0x76543210
-
-/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
-#define BVH_STACK_SIZE 192
-#define BVH_QSTACK_SIZE 384
-#define BVH_NODE_SIZE 4
-#define BVH_NODE_LEAF_SIZE 1
-#define BVH_QNODE_SIZE 7
-#define BVH_QNODE_LEAF_SIZE 1
-#define TRI_NODE_SIZE 3
-
-/* silly workaround for float extended precision that happens when compiling
- * without sse support on x86, it results in different results for float ops
- * that you would otherwise expect to compare correctly */
-#if !defined(__i386__) || defined(__SSE__)
-# define NO_EXTENDED_PRECISION
-#else
-# define NO_EXTENDED_PRECISION volatile
-#endif
-
#include "geom_attribute.h"
#include "geom_object.h"
#include "geom_triangle.h"
@@ -45,5 +24,4 @@
#include "geom_curve.h"
#include "geom_volume.h"
#include "geom_primitive.h"
-#include "geom_bvh.h"
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 8894843997c..292e1bfca0e 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -450,8 +450,8 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
else if(level == 1) {
/* the maximum recursion depth is reached.
- * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
- * dP* is reversed if necessary.*/
+ * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
+ * dP* is reversed if necessary. */
float t = isect->t;
float u = 0.0f;
float gd = 0.0f;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index ffe55529110..2fb8e219884 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -47,13 +47,13 @@ ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
-ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3])
+ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3])
{
if(step == numsteps) {
/* center step: regular vertex location */
- verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
+ verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
+ verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
}
else {
 /* center step not stored in this array */
@@ -62,19 +62,19 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, float3
offset += step*numverts;
- verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
- verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
- verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+ verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+ verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+ verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
}
}
-ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3])
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3])
{
if(step == numsteps) {
/* center step: regular vertex location */
- normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
- normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
- normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
+ normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
+ normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
+ normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
}
else {
/* center step not stored in this array */
@@ -83,9 +83,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, float
offset += step*numverts;
- normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
- normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
- normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+ normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+ normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+ normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
}
}
@@ -107,7 +107,7 @@ ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, i
/* fetch vertex coordinates */
float3 next_verts[3];
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
@@ -259,7 +259,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
/* fetch vertex coordinates */
float3 verts[3], next_verts[3];
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)));
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
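
The step/step+1 pairs fetched above are the two motion keys bracketing the ray time; the caller then blends them with the fractional position inside that segment. A sketch of that blend, assuming the kernel's float3 type with the usual arithmetic operators:

static inline void lerp_verts(const float3 verts[3],
                              const float3 next_verts[3],
                              float t,  /* Fraction within the segment, 0..1. */
                              float3 out[3])
{
	for(int i = 0; i < 3; i++) {
		out[i] = (1.0f - t)*verts[i] + t*next_verts[i];
	}
}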
diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
deleted file mode 100644
index 2a2d7822eee..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-struct QBVHStackItem {
- int addr;
- float dist;
-};
-
-/* TODO(sergey): Investigate if using intrinsics helps for both
- * stack item swap and float comparison.
- */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a,
- QBVHStackItem *__restrict b)
-{
- QBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
- QBVHStackItem *__restrict s2,
- QBVHStackItem *__restrict s3)
-{
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
- if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
- QBVHStackItem *__restrict s2,
- QBVHStackItem *__restrict s3,
- QBVHStackItem *__restrict s4)
-{
- if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
- if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
- if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
- if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
- if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
-}
-
-ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
-#else
- const sse3f& org,
-#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr*BVH_QNODE_SIZE;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
-#endif
-
-#ifdef __KERNEL_SSE41__
- const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
- const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
- const sseb vmask = cast(tNear) > cast(tFar);
- int mask = (int)movemask(vmask)^0xf;
-#else
- const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
- const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
- const sseb vmask = tNear <= tFar;
- int mask = (int)movemask(vmask);
-#endif
- *dist = tNear;
- return mask;
-}
-
-ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
-#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
-#else
- const sse3f& P,
-#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- const float difl,
- ssef *__restrict dist)
-{
- const int offset = nodeAddr*BVH_QNODE_SIZE;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
- const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
- const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
-#endif
-
- const float round_down = 1.0f - difl;
- const float round_up = 1.0f + difl;
- const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
- const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
- const sseb vmask = round_down*tNear <= round_up*tFar;
- *dist = tNear;
- return (int)movemask(vmask);
-}
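
The traversal loops walk the hit mask with __bscf ("bit scan and clear forward"): return the index of the lowest set bit and clear it. A portable sketch assuming GCC/Clang builtins (the kernel's own version sits in its SIMD utility headers):

static inline int bscf(int *mask)
{
	/* Precondition: *mask != 0, as guaranteed by the child_mask checks. */
	const int r = __builtin_ctz((unsigned int)*mask);  /* Lowest set bit. */
	*mask &= *mask - 1;                                /* Clear that bit. */
	return r;
}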
diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
deleted file mode 100644
index 738d08ac6fc..00000000000
--- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
- * and code copyright 2009-2012 Intel Corporation
- *
- * Modifications Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_MOTION: motion blur rendering
- *
- */
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- ,uint *lcg_state,
- float difl,
- float extmax
-#endif
- )
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
- traversalStack[0].addr = ENTRYPOINT_SENTINEL;
- traversalStack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stackPtr = 0;
- int nodeAddr = kernel_data.bvh.root;
- float nodeDist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
-
- ssef tnear(0.0f), tfar(ray->t);
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#else
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
-
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
- if(UNLIKELY(nodeDist > isect->t)) {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- continue;
- }
-
- int traverseChild;
- ssef dist;
-
- BVH_DEBUG_NEXT_STEP();
-
-#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(difl != 0.0f) {
- /* NOTE: We extend all the child BB instead of fetching
- * and checking visibility flags for each of the,
- *
- * Need to test if doing opposite would be any faster.
- */
- traverseChild = qbvh_node_intersect_robust(kg,
- tnear,
- tfar,
-# ifdef __KERNEL_AVX2__
- P_idir4,
-# else
- org,
-# endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- difl,
- &dist);
- }
- else
-#endif /* BVH_HAIR_MINIMUM_WIDTH */
- {
- traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#else
- org,
-#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
- }
-
- if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
-
- /* One child is hit, continue with that child. */
- int r = __bscf(traverseChild);
- float d0 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- nodeAddr = __float_as_int(cnodes[r]);
- nodeDist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(traverseChild);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- if(d1 < d0) {
- nodeAddr = c1;
- nodeDist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
- continue;
- }
- else {
- nodeAddr = c0;
- nodeDist = d0;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = d1;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float*)&dist)[r];
- if(traverseChild == 0) {
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2]);
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(traverseChild);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float*)&dist)[r];
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c3;
- traversalStack[stackPtr].dist = d3;
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = c2;
- traversalStack[stackPtr].dist = d2;
- qbvh_stack_sort(&traversalStack[stackPtr],
- &traversalStack[stackPtr - 1],
- &traversalStack[stackPtr - 2],
- &traversalStack[stackPtr - 3]);
- }
-
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
-
-#ifdef __VISIBILITY_FLAG__
- if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if(UNLIKELY((nodeDist > isect->t)))
-#endif
- {
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- continue;
- }
-
- int primAddr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(primAddr >= 0) {
-#endif
- int primAddr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
-
- /* Primitive intersection. */
- switch(type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for(; primAddr < primAddr2; primAddr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- bool hit;
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
- if(hit) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect->t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
-
- ++stackPtr;
- kernel_assert(stackPtr < BVH_QSTACK_SIZE);
- traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
- traversalStack[stackPtr].dist = -FLT_MAX;
-
- nodeAddr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if(stackPtr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
-# endif
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(isect->t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P*idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
- triangle_intersect_precalc(dir, &isect_precalc);
-
- object = OBJECT_NONE;
- nodeAddr = traversalStack[stackPtr].addr;
- nodeDist = traversalStack[stackPtr].dist;
- --stackPtr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
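
The robust node test deleted above (reintroduced elsewhere as NODE_INTERSECT_ROBUST) does not intersect exact bounds; it widens the slab interval by the relative factor difl used for minimum-width hair. The acceptance condition reduces to this sketch:

static inline bool robust_accept(float t_near, float t_far, float difl)
{
	const float round_down = 1.0f - difl;
	const float round_up   = 1.0f + difl;
	/* Shrink the entry distance and grow the exit distance, so children
	 * that a slightly fattened ray would still clip are kept. */
	return round_down*t_near <= round_up*t_far;
}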
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 995dfac5b09..0c2351e1d1b 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -27,12 +27,11 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
{
/* load triangle vertices */
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
+ const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
+ const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
+ const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
- float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
-
/* return normal */
if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
return normalize(cross(v2 - v0, v1 - v0));
@@ -44,11 +43,10 @@ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
{
/* load triangle vertices */
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
- float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
/* compute point */
float t = 1.0f - u - v;
@@ -71,11 +69,10 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int
ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3])
{
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
- P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
+ P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
+ P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
}
/* Interpolate smooth vertex normal from vertices */
@@ -83,11 +80,10 @@ ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3
ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v)
{
/* load triangle vertices */
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
- float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
- float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
- float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
+ float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
+ float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
return normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
}
@@ -97,11 +93,10 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo
ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv)
{
/* fetch triangle vertex coordinates */
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
- float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+ const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
+ const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
+ const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
/* compute derivatives of P w.r.t. uv */
*dPdu = (p0 - p2);
@@ -119,11 +114,11 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s
return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
- float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
- float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
- float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z));
+ float f0 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.x);
+ float f1 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.y);
+ float f2 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.z);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
@@ -162,11 +157,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
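To summarize the layout change running through this file: __tri_vindex is now a uint4 per triangle whose x/y/z components index per-vertex arrays (normals, vertex attributes) directly, while w points at the triangle's three positions in the BVH-ordered __prim_tri_verts array. A hypothetical helper condensing the fetch pattern above (not part of the patch):

ccl_device_inline void triangle_fetch(KernelGlobals *kg, int prim,
                                      float3 verts[3], float3 normals[3])
{
	const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
	for(int i = 0; i < 3; i++) {
		/* Positions live in the BVH-ordered array, starting at .w. */
		verts[i] = float4_to_float3(
		        kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + i));
	}
	/* Per-vertex data is still indexed by the global vertex indices. */
	normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
	normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
	normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
}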
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index b6dfc769012..fc081bda525 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -106,9 +106,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- const float4 tri_a = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+0),
- tri_b = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+1),
- tri_c = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+2);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
@@ -202,9 +203,10 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- const float4 tri_a = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+0),
- tri_b = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+1),
- tri_c = kernel_tex_fetch(__tri_storage, triAddr*TRI_NODE_SIZE+2);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
@@ -324,9 +326,10 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
P = P + D*t;
- const float4 tri_a = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+0),
- tri_b = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+1),
- tri_c = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+2);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
@@ -381,9 +384,10 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
P = P + D*t;
#ifdef __INTERSECTION_REFINE__
- const float4 tri_a = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+0),
- tri_b = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+1),
- tri_c = kernel_tex_fetch(__tri_storage, isect->prim*TRI_NODE_SIZE+2);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+ const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
+ tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
+ tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
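The intersection kernels address triangles by their BVH primitive index rather than the scene index, so the replacement for the old __tri_storage array is a two-level lookup: __prim_tri_index maps the primitive address to an offset, and __prim_tri_verts holds the three float4 positions at that offset. A sketch of the shared fetch (hypothetical helper name):

ccl_device_inline float4 prim_tri_vertex(KernelGlobals *kg,
                                         int prim_addr,
                                         int corner)  /* 0, 1 or 2 */
{
	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
	return kernel_tex_fetch(__prim_tri_verts, tri_vindex + corner);
}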
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 42314756f02..08f6f457805 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -42,6 +42,7 @@
#define ccl_constant
#define ccl_may_alias
#define ccl_addr_space
+#define ccl_restrict __restrict__
/* No assert supported for CUDA */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index a5708448e23..8505cb85576 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -39,6 +39,7 @@
#define ccl_global __global
#define ccl_local __local
#define ccl_private __private
+#define ccl_restrict restrict
#ifdef __SPLIT_KERNEL__
# define ccl_addr_space __global
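For context, ccl_restrict gives kernels a portable no-alias qualifier: __restrict__ under CUDA, restrict under OpenCL C, and __restrict for the CPU compilers (see util_types.h below). A hedged usage sketch, not taken from the patch:

/* Declaring the pointers non-aliasing lets the compiler keep loads in
 * registers across the loop instead of re-reading through dst. */
ccl_device void scale_add(float4 *ccl_restrict dst,
                          const float4 *ccl_restrict src,
                          float t, int n)
{
	for(int i = 0; i < n; i++)
		dst[i] += src[i] * t;
}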
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 736a884f819..93c4bd3f7d5 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -51,8 +51,8 @@ ccl_device float area_light_sample(float3 P,
bool sample_coord)
{
/* In our name system we're using P for the center,
- * which is o in the paper.
- */
+ * which is o in the paper.
+ */
float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
float axisu_len, axisv_len;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 3c3503eab8b..d5b31037723 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -25,6 +25,7 @@
#include "kernel_camera.h"
#include "geom/geom.h"
+#include "bvh/bvh.h"
#include "kernel_accumulate.h"
#include "kernel_shader.h"
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 94598e2565e..731dc0407c5 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -309,7 +309,7 @@ ccl_device_inline void path_state_branch(PathState *state, int branch, int num_b
state->num_samples = state->num_samples*num_branches;
}
-ccl_device_inline uint lcg_state_init(RNG *rng, const ccl_addr_space PathState *state, uint scramble)
+ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble)
{
return lcg_init(*rng + state->rng_offset + state->sample*scramble);
}
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 245d236ff97..5ba262c1044 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -25,7 +25,8 @@
/* bvh */
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes)
-KERNEL_TEX(float4, texture_float4, __tri_storage)
+KERNEL_TEX(float4, texture_float4, __prim_tri_verts)
+KERNEL_TEX(uint, texture_uint, __prim_tri_index)
KERNEL_TEX(uint, texture_uint, __prim_type)
KERNEL_TEX(uint, texture_uint, __prim_visibility)
KERNEL_TEX(uint, texture_uint, __prim_index)
@@ -39,8 +40,7 @@ KERNEL_TEX(float4, texture_float4, __objects_vector)
/* triangles */
KERNEL_TEX(uint, texture_uint, __tri_shader)
KERNEL_TEX(float4, texture_float4, __tri_vnormal)
-KERNEL_TEX(float4, texture_float4, __tri_vindex)
-KERNEL_TEX(float4, texture_float4, __tri_verts)
+KERNEL_TEX(uint4, texture_uint4, __tri_vindex)
/* curves */
KERNEL_TEX(float4, texture_float4, __curves)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 76d2a6b98e6..5de58ba28ed 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -292,11 +292,14 @@ enum PathRayFlag {
PATH_RAY_CURVE = 512, /* visibility flag to define curve segments */
PATH_RAY_VOLUME_SCATTER = 1024, /* volume scattering */
- PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024),
+ /* Special flag to tag unaligned BVH nodes. */
+ PATH_RAY_NODE_UNALIGNED = 2048,
- PATH_RAY_MIS_SKIP = 2048,
- PATH_RAY_DIFFUSE_ANCESTOR = 4096,
- PATH_RAY_SINGLE_PASS_DONE = 8192,
+ PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024|2048),
+
+ PATH_RAY_MIS_SKIP = 4096,
+ PATH_RAY_DIFFUSE_ANCESTOR = 8192,
+ PATH_RAY_SINGLE_PASS_DONE = 16384,
};
/* Closure Label */
@@ -769,7 +772,7 @@ typedef ccl_addr_space struct ShaderData {
int type;
/* parametric coordinates
- * - barycentric weights for triangles */
+ * - barycentric weights for triangles */
float u;
float v;
/* object id if there is one, ~0 otherwise */
@@ -792,14 +795,14 @@ typedef ccl_addr_space struct ShaderData {
#endif
#ifdef __DPDU__
/* differential of P w.r.t. parametric coordinates. note that dPdu is
- * not readily suitable as a tangent for shading on triangles. */
+ * not readily suitable as a tangent for shading on triangles. */
float3 dPdu;
float3 dPdv;
#endif
#ifdef __OBJECT_MOTION__
/* object <-> world space transformations, cached to avoid
- * re-interpolating them constantly for shading */
+ * re-interpolating them constantly for shading */
Transform ob_tfm;
Transform ob_itfm;
#endif
@@ -1171,11 +1174,11 @@ typedef ccl_addr_space struct DebugData {
#define QUEUE_EMPTY_SLOT -1
/*
-* Queue 1 - Active rays
-* Queue 2 - Background queue
-* Queue 3 - Shadow ray cast kernel - AO
-* Queeu 4 - Shadow ray cast kernel - direct lighting
-*/
+ * Queue 1 - Active rays
+ * Queue 2 - Background queue
+ * Queue 3 - Shadow ray cast kernel - AO
+ * Queue 4 - Shadow ray cast kernel - direct lighting
+ */
#define NUM_QUEUES 4
/* Queue names */
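Note on the PathRayFlag renumbering above: everything below PATH_RAY_NODE_UNALIGNED participates in visibility masking, so the new flag is inserted at bit 11, included in PATH_RAY_ALL_VISIBILITY, and the non-visibility flags (MIS skip and friends) shift up one bit. A sketch of why the mask must stay contiguous (illustrative check, not a quote of the kernel):

ccl_device_inline bool primitive_visible(uint visibility, uint prim_visibility)
{
	/* Ray visibility and per-primitive visibility are ANDed; any flag
	 * above PATH_RAY_ALL_VISIBILITY would never match and is reserved
	 * for path-state bookkeeping such as PATH_RAY_MIS_SKIP. */
	return (visibility & prim_visibility & PATH_RAY_ALL_VISIBILITY) != 0;
}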
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index aad06ed5c76..37907cd8fdc 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -35,6 +35,7 @@
# include "../../kernel_montecarlo.h"
# include "../../kernel_projection.h"
# include "../../geom/geom.h"
+# include "../../bvh/bvh.h"
# include "../../kernel_accumulate.h"
# include "../../kernel_camera.h"
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index ebe739ebd0e..2bb2be5e6b3 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -47,6 +47,7 @@
#include "kernel_camera.h"
#include "kernels/cpu/kernel_cpu_image.h"
#include "geom/geom.h"
+#include "bvh/bvh.h"
#include "kernel_projection.h"
#include "kernel_accumulate.h"
@@ -912,7 +913,7 @@ bool OSLRenderServices::texture(ustring filename,
#endif
bool status;
- if(filename[0] == '@') {
+ if(filename.length() && filename[0] == '@') {
int slot = atoi(filename.c_str() + 1);
float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
@@ -993,7 +994,7 @@ bool OSLRenderServices::texture3d(ustring filename,
}
bool status;
- if(filename[0] == '@') {
+ if(filename.length() && filename[0] == '@') {
int slot = atoi(filename.c_str() + 1);
float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z);
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index 49030f33c26..b43f8402d42 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -81,6 +81,7 @@ set(SRC_OSL
node_wireframe.osl
node_hair_bsdf.osl
node_uv_map.osl
+ node_rgb_to_bw.osl
)
set(SRC_OSL_HEADERS
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index a00401845c8..7cd2922dd4f 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -88,7 +88,7 @@ shader node_image_texture(
string color_space = "sRGB",
string projection = "flat",
string interpolation = "smartcubic",
- string wrap = "periodic",
+ string extension = "periodic",
float projection_blend = 0.0,
int is_float = 1,
int use_alpha = 1,
@@ -108,7 +108,7 @@ shader node_image_texture(
use_alpha,
is_float,
interpolation,
- wrap);
+ extension);
}
else if (projection == "box") {
/* object space normal */
@@ -184,7 +184,7 @@ shader node_image_texture(
use_alpha,
is_float,
interpolation,
- wrap);
+ extension);
Alpha += weight[0] * tmp_alpha;
}
if (weight[1] > 0.0) {
@@ -195,7 +195,7 @@ shader node_image_texture(
use_alpha,
is_float,
interpolation,
- wrap);
+ extension);
Alpha += weight[1] * tmp_alpha;
}
if (weight[2] > 0.0) {
@@ -206,7 +206,7 @@ shader node_image_texture(
use_alpha,
is_float,
interpolation,
- wrap);
+ extension);
Alpha += weight[2] * tmp_alpha;
}
}
@@ -219,7 +219,7 @@ shader node_image_texture(
use_alpha,
is_float,
interpolation,
- wrap);
+ extension);
}
else if (projection == "tube") {
point projected = map_to_tube(texco_remap_square(p));
@@ -230,6 +230,6 @@ shader node_image_texture(
use_alpha,
is_float,
interpolation,
- wrap);
+ extension);
}
}
diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
new file mode 100644
index 00000000000..903dfcdc881
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdosl.h"
+
+shader node_rgb_to_bw(
+ color Color = 0.0,
+ output float Val = 0.0)
+{
+ Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722;
+}
+
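(Aside: the weights above are the Rec. 709 luminance coefficients, Y = 0.2126 R + 0.7152 G + 0.0722 B, i.e. the standard conversion from linear RGB to luminance.)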
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index e1c7e2cea99..88d6dab04d0 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -31,6 +31,7 @@
#include "kernel_camera.h"
#include "geom/geom.h"
+#include "bvh/bvh.h"
#include "kernel_accumulate.h"
#include "kernel_shader.h"
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index aa9c07c867e..44732734c31 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -72,8 +72,16 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
uint width = info.x;
uint height = info.y;
uint offset = info.z;
- uint periodic = (info.w & 0x1);
- uint interpolation = info.w >> 1;
+
+ /* Image Options */
+ uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
+ uint extension;
+ if(info.w & (1 << 1))
+ extension = EXTENSION_REPEAT;
+ else if(info.w & (1 << 2))
+ extension = EXTENSION_EXTEND;
+ else
+ extension = EXTENSION_CLIP;
float4 r;
int ix, iy, nix, niy;
@@ -81,22 +89,26 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
svm_image_texture_frac(x*width, &ix);
svm_image_texture_frac(y*height, &iy);
- if(periodic) {
+ if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
}
- else {
+ else if(extension == EXTENSION_CLIP) {
+ if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f)
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ else { /* EXTENSION_EXTEND */
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
-
}
+
r = svm_image_texture_read(kg, id, offset + ix + iy*width);
}
- else { /* We default to linear interpolation if it is not closest */
+ else { /* INTERPOLATION_LINEAR */
float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
- if(periodic) {
+ if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
@@ -104,14 +116,17 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
niy = svm_image_texture_wrap_periodic(iy+1, height);
}
else {
- ix = svm_image_texture_wrap_clamp(ix, width);
- iy = svm_image_texture_wrap_clamp(iy, height);
-
+ if(extension == EXTENSION_CLIP) {
+ if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
nix = svm_image_texture_wrap_clamp(ix+1, width);
niy = svm_image_texture_wrap_clamp(iy+1, height);
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
}
-
r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index ecde2e99a7b..614620c14af 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -1076,6 +1076,26 @@ void ImageManager::device_update_slot(Device *device,
}
}
+uint8_t ImageManager::pack_image_options(ImageDataType type, size_t slot)
+{
+ uint8_t options = 0;
+
+ /* Image options are packed into one byte:
+ * bit 0 -> interpolation
+ * bits 1, 2, 3 -> extension */
+ if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST)
+ options |= (1 << 0);
+
+ if(images[type][slot]->extension == EXTENSION_REPEAT)
+ options |= (1 << 1);
+ else if(images[type][slot]->extension == EXTENSION_EXTEND)
+ options |= (1 << 2);
+ else /* EXTENSION_CLIP */
+ options |= (1 << 3);
+
+ return options;
+}
+
void ImageManager::device_pack_images(Device *device,
DeviceScene *dscene,
Progress& /*progess*/)
@@ -1107,11 +1127,9 @@ void ImageManager::device_pack_images(Device *device,
device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
- /* The image options are packed
- bit 0 -> periodic
- bit 1 + 2 -> interpolation type */
- uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1;
- info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
+ uint8_t options = pack_image_options(type, slot);
+
+ info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
@@ -1139,11 +1157,8 @@ void ImageManager::device_pack_images(Device *device,
/* todo: support 3D textures, only CPU for now */
- /* The image options are packed
- bit 0 -> periodic
- bit 1 + 2 -> interpolation type */
- uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1;
- info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
+ uint8_t options = pack_image_options(type, slot);
+ info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
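The packing above is mirrored by the decode in svm_image.h earlier in this patch; a side-by-side sketch of the kernel-side unpack (assuming the same bit layout, helper name hypothetical):

void unpack_image_options(uint8_t options,
                          InterpolationType *interpolation,
                          ExtensionType *extension)
{
	*interpolation = (options & (1 << 0)) ? INTERPOLATION_CLOSEST
	                                      : INTERPOLATION_LINEAR;
	if(options & (1 << 1))
		*extension = EXTENSION_REPEAT;
	else if(options & (1 << 2))
		*extension = EXTENSION_EXTEND;
	else  /* bit 3 */
		*extension = EXTENSION_CLIP;
}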
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 01d02f4dbec..07998684b23 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -122,6 +122,8 @@ private:
int flattened_slot_to_type_index(int flat_slot, ImageDataType *type);
string name_from_type(int type);
+ uint8_t pack_image_options(ImageDataType type, size_t slot);
+
void device_load_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot, Progress *progess);
void device_free_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot);
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 764a925983e..661719ed545 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -73,6 +73,37 @@ void Mesh::Curve::bounds_grow(const int k, const float3 *curve_keys, const float
bounds.grow(upper, mr);
}
+void Mesh::Curve::bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ const Transform& aligned_space,
+ BoundBox& bounds) const
+{
+ float3 P[4];
+
+ P[0] = curve_keys[max(first_key + k - 1, first_key)];
+ P[1] = curve_keys[first_key + k];
+ P[2] = curve_keys[first_key + k + 1];
+ P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)];
+
+ P[0] = transform_point(&aligned_space, P[0]);
+ P[1] = transform_point(&aligned_space, P[1]);
+ P[2] = transform_point(&aligned_space, P[2]);
+ P[3] = transform_point(&aligned_space, P[3]);
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
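The new bounds_grow overload above feeds the unaligned-BVH build: transforming the curve keys into a direction-aligned space before growing the box yields much tighter bounds for hair than world-space boxes. A hedged usage sketch (assuming the frame comes from make_transform_frame, the helper added in util_transform.h below):

BoundBox oriented_segment_bounds(const Mesh::Curve& curve, int k,
                                 const float3 *keys, const float *radius,
                                 const float3 segment_direction)
{
	const Transform aligned_space =
	        make_transform_frame(normalize(segment_direction));
	BoundBox bounds = BoundBox::empty;
	curve.bounds_grow(k, keys, radius, aligned_space, bounds);
	return bounds;
}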
/* Mesh */
NODE_DEFINE(Mesh)
@@ -472,30 +503,19 @@ void Mesh::pack_normals(Scene *scene, uint *tri_shader, float4 *vnormal)
}
}
-void Mesh::pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset)
+void Mesh::pack_verts(const vector<uint>& tri_prim_index,
+ uint4 *tri_vindex,
+ size_t vert_offset,
+ size_t tri_offset)
{
- size_t verts_size = verts.size();
-
- if(verts_size) {
- float3 *verts_ptr = &verts[0];
-
- for(size_t i = 0; i < verts_size; i++) {
- float3 p = verts_ptr[i];
- tri_verts[i] = make_float4(p.x, p.y, p.z, 0.0f);
- }
- }
-
- size_t triangles_size = num_triangles();
-
+ const size_t triangles_size = num_triangles();
if(triangles_size) {
for(size_t i = 0; i < triangles_size; i++) {
Triangle t = get_triangle(i);
-
- tri_vindex[i] = make_float4(
- __int_as_float(t.v[0] + vert_offset),
- __int_as_float(t.v[1] + vert_offset),
- __int_as_float(t.v[2] + vert_offset),
- 0);
+ tri_vindex[i] = make_uint4(t.v[0] + vert_offset,
+ t.v[1] + vert_offset,
+ t.v[2] + vert_offset,
+ tri_prim_index[i + tri_offset]);
}
}
}
@@ -533,7 +553,11 @@ void Mesh::pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, s
}
}
-void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total)
+void Mesh::compute_bvh(DeviceScene *dscene,
+ SceneParams *params,
+ Progress *progress,
+ int n,
+ int total)
{
if(progress->get_cancel())
return;
@@ -564,6 +588,7 @@ void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total
BVHParams bparams;
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.use_qbvh = params->use_qbvh;
+ bparams.use_unaligned_nodes = dscene->data.bvh.have_curves;
delete bvh;
bvh = BVH::create(bparams, objects);
@@ -1070,42 +1095,82 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
}
}
-void MeshManager::device_update_mesh(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
+void MeshManager::mesh_calc_offset(Scene *scene)
{
- /* count and update offsets */
size_t vert_size = 0;
size_t tri_size = 0;
-
size_t curve_key_size = 0;
size_t curve_size = 0;
foreach(Mesh *mesh, scene->meshes) {
mesh->vert_offset = vert_size;
mesh->tri_offset = tri_size;
-
mesh->curvekey_offset = curve_key_size;
mesh->curve_offset = curve_size;
vert_size += mesh->verts.size();
tri_size += mesh->num_triangles();
-
curve_key_size += mesh->curve_keys.size();
curve_size += mesh->num_curves();
}
+}
+void MeshManager::device_update_mesh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ bool for_displacement,
+ Progress& progress)
+{
+ /* Count. */
+ size_t vert_size = 0;
+ size_t tri_size = 0;
+ size_t curve_key_size = 0;
+ size_t curve_size = 0;
+ foreach(Mesh *mesh, scene->meshes) {
+ vert_size += mesh->verts.size();
+ tri_size += mesh->num_triangles();
+ curve_key_size += mesh->curve_keys.size();
+ curve_size += mesh->num_curves();
+ }
+ /* Create mapping from triangle to primitive triangle array. */
+ vector<uint> tri_prim_index(tri_size);
+ if(for_displacement) {
+ /* For displacement kernels we do some trickery to make them believe
+ * we've got all required data ready. However, that data is different
+ * from what the final render kernels use, since there is no BVH yet,
+ * so we can't use the same array semantics.
+ */
+ foreach(Mesh *mesh, scene->meshes) {
+ for(size_t i = 0; i < mesh->num_triangles(); ++i) {
+ tri_prim_index[i + mesh->tri_offset] = 3 * (i + mesh->tri_offset);
+ }
+ }
+ }
+ else {
+ PackedBVH& pack = bvh->pack;
+ for(size_t i = 0; i < pack.prim_index.size(); ++i) {
+ if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
+ tri_prim_index[pack.prim_index[i]] = pack.prim_tri_index[i];
+ }
+ }
+ }
+ /* Fill in all the arrays. */
if(tri_size != 0) {
/* normals */
progress.set_status("Updating Mesh", "Computing normals");
uint *tri_shader = dscene->tri_shader.resize(tri_size);
float4 *vnormal = dscene->tri_vnormal.resize(vert_size);
- float4 *tri_verts = dscene->tri_verts.resize(vert_size);
- float4 *tri_vindex = dscene->tri_vindex.resize(tri_size);
+ uint4 *tri_vindex = dscene->tri_vindex.resize(tri_size);
foreach(Mesh *mesh, scene->meshes) {
- mesh->pack_normals(scene, &tri_shader[mesh->tri_offset], &vnormal[mesh->vert_offset]);
- mesh->pack_verts(&tri_verts[mesh->vert_offset], &tri_vindex[mesh->tri_offset], mesh->vert_offset);
-
+ mesh->pack_normals(scene,
+ &tri_shader[mesh->tri_offset],
+ &vnormal[mesh->vert_offset]);
+ mesh->pack_verts(tri_prim_index,
+ &tri_vindex[mesh->tri_offset],
+ mesh->vert_offset,
+ mesh->tri_offset);
if(progress.get_cancel()) return;
}
@@ -1114,10 +1179,8 @@ void MeshManager::device_update_mesh(Device *device, DeviceScene *dscene, Scene
device->tex_alloc("__tri_shader", dscene->tri_shader);
device->tex_alloc("__tri_vnormal", dscene->tri_vnormal);
- device->tex_alloc("__tri_verts", dscene->tri_verts);
device->tex_alloc("__tri_vindex", dscene->tri_vindex);
}
-
if(curve_size != 0) {
progress.set_status("Updating Mesh", "Copying Strands to device");
@@ -1132,6 +1195,19 @@ void MeshManager::device_update_mesh(Device *device, DeviceScene *dscene, Scene
device->tex_alloc("__curve_keys", dscene->curve_keys);
device->tex_alloc("__curves", dscene->curves);
}
+ if(for_displacement) {
+ float4 *prim_tri_verts = dscene->prim_tri_verts.resize(tri_size * 3);
+ foreach(Mesh *mesh, scene->meshes) {
+ for(size_t i = 0; i < mesh->num_triangles(); ++i) {
+ Mesh::Triangle t = mesh->get_triangle(i);
+ size_t offset = 3 * (i + mesh->tri_offset);
+ prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]);
+ prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]);
+ prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
+ }
+ }
+ device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts);
+ }
}
void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
@@ -1146,6 +1222,7 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
bparams.top_level = true;
bparams.use_qbvh = scene->params.use_qbvh;
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
+ bparams.use_unaligned_nodes = dscene->data.bvh.have_curves;
delete bvh;
bvh = BVH::create(bparams, scene->objects);
@@ -1170,9 +1247,13 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
dscene->object_node.reference((uint*)&pack.object_node[0], pack.object_node.size());
device->tex_alloc("__object_node", dscene->object_node);
}
- if(pack.tri_storage.size()) {
- dscene->tri_storage.reference(&pack.tri_storage[0], pack.tri_storage.size());
- device->tex_alloc("__tri_storage", dscene->tri_storage);
+ if(pack.prim_tri_index.size()) {
+ dscene->prim_tri_index.reference((uint*)&pack.prim_tri_index[0], pack.prim_tri_index.size());
+ device->tex_alloc("__prim_tri_index", dscene->prim_tri_index);
+ }
+ if(pack.prim_tri_verts.size()) {
+ dscene->prim_tri_verts.reference((float4*)&pack.prim_tri_verts[0], pack.prim_tri_verts.size());
+ device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts);
}
if(pack.prim_type.size()) {
dscene->prim_type.reference((uint*)&pack.prim_type[0], pack.prim_type.size());
@@ -1273,7 +1354,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
VLOG(1) << "Total " << scene->meshes.size() << " meshes.";
- /* update normals */
+ /* Update normals. */
foreach(Mesh *mesh, scene->meshes) {
foreach(Shader *shader, mesh->used_shaders) {
if(shader->need_update_attributes)
@@ -1289,17 +1370,17 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
}
/* Update images needed for true displacement. */
- bool need_displacement_images = false;
+ bool true_displacement_used = false;
bool old_need_object_flags_update = false;
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update &&
mesh->displacement_method != Mesh::DISPLACE_BUMP)
{
- need_displacement_images = true;
+ true_displacement_used = true;
break;
}
}
- if(need_displacement_images) {
+ if(true_displacement_used) {
VLOG(1) << "Updating images used for true displacement.";
device_update_displacement_images(device, dscene, scene, progress);
old_need_object_flags_update = scene->object_manager->need_flags_update;
@@ -1310,49 +1391,52 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
false);
}
- /* device update */
+ /* Device update. */
device_free(device, dscene);
- device_update_mesh(device, dscene, scene, progress);
+ mesh_calc_offset(scene);
+ if(true_displacement_used) {
+ device_update_mesh(device, dscene, scene, true, progress);
+ }
if(progress.get_cancel()) return;
device_update_attributes(device, dscene, scene, progress);
if(progress.get_cancel()) return;
- /* update displacement */
+ /* Update displacement. */
bool displacement_done = false;
-
- foreach(Mesh *mesh, scene->meshes)
- if(mesh->need_update && displace(device, dscene, scene, mesh, progress))
+ foreach(Mesh *mesh, scene->meshes) {
+ if(mesh->need_update &&
+ displace(device, dscene, scene, mesh, progress))
+ {
displacement_done = true;
+ }
+ }
- /* todo: properly handle cancel halfway displacement */
+ /* TODO: properly handle cancellation halfway through displacement. */
if(progress.get_cancel()) return;
- /* device re-update after displacement */
+ /* Device re-update after displacement. */
if(displacement_done) {
device_free(device, dscene);
- device_update_mesh(device, dscene, scene, progress);
- if(progress.get_cancel()) return;
-
device_update_attributes(device, dscene, scene, progress);
if(progress.get_cancel()) return;
}
- /* update bvh */
+ /* Update BVH. */
size_t i = 0, num_bvh = 0;
-
- foreach(Mesh *mesh, scene->meshes)
- if(mesh->need_update && mesh->need_build_bvh())
+ foreach(Mesh *mesh, scene->meshes) {
+ if(mesh->need_update && mesh->need_build_bvh()) {
num_bvh++;
-
+ }
+ }
TaskPool pool;
-
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update) {
pool.push(function_bind(&Mesh::compute_bvh,
mesh,
+ dscene,
&scene->params,
&progress,
i,
@@ -1362,14 +1446,14 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
}
}
}
-
TaskPool::Summary summary;
pool.wait_work(&summary);
VLOG(2) << "Objects BVH build pool statistics:\n"
<< summary.full_report();
- foreach(Shader *shader, scene->shaders)
+ foreach(Shader *shader, scene->shaders) {
shader->need_update_attributes = false;
+ }
#ifdef __OBJECT_MOTION__
Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);
@@ -1378,18 +1462,23 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
bool motion_blur = false;
#endif
- /* update obejcts */
+ /* Update objects. */
vector<Object *> volume_objects;
- foreach(Object *object, scene->objects)
+ foreach(Object *object, scene->objects) {
object->compute_bounds(motion_blur);
+ }
if(progress.get_cancel()) return;
device_update_bvh(device, dscene, scene, progress);
+ if(progress.get_cancel()) return;
+
+ device_update_mesh(device, dscene, scene, false, progress);
+ if(progress.get_cancel()) return;
need_update = false;
- if(need_displacement_images) {
+ if(true_displacement_used) {
/* Re-tag flags for update, so they're re-evaluated
* for meshes with correct bounding boxes.
*
@@ -1405,7 +1494,8 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
device->tex_free(dscene->bvh_nodes);
device->tex_free(dscene->bvh_leaf_nodes);
device->tex_free(dscene->object_node);
- device->tex_free(dscene->tri_storage);
+ device->tex_free(dscene->prim_tri_verts);
+ device->tex_free(dscene->prim_tri_index);
device->tex_free(dscene->prim_type);
device->tex_free(dscene->prim_visibility);
device->tex_free(dscene->prim_index);
@@ -1413,7 +1503,6 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
device->tex_free(dscene->tri_shader);
device->tex_free(dscene->tri_vnormal);
device->tex_free(dscene->tri_vindex);
- device->tex_free(dscene->tri_verts);
device->tex_free(dscene->curves);
device->tex_free(dscene->curve_keys);
device->tex_free(dscene->attributes_map);
@@ -1423,7 +1512,8 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
dscene->bvh_nodes.clear();
dscene->object_node.clear();
- dscene->tri_storage.clear();
+ dscene->prim_tri_verts.clear();
+ dscene->prim_tri_index.clear();
dscene->prim_type.clear();
dscene->prim_visibility.clear();
dscene->prim_index.clear();
@@ -1431,7 +1521,6 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
dscene->tri_shader.clear();
dscene->tri_vnormal.clear();
dscene->tri_vindex.clear();
- dscene->tri_verts.clear();
dscene->curves.clear();
dscene->curve_keys.clear();
dscene->attributes_map.clear();
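Stepping back, the tri_prim_index indirection built in device_update_mesh() is what ties the two triangle layouts together: for render kernels each scene triangle records where the BVH placed its three vertices in __prim_tri_verts, while for the displacement pass (no BVH yet) triangle i simply owns slots 3*i .. 3*i+2. A condensed sketch of the render-side mapping (illustrative helper mirroring the loop in the hunk above):

vector<uint> build_tri_prim_index(const PackedBVH& pack, size_t tri_size)
{
	vector<uint> tri_prim_index(tri_size);
	for(size_t i = 0; i < pack.prim_index.size(); ++i) {
		/* Only triangle primitives carry a vertex block in
		 * prim_tri_verts; curves are stored elsewhere. */
		if(pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) {
			tri_prim_index[pack.prim_index[i]] = pack.prim_tri_index[i];
		}
	}
	return tri_prim_index;
}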
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index edad6d32f00..0aea55544f2 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -72,7 +72,15 @@ public:
int num_segments() { return num_keys - 1; }
- void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox& bounds) const;
+ void bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ BoundBox& bounds) const;
+ void bounds_grow(const int k,
+ const float3 *curve_keys,
+ const float *curve_radius,
+ const Transform& aligned_space,
+ BoundBox& bounds) const;
};
Curve get_curve(size_t i) const
@@ -167,9 +175,16 @@ public:
void add_vertex_normals();
void pack_normals(Scene *scene, uint *shader, float4 *vnormal);
- void pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset);
+ void pack_verts(const vector<uint>& tri_prim_index,
+ uint4 *tri_vindex,
+ size_t vert_offset,
+ size_t tri_offset);
void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
- void compute_bvh(SceneParams *params, Progress *progress, int n, int total);
+ void compute_bvh(DeviceScene *dscene,
+ SceneParams *params,
+ Progress *progress,
+ int n,
+ int total);
bool need_attribute(Scene *scene, AttributeStandard std);
bool need_attribute(Scene *scene, ustring name);
@@ -213,15 +228,41 @@ public:
void update_svm_attributes(Device *device, DeviceScene *dscene, Scene *scene, vector<AttributeRequestSet>& mesh_attributes);
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
- void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
- void device_update_mesh(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
- void device_update_attributes(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
- void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
void device_update_flags(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
- void device_update_displacement_images(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
+
void device_free(Device *device, DeviceScene *dscene);
void tag_update(Scene *scene);
+
+protected:
+ /* Calculate verts/triangles/curves offsets in global arrays. */
+ void mesh_calc_offset(Scene *scene);
+
+ void device_update_object(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress& progress);
+
+ void device_update_mesh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ bool for_displacement,
+ Progress& progress);
+
+ void device_update_attributes(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress& progress);
+
+ void device_update_bvh(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress& progress);
+
+ void device_update_displacement_images(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress& progress);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 87020823df9..15b55d17301 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -611,10 +611,10 @@ static float sky_perez_function(float lam[6], float theta, float gamma)
static void sky_texture_precompute_old(SunSky *sunsky, float3 dir, float turbidity)
{
/*
- * We re-use the SunSky struct of the new model, to avoid extra variables
- * zenith_Y/x/y is now radiance_x/y/z
- * perez_Y/x/y is now config_x/y/z
- */
+ * We re-use the SunSky struct of the new model, to avoid extra variables
+ * zenith_Y/x/y is now radiance_x/y/z
+ * perez_Y/x/y is now config_x/y/z
+ */
float2 spherical = sky_spherical_coordinates(dir);
float theta = spherical.x;
@@ -1596,7 +1596,7 @@ void RGBToBWNode::compile(SVMCompiler& compiler)
void RGBToBWNode::compile(OSLCompiler& compiler)
{
- compiler.add(this, "node_convert_from_color");
+ compiler.add(this, "node_rgb_to_bw");
}
/* Convert */
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index b821e2b6475..925e84ad96d 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -63,7 +63,8 @@ public:
device_vector<float4> bvh_nodes;
device_vector<float4> bvh_leaf_nodes;
device_vector<uint> object_node;
- device_vector<float4> tri_storage;
+ device_vector<uint> prim_tri_index;
+ device_vector<float4> prim_tri_verts;
device_vector<uint> prim_type;
device_vector<uint> prim_visibility;
device_vector<uint> prim_index;
@@ -72,8 +73,7 @@ public:
/* mesh */
device_vector<uint> tri_shader;
device_vector<float4> tri_vnormal;
- device_vector<float4> tri_vindex;
- device_vector<float4> tri_verts;
+ device_vector<uint4> tri_vindex;
device_vector<float4> curves;
device_vector<float4> curve_keys;
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index cef5adc0a61..599222da9c5 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -151,7 +151,7 @@ public:
(isfinite(max.x) && isfinite(max.y) && isfinite(max.z));
}
- BoundBox transformed(const Transform *tfm)
+ BoundBox transformed(const Transform *tfm) const
{
BoundBox result = BoundBox::empty;
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index f01db64a79b..6fed18a3db8 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -127,6 +127,19 @@ ccl_device_inline Transform make_transform(float a, float b, float c, float d,
return t;
}
+/* Constructs a coordinate frame from a normalized normal. */
+ccl_device_inline Transform make_transform_frame(float3 N)
+{
+ const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N);
+ const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N);
+ const float3 dx = normalize((dot(dx0,dx0) > dot(dx1,dx1))? dx0: dx1);
+ const float3 dy = normalize(cross(N, dx));
+ return make_transform(dx.x, dx.y, dx.z, 0.0f,
+ dy.x, dy.y, dy.z, 0.0f,
+ N.x , N.y, N.z, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f);
+}
+
#ifndef __KERNEL_GPU__
ccl_device_inline Transform operator*(const Transform a, const Transform b)
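A quick usage note on make_transform_frame(): its rows are dx, dy, N, so transform_point() with this matrix expresses a point in a coordinate system whose z-axis is N, which is exactly the space the unaligned BVH nodes want their bounds in. A small hedged example (helper name hypothetical):

/* Illustration: the z component of the result is the distance along N. */
ccl_device_inline float3 to_frame_space(float3 N, float3 p)
{
	const Transform frame = make_transform_frame(normalize(N));
	return transform_point(&frame, p);
}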
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 972befa185b..257c6ad7491 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -37,6 +37,7 @@
#define ccl_device_noinline static
#define ccl_global
#define ccl_constant
+#define ccl_restrict __restrict
#define __KERNEL_WITH_SSE_ALIGN__
#if defined(_WIN32) && !defined(FREE_WINDOWS)