diff options
Diffstat (limited to 'source/blender/render/intern/raytrace')
-rw-r--r-- | source/blender/render/intern/raytrace/bvh.h | 407 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject.cpp | 534 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_hint.h | 72 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_instance.cpp | 211 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_octree.cpp | 1101 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_qbvh.cpp | 160 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_raycounter.cpp | 91 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_rtbuild.cpp | 531 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_rtbuild.h | 125 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_svbvh.cpp | 192 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_vbvh.cpp | 206 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/reorganize.h | 513 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/svbvh.h | 317 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/vbvh.h | 238 |
14 files changed, 4698 insertions, 0 deletions
diff --git a/source/blender/render/intern/raytrace/bvh.h b/source/blender/render/intern/raytrace/bvh.h new file mode 100644 index 00000000000..0f9a506762b --- /dev/null +++ b/source/blender/render/intern/raytrace/bvh.h @@ -0,0 +1,407 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/bvh.h + * \ingroup render + */ + + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" + +#include "raycounter.h" +#include "rayintersection.h" +#include "rayobject.h" +#include "rayobject_hint.h" +#include "rayobject_rtbuild.h" + +#include <assert.h> + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + +#ifndef __BVH_H__ +#define __BVH_H__ + +#ifdef __SSE__ +inline int test_bb_group4(__m128 *bb_group, const Isect *isec) +{ + const __m128 tmin0 = _mm_setzero_ps(); + const __m128 tmax0 = _mm_set_ps1(isec->dist); + + float start[3], idot_axis[3]; + copy_v3_v3(start, isec->start); + copy_v3_v3(idot_axis, isec->idot_axis); + + const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) ); + const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) ); + const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) ); + const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) ); + const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) ); + const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) ); + + return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); +} +#endif + +/* + * Determines the distance that the ray must travel to hit the bounding volume of the given node + * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe + * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9] + */ +static inline int rayobject_bb_intersect_test(const Isect *isec, const float *_bb) +{ + const float *bb = _bb; + + float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; + float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; + float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; + float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; + float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; + float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; + + RE_RC_COUNT(isec->raycounter->bb.test); + + if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; + if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0; + if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; + RE_RC_COUNT(isec->raycounter->bb.hit); + + return 1; +} + +/* bvh tree generics */ +template<class Tree> static void bvh_add(Tree *obj, RayObject *ob) +{ + rtbuild_add(obj->builder, ob); +} + +template<class Node> +inline bool is_leaf(Node *node) +{ + return !RE_rayobject_isAligned(node); +} + +template<class Tree> static void bvh_done(Tree *obj); + +template<class Tree> +static void bvh_free(Tree *obj) +{ + if (obj->builder) + rtbuild_free(obj->builder); + + if (obj->node_arena) + BLI_memarena_free(obj->node_arena); + + MEM_freeN(obj); +} + +template<class Tree> +static void bvh_bb(Tree *obj, float *min, float *max) +{ + if (obj->root) + bvh_node_merge_bb(obj->root, min, max); +} + + +template<class Tree> +static float bvh_cost(Tree *obj) +{ + assert(obj->cost >= 0.0f); + return obj->cost; +} + + + +/* bvh tree nodes generics */ +template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec) +{ + return rayobject_bb_intersect_test(isec, (const float *)node->bb); +} + + +template<class Node> +static inline void bvh_node_merge_bb(Node *node, float min[3], float max[3]) +{ + if (is_leaf(node)) { + RE_rayobject_merge_bb((RayObject *)node, min, max); + } + else { + DO_MIN(node->bb, min); + DO_MAX(node->bb + 3, max); + } +} + + + +/* + * recursively transverse a BVH looking for a rayhit using a local stack + */ +template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos); + +template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT, bool SHADOW> +static int bvh_node_stack_raycast(Node *root, Isect *isec) +{ + Node *stack[MAX_STACK_SIZE]; + int hit = 0, stack_pos = 0; + + if (!TEST_ROOT && !is_leaf(root)) + bvh_node_push_childs(root, isec, stack, stack_pos); + else + stack[stack_pos++] = root; + + while (stack_pos) { + Node *node = stack[--stack_pos]; + if (!is_leaf(node)) { + if (bvh_node_hit_test(node, isec)) { + bvh_node_push_childs(node, isec, stack, stack_pos); + assert(stack_pos <= MAX_STACK_SIZE); + } + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node, isec); + if (SHADOW && hit) return hit; + } + } + return hit; +} + + +#ifdef __SSE__ +/* + * Generic SIMD bvh recursion + * this was created to be able to use any simd (with the cost of some memmoves) + * it can take advantage of any SIMD width and doens't needs any special tree care + */ +template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT> +static int bvh_node_stack_raycast_simd(Node *root, Isect *isec) +{ + Node *stack[MAX_STACK_SIZE]; + + int hit = 0, stack_pos = 0; + + if (!TEST_ROOT) { + if (!is_leaf(root)) { + if (!is_leaf(root->child)) + bvh_node_push_childs(root, isec, stack, stack_pos); + else + return RE_rayobject_intersect( (RayObject *)root->child, isec); + } + else + return RE_rayobject_intersect( (RayObject *)root, isec); + } + else { + if (!is_leaf(root)) + stack[stack_pos++] = root; + else + return RE_rayobject_intersect( (RayObject *)root, isec); + } + + while (true) { + //Use SIMD 4 + if (stack_pos >= 4) { + __m128 t_bb[6]; + Node *t_node[4]; + + stack_pos -= 4; + + /* prepare the 4BB for SIMD */ + t_node[0] = stack[stack_pos + 0]->child; + t_node[1] = stack[stack_pos + 1]->child; + t_node[2] = stack[stack_pos + 2]->child; + t_node[3] = stack[stack_pos + 3]->child; + + const float *bb0 = stack[stack_pos + 0]->bb; + const float *bb1 = stack[stack_pos + 1]->bb; + const float *bb2 = stack[stack_pos + 2]->bb; + const float *bb3 = stack[stack_pos + 3]->bb; + + const __m128 x0y0x1y1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1, 0, 1, 0) ); + const __m128 x2y2x3y3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1, 0, 1, 0) ); + t_bb[0] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2, 0, 2, 0) ); + t_bb[1] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3, 1, 3, 1) ); + + const __m128 z0X0z1X1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3, 2, 3, 2) ); + const __m128 z2X2z3X3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3, 2, 3, 2) ); + t_bb[2] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2, 0, 2, 0) ); + t_bb[3] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3, 1, 3, 1) ); + + const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps(_mm_load_ps(bb0 + 4), _mm_load_ps(bb1 + 4), _MM_SHUFFLE(1, 0, 1, 0) ); + const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps(_mm_load_ps(bb2 + 4), _mm_load_ps(bb3 + 4), _MM_SHUFFLE(1, 0, 1, 0) ); + t_bb[4] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2, 0, 2, 0) ); + t_bb[5] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3, 1, 3, 1) ); +#if 0 + for (int i = 0; i < 4; i++) + { + Node *t = stack[stack_pos + i]; + assert(!is_leaf(t)); + + float *bb = ((float *)t_bb) + i; + bb[4 * 0] = t->bb[0]; + bb[4 * 1] = t->bb[1]; + bb[4 * 2] = t->bb[2]; + bb[4 * 3] = t->bb[3]; + bb[4 * 4] = t->bb[4]; + bb[4 * 5] = t->bb[5]; + t_node[i] = t->child; + } +#endif + RE_RC_COUNT(isec->raycounter->simd_bb.test); + int res = test_bb_group4(t_bb, isec); + + for (int i = 0; i < 4; i++) + if (res & (1 << i)) { + RE_RC_COUNT(isec->raycounter->simd_bb.hit); + if (!is_leaf(t_node[i])) { + for (Node *t = t_node[i]; t; t = t->sibling) { + assert(stack_pos < MAX_STACK_SIZE); + stack[stack_pos++] = t; + } + } + else { + hit |= RE_rayobject_intersect( (RayObject *)t_node[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + else if (stack_pos > 0) { + Node *node = stack[--stack_pos]; + assert(!is_leaf(node)); + + if (bvh_node_hit_test(node, isec)) { + if (!is_leaf(node->child)) { + bvh_node_push_childs(node, isec, stack, stack_pos); + assert(stack_pos <= MAX_STACK_SIZE); + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node->child, isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + else break; + } + return hit; +} +#endif + +/* + * recursively transverse a BVH looking for a rayhit using system stack + */ +#if 0 +template<class Node> +static int bvh_node_raycast(Node *node, Isect *isec) +{ + int hit = 0; + if (bvh_test_node(node, isec)) + { + if (isec->idot_axis[node->split_axis] > 0.0f) + { + int i; + for (i = 0; i < BVH_NCHILDS; i++) + if (!is_leaf(node->child[i])) + { + if (node->child[i] == 0) break; + + hit |= bvh_node_raycast(node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + else { + int i; + for (i = BVH_NCHILDS - 1; i >= 0; i--) + if (!is_leaf(node->child[i])) + { + if (node->child[i]) + { + hit |= dfs_raycast(node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + return hit; +} +#endif + +template<class Node, class HintObject> +static void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject) +{ + assert(hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE); + + if (is_leaf(node)) { + hint->stack[hint->size++] = (RayObject *)node; + } + else { + int childs = count_childs(node); + if (hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE) { + int result = hint_test_bb(hintObject, node->bb, node->bb + 3); + if (result == HINT_RECURSE) { + /* We are 100% sure the ray will be pass inside this node */ + bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject); + } + else if (result == HINT_ACCEPT) { + hint->stack[hint->size++] = (RayObject *)node; + } + } + else { + hint->stack[hint->size++] = (RayObject *)node; + } + } +} + + +template<class Tree> +static RayObjectAPI *bvh_get_api(int maxstacksize); + + +template<class Tree, int DFS_STACK_SIZE> +static inline RayObject *bvh_create_tree(int size) +{ + Tree *obj = (Tree *)MEM_callocN(sizeof(Tree), "BVHTree"); + assert(RE_rayobject_isAligned(obj)); /* RayObject API assumes real data to be 4-byte aligned */ + + obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE); + obj->root = NULL; + + obj->node_arena = NULL; + obj->builder = rtbuild_create(size); + + return RE_rayobject_unalignRayAPI((RayObject *) obj); +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject.cpp b/source/blender/render/intern/raytrace/rayobject.cpp new file mode 100644 index 00000000000..fee877b311d --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject.cpp @@ -0,0 +1,534 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject.cpp + * \ingroup render + */ + + +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "DNA_material_types.h" + +#include "rayintersection.h" +#include "rayobject.h" +#include "raycounter.h" +#include "render_types.h" +#include "renderdatabase.h" + +/* RayFace + * + * note we force always inline here, because compiler refuses to otherwise + * because function is too long. Since this is code that is called billions + * of times we really do want to inline. */ + +MALWAYS_INLINE RayObject *rayface_from_coords(RayFace *rayface, void *ob, void *face, + float *v1, float *v2, float *v3, float *v4) +{ + rayface->ob = ob; + rayface->face = face; + + copy_v3_v3(rayface->v1, v1); + copy_v3_v3(rayface->v2, v2); + copy_v3_v3(rayface->v3, v3); + + if (v4) { + copy_v3_v3(rayface->v4, v4); + rayface->quad = 1; + } + else { + rayface->quad = 0; + } + + return RE_rayobject_unalignRayFace(rayface); +} + +MALWAYS_INLINE void rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr) +{ + rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? vlr->v4->co : NULL); + + if (obi->transform_primitives) { + mul_m4_v3(obi->mat, rayface->v1); + mul_m4_v3(obi->mat, rayface->v2); + mul_m4_v3(obi->mat, rayface->v3); + + if (RE_rayface_isQuad(rayface)) + mul_m4_v3(obi->mat, rayface->v4); + } +} + +RayObject *RE_rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr) +{ + return rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? vlr->v4->co : NULL); +} + +RayObject *RE_rayface_from_coords(RayFace *rayface, void *ob, void *face, float *v1, float *v2, float *v3, float *v4) +{ + return rayface_from_coords(rayface, ob, face, v1, v2, v3, v4); +} + +/* VlakPrimitive */ + +RayObject *RE_vlakprimitive_from_vlak(VlakPrimitive *face, struct ObjectInstanceRen *obi, struct VlakRen *vlr) +{ + face->ob = obi; + face->face = vlr; + + return RE_rayobject_unalignVlakPrimitive(face); +} + +/* Checks for ignoring faces or materials */ + +MALWAYS_INLINE int vlr_check_intersect(Isect *is, ObjectInstanceRen *obi, VlakRen *vlr) +{ + /* for baking selected to active non-traceable materials might still + * be in the raytree */ + if (!(vlr->flag & R_TRACEBLE)) + return 0; + + /* I know... cpu cycle waste, might do smarter once */ + if (is->mode == RE_RAY_MIRROR) + return !(vlr->mat->mode & MA_ONLYCAST); + else + return (vlr->mat->mode2 & MA_CASTSHADOW) && (is->lay & obi->lay); +} + +MALWAYS_INLINE int vlr_check_intersect_solid(Isect *UNUSED(is), ObjectInstanceRen *UNUSED(obi), VlakRen *vlr) +{ + /* solid material types only */ + if (vlr->mat->material_type == MA_TYPE_SURFACE) + return 1; + else + return 0; +} + +MALWAYS_INLINE int vlr_check_bake(Isect *is, ObjectInstanceRen *obi, VlakRen *UNUSED(vlr)) +{ + return (obi->obr->ob != is->userdata) && (obi->obr->ob->flag & SELECT); +} + +/* Ray Triangle/Quad Intersection */ + +static bool isect_ray_tri_watertight_no_sign_check_v3( + const float ray_origin[3], const struct IsectRayPrecalc *isect_precalc, + const float v0[3], const float v1[3], const float v2[3], + float *r_lambda, float r_uv[2]) +{ + const int kx = isect_precalc->kx; + const int ky = isect_precalc->ky; + const int kz = isect_precalc->kz; + const float sx = isect_precalc->sx; + const float sy = isect_precalc->sy; + const float sz = isect_precalc->sz; + + /* Calculate vertices relative to ray origin. */ + const float a[3] = {v0[0] - ray_origin[0], v0[1] - ray_origin[1], v0[2] - ray_origin[2]}; + const float b[3] = {v1[0] - ray_origin[0], v1[1] - ray_origin[1], v1[2] - ray_origin[2]}; + const float c[3] = {v2[0] - ray_origin[0], v2[1] - ray_origin[1], v2[2] - ray_origin[2]}; + + const float a_kx = a[kx], a_ky = a[ky], a_kz = a[kz]; + const float b_kx = b[kx], b_ky = b[ky], b_kz = b[kz]; + const float c_kx = c[kx], c_ky = c[ky], c_kz = c[kz]; + + /* Perform shear and scale of vertices. */ + const float ax = a_kx - sx * a_kz; + const float ay = a_ky - sy * a_kz; + const float bx = b_kx - sx * b_kz; + const float by = b_ky - sy * b_kz; + const float cx = c_kx - sx * c_kz; + const float cy = c_ky - sy * c_kz; + + /* Calculate scaled barycentric coordinates. */ + const float u = cx * by - cy * bx; + const float v = ax * cy - ay * cx; + const float w = bx * ay - by * ax; + float det; + + if ((u < 0.0f || v < 0.0f || w < 0.0f) && + (u > 0.0f || v > 0.0f || w > 0.0f)) + { + return false; + } + + /* Calculate determinant. */ + det = u + v + w; + if (UNLIKELY(det == 0.0f)) { + return false; + } + else { + /* Calculate scaled z-coordinates of vertices and use them to calculate + * the hit distance. + */ + const float t = (u * a_kz + v * b_kz + w * c_kz) * sz; + /* Normalize u, v and t. */ + const float inv_det = 1.0f / det; + if (r_uv) { + r_uv[0] = u * inv_det; + r_uv[1] = v * inv_det; + } + *r_lambda = t * inv_det; + return true; + } +} + +MALWAYS_INLINE int isec_tri_quad(const float start[3], + const struct IsectRayPrecalc *isect_precalc, + const RayFace *face, + float r_uv[2], float *r_lambda) +{ + float uv[2], l; + + if (isect_ray_tri_watertight_v3(start, isect_precalc, face->v1, face->v2, face->v3, &l, uv)) { + /* check if intersection is within ray length */ + if (l > -RE_RAYTRACE_EPSILON && l < *r_lambda) { + r_uv[0] = -uv[0]; + r_uv[1] = -uv[1]; + *r_lambda = l; + return 1; + } + } + + /* intersect second triangle in quad */ + if (RE_rayface_isQuad(face)) { + if (isect_ray_tri_watertight_v3(start, isect_precalc, face->v1, face->v3, face->v4, &l, uv)) { + /* check if intersection is within ray length */ + if (l > -RE_RAYTRACE_EPSILON && l < *r_lambda) { + r_uv[0] = -uv[0]; + r_uv[1] = -uv[1]; + *r_lambda = l; + return 2; + } + } + } + + return 0; +} + +/* Simpler yes/no Ray Triangle/Quad Intersection */ + +MALWAYS_INLINE int isec_tri_quad_neighbour(const float start[3], + const float dir[3], + const RayFace *face) +{ + float r[3]; + struct IsectRayPrecalc isect_precalc; + float uv[2], l; + + negate_v3_v3(r, dir); /* note, different than above function */ + + isect_ray_tri_watertight_v3_precalc(&isect_precalc, r); + + if (isect_ray_tri_watertight_no_sign_check_v3(start, &isect_precalc, face->v1, face->v2, face->v3, &l, uv)) { + return 1; + } + + /* intersect second triangle in quad */ + if (RE_rayface_isQuad(face)) { + if (isect_ray_tri_watertight_no_sign_check_v3(start, &isect_precalc, face->v1, face->v3, face->v4, &l, uv)) { + return 2; + } + } + + return 0; +} + +/* RayFace intersection with checks and neighbor verifaction included, + * Isect is modified if the face is hit. */ + +MALWAYS_INLINE int intersect_rayface(RayObject *hit_obj, RayFace *face, Isect *is) +{ + float dist, uv[2]; + int ok = 0; + + /* avoid self-intersection */ + if (is->orig.ob == face->ob && is->orig.face == face->face) + return 0; + + /* check if we should intersect this face */ + if (is->check == RE_CHECK_VLR_RENDER) { + if (vlr_check_intersect(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + } + else if (is->check == RE_CHECK_VLR_NON_SOLID_MATERIAL) { + if (vlr_check_intersect(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + if (vlr_check_intersect_solid(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + } + else if (is->check == RE_CHECK_VLR_BAKE) { + if (vlr_check_bake(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + } + + /* ray counter */ + RE_RC_COUNT(is->raycounter->faces.test); + + dist = is->dist; + ok = isec_tri_quad(is->start, &is->isect_precalc, face, uv, &dist); + + if (ok) { + + /* when a shadow ray leaves a face, it can be little outside the edges + * of it, causing intersection to be detected in its neighbor face */ + if (is->skip & RE_SKIP_VLR_NEIGHBOUR) { + if (dist < 0.1f && is->orig.ob == face->ob) { + VlakRen *a = (VlakRen *)is->orig.face; + VlakRen *b = (VlakRen *)face->face; + ObjectRen *obr = ((ObjectInstanceRen *)face->ob)->obr; + + VertRen **va, **vb; + int *org_idx_a, *org_idx_b; + int i, j; + bool is_neighbor = false; + + /* "same" vertex means either the actual same VertRen, or the same 'final org index', if available + * (autosmooth only, currently). */ + for (i = 0, va = &a->v1; !is_neighbor && i < 4 && *va; ++i, ++va) { + org_idx_a = RE_vertren_get_origindex(obr, *va, false); + for (j = 0, vb = &b->v1; !is_neighbor && j < 4 && *vb; ++j, ++vb) { + if (*va == *vb) { + is_neighbor = true; + } + else if (org_idx_a) { + org_idx_b = RE_vertren_get_origindex(obr, *vb, 0); + if (org_idx_b && *org_idx_a == *org_idx_b) { + is_neighbor = true; + } + } + } + } + + /* So there's a shared edge or vertex, let's intersect ray with self, if that's true + * we can safely return 1, otherwise we assume the intersection is invalid, 0 */ + if (is_neighbor) { + /* create RayFace from original face, transformed if necessary */ + RayFace origface; + ObjectInstanceRen *ob = (ObjectInstanceRen *)is->orig.ob; + rayface_from_vlak(&origface, ob, (VlakRen *)is->orig.face); + + if (!isec_tri_quad_neighbour(is->start, is->dir, &origface)) { + return 0; + } + } + } + } + + RE_RC_COUNT(is->raycounter->faces.hit); + + is->isect = ok; // which half of the quad + is->dist = dist; + is->u = uv[0]; is->v = uv[1]; + + is->hit.ob = face->ob; + is->hit.face = face->face; +#ifdef RT_USE_LAST_HIT + is->last_hit = hit_obj; +#endif + return 1; + } + + return 0; +} + +/* Intersection */ + +int RE_rayobject_raycast(RayObject *r, Isect *isec) +{ + int i; + + /* Pre-calculate orientation for watertight intersection checks. */ + isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir); + + RE_RC_COUNT(isec->raycounter->raycast.test); + + /* setup vars used on raycast */ + for (i = 0; i < 3; i++) { + isec->idot_axis[i] = 1.0f / isec->dir[i]; + + isec->bv_index[2 * i] = isec->idot_axis[i] < 0.0f ? 1 : 0; + isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i]; + + isec->bv_index[2 * i] = i + 3 * isec->bv_index[2 * i]; + isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1]; + } + +#ifdef RT_USE_LAST_HIT + /* last hit heuristic */ + if (isec->mode == RE_RAY_SHADOW && isec->last_hit) { + RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.test); + + if (RE_rayobject_intersect(isec->last_hit, isec)) { + RE_RC_COUNT(isec->raycounter->raycast.hit); + RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.hit); + return 1; + } + } +#endif + +#ifdef RT_USE_HINT + isec->hit_hint = 0; +#endif + + if (RE_rayobject_intersect(r, isec)) { + RE_RC_COUNT(isec->raycounter->raycast.hit); + +#ifdef RT_USE_HINT + isec->hint = isec->hit_hint; +#endif + return 1; + } + + return 0; +} + +int RE_rayobject_intersect(RayObject *r, Isect *i) +{ + if (RE_rayobject_isRayFace(r)) { + return intersect_rayface(r, (RayFace *) RE_rayobject_align(r), i); + } + else if (RE_rayobject_isVlakPrimitive(r)) { + //TODO optimize (useless copy to RayFace to avoid duplicate code) + VlakPrimitive *face = (VlakPrimitive *) RE_rayobject_align(r); + RayFace nface; + rayface_from_vlak(&nface, face->ob, face->face); + + return intersect_rayface(r, &nface, i); + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + return r->api->raycast(r, i); + } + else { + assert(0); + return 0; + } +} + +/* Building */ + +void RE_rayobject_add(RayObject *r, RayObject *o) +{ + r = RE_rayobject_align(r); + return r->api->add(r, o); +} + +void RE_rayobject_done(RayObject *r) +{ + r = RE_rayobject_align(r); + r->api->done(r); +} + +void RE_rayobject_free(RayObject *r) +{ + r = RE_rayobject_align(r); + r->api->free(r); +} + +float RE_rayobject_cost(RayObject *r) +{ + if (RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r)) { + return 1.0f; + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + return r->api->cost(r); + } + else { + assert(0); + return 1.0f; + } +} + +/* Bounding Boxes */ + +void RE_rayobject_merge_bb(RayObject *r, float min[3], float max[3]) +{ + if (RE_rayobject_isRayFace(r)) { + RayFace *face = (RayFace *) RE_rayobject_align(r); + + DO_MINMAX(face->v1, min, max); + DO_MINMAX(face->v2, min, max); + DO_MINMAX(face->v3, min, max); + if (RE_rayface_isQuad(face)) DO_MINMAX(face->v4, min, max); + } + else if (RE_rayobject_isVlakPrimitive(r)) { + VlakPrimitive *face = (VlakPrimitive *) RE_rayobject_align(r); + RayFace nface; + rayface_from_vlak(&nface, face->ob, face->face); + + DO_MINMAX(nface.v1, min, max); + DO_MINMAX(nface.v2, min, max); + DO_MINMAX(nface.v3, min, max); + if (RE_rayface_isQuad(&nface)) DO_MINMAX(nface.v4, min, max); + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + r->api->bb(r, min, max); + } + else + assert(0); +} + +/* Hints */ + +void RE_rayobject_hint_bb(RayObject *r, RayHint *hint, float *min, float *max) +{ + if (RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r)) { + return; + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + return r->api->hint_bb(r, hint, min, max); + } + else + assert(0); +} + +/* RayObjectControl */ + +int RE_rayobjectcontrol_test_break(RayObjectControl *control) +{ + if (control->test_break) + return control->test_break(control->data); + + return 0; +} + +void RE_rayobject_set_control(RayObject *r, void *data, RE_rayobjectcontrol_test_break_callback test_break) +{ + if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + r->control.data = data; + r->control.test_break = test_break; + } +} + diff --git a/source/blender/render/intern/raytrace/rayobject_hint.h b/source/blender/render/intern/raytrace/rayobject_hint.h new file mode 100644 index 00000000000..88a32819bd2 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_hint.h @@ -0,0 +1,72 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_hint.h + * \ingroup render + */ + + +#ifndef __RAYOBJECT_HINT_H__ +#define __RAYOBJECT_HINT_H__ + +#define HINT_RECURSE 1 +#define HINT_ACCEPT 0 +#define HINT_DISCARD -1 + +struct HintBB { + float bb[6]; +}; + +inline int hint_test_bb(HintBB *obj, float *Nmin, float *Nmax) +{ + if (bb_fits_inside(Nmin, Nmax, obj->bb, obj->bb + 3) ) + return HINT_RECURSE; + else + return HINT_ACCEPT; +} +#if 0 +struct HintFrustum { + float co[3]; + float no[4][3]; +}; + +inline int hint_test_bb(HintFrustum &obj, float *Nmin, float *Nmax) +{ + //if frustum inside BB + { + return HINT_RECURSE; + } + //if BB outside frustum + { + return HINT_DISCARD; + } + + return HINT_ACCEPT; +} +#endif + +#endif /* __RAYOBJECT_HINT_H__ */ diff --git a/source/blender/render/intern/raytrace/rayobject_instance.cpp b/source/blender/render/intern/raytrace/rayobject_instance.cpp new file mode 100644 index 00000000000..361e7963d96 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_instance.cpp @@ -0,0 +1,211 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_instance.cpp + * \ingroup render + */ + + +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "rayintersection.h" +#include "rayobject.h" + +#define RE_COST_INSTANCE (1.0f) + +static int RE_rayobject_instance_intersect(RayObject *o, Isect *isec); +static void RE_rayobject_instance_free(RayObject *o); +static void RE_rayobject_instance_bb(RayObject *o, float *min, float *max); +static float RE_rayobject_instance_cost(RayObject *o); + +static void RE_rayobject_instance_hint_bb(RayObject *UNUSED(o), RayHint *UNUSED(hint), + float *UNUSED(min), float *UNUSED(max)) +{} + +static RayObjectAPI instance_api = +{ + RE_rayobject_instance_intersect, + NULL, //static void RE_rayobject_instance_add(RayObject *o, RayObject *ob); + NULL, //static void RE_rayobject_instance_done(RayObject *o); + RE_rayobject_instance_free, + RE_rayobject_instance_bb, + RE_rayobject_instance_cost, + RE_rayobject_instance_hint_bb +}; + +typedef struct InstanceRayObject { + RayObject rayobj; + RayObject *target; + + void *ob; //Object represented by this instance + void *target_ob; //Object represented by the inner RayObject, needed to handle self-intersection + + float global2target[4][4]; + float target2global[4][4]; + +} InstanceRayObject; + + +RayObject *RE_rayobject_instance_create(RayObject *target, float transform[4][4], void *ob, void *target_ob) +{ + InstanceRayObject *obj = (InstanceRayObject *)MEM_callocN(sizeof(InstanceRayObject), "InstanceRayObject"); + assert(RE_rayobject_isAligned(obj) ); /* RayObject API assumes real data to be 4-byte aligned */ + + obj->rayobj.api = &instance_api; + obj->target = target; + obj->ob = ob; + obj->target_ob = target_ob; + + copy_m4_m4(obj->target2global, transform); + invert_m4_m4(obj->global2target, obj->target2global); + + return RE_rayobject_unalignRayAPI((RayObject *) obj); +} + +static int RE_rayobject_instance_intersect(RayObject *o, Isect *isec) +{ + InstanceRayObject *obj = (InstanceRayObject *)o; + float start[3], dir[3], idot_axis[3], dist; + int changed = 0, i, res; + + // TODO - this is disabling self intersection on instances + if (isec->orig.ob == obj->ob && obj->ob) { + changed = 1; + isec->orig.ob = obj->target_ob; + } + + // backup old values + copy_v3_v3(start, isec->start); + copy_v3_v3(dir, isec->dir); + copy_v3_v3(idot_axis, isec->idot_axis); + dist = isec->dist; + + // transform to target coordinates system + mul_m4_v3(obj->global2target, isec->start); + mul_mat3_m4_v3(obj->global2target, isec->dir); + isec->dist *= normalize_v3(isec->dir); + + // update idot_axis and bv_index + for (i = 0; i < 3; i++) { + isec->idot_axis[i] = 1.0f / isec->dir[i]; + + isec->bv_index[2 * i] = isec->idot_axis[i] < 0.0f ? 1 : 0; + isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i]; + + isec->bv_index[2 * i] = i + 3 * isec->bv_index[2 * i]; + isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1]; + } + + // Pre-calculate orientation for watertight intersection checks. + isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir); + + // raycast + res = RE_rayobject_intersect(obj->target, isec); + + // map dist into original coordinate space + if (res == 0) { + isec->dist = dist; + } + else { + // note we don't just multiply dist, because of possible + // non-uniform scaling in the transform matrix + float vec[3]; + + mul_v3_v3fl(vec, isec->dir, isec->dist); + mul_mat3_m4_v3(obj->target2global, vec); + + isec->dist = len_v3(vec); + isec->hit.ob = obj->ob; + +#ifdef RT_USE_LAST_HIT + // TODO support for last hit optimization in instances that can jump + // directly to the last hit face. + // For now it jumps directly to the last-hit instance root node. + isec->last_hit = RE_rayobject_unalignRayAPI((RayObject *) obj); +#endif + } + + // restore values + copy_v3_v3(isec->start, start); + copy_v3_v3(isec->dir, dir); + copy_v3_v3(isec->idot_axis, idot_axis); + + if (changed) + isec->orig.ob = obj->ob; + + // restore bv_index + for (i = 0; i < 3; i++) { + isec->bv_index[2 * i] = isec->idot_axis[i] < 0.0f ? 1 : 0; + isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i]; + + isec->bv_index[2 * i] = i + 3 * isec->bv_index[2 * i]; + isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1]; + } + + // Pre-calculate orientation for watertight intersection checks. + isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir); + + return res; +} + +static void RE_rayobject_instance_free(RayObject *o) +{ + InstanceRayObject *obj = (InstanceRayObject *)o; + MEM_freeN(obj); +} + +static float RE_rayobject_instance_cost(RayObject *o) +{ + InstanceRayObject *obj = (InstanceRayObject *)o; + return RE_rayobject_cost(obj->target) + RE_COST_INSTANCE; +} + +static void RE_rayobject_instance_bb(RayObject *o, float *min, float *max) +{ + //TODO: + // *better bb.. calculated without rotations of bb + // *maybe cache that better-fitted-BB at the InstanceRayObject + InstanceRayObject *obj = (InstanceRayObject *)o; + + float m[3], M[3], t[3]; + int i, j; + INIT_MINMAX(m, M); + RE_rayobject_merge_bb(obj->target, m, M); + + //There must be a faster way than rotating all the 8 vertexs of the BB + for (i = 0; i < 8; i++) { + for (j = 0; j < 3; j++) t[j] = (i & (1 << j)) ? M[j] : m[j]; + mul_m4_v3(obj->target2global, t); + DO_MINMAX(t, min, max); + } +} + diff --git a/source/blender/render/intern/raytrace/rayobject_octree.cpp b/source/blender/render/intern/raytrace/rayobject_octree.cpp new file mode 100644 index 00000000000..4b73e64ca45 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_octree.cpp @@ -0,0 +1,1101 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 1990-1998 NeoGeo BV. + * All rights reserved. + * + * Contributors: 2004/2005 Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_octree.cpp + * \ingroup render + */ + + +/* IMPORTANT NOTE: this code must be independent of any other render code + * to use it outside the renderer! */ + +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include <float.h> +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "DNA_material_types.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "rayintersection.h" +#include "rayobject.h" + +/* ********** structs *************** */ +#define BRANCH_ARRAY 1024 +#define NODE_ARRAY 4096 + +typedef struct Branch { + struct Branch *b[8]; +} Branch; + +typedef struct OcVal { + short ocx, ocy, ocz; +} OcVal; + +typedef struct Node { + struct RayFace *v[8]; + struct OcVal ov[8]; + struct Node *next; +} Node; + +typedef struct Octree { + RayObject rayobj; + + struct Branch **adrbranch; + struct Node **adrnode; + float ocsize; /* ocsize: mult factor, max size octree */ + float ocfacx, ocfacy, ocfacz; + float min[3], max[3]; + int ocres; + int branchcount, nodecount; + + /* during building only */ + char *ocface; + + RayFace **ro_nodes; + int ro_nodes_size, ro_nodes_used; + +} Octree; + +static int RE_rayobject_octree_intersect(RayObject *o, Isect *isec); +static void RE_rayobject_octree_add(RayObject *o, RayObject *ob); +static void RE_rayobject_octree_done(RayObject *o); +static void RE_rayobject_octree_free(RayObject *o); +static void RE_rayobject_octree_bb(RayObject *o, float *min, float *max); + +/* + * This function is not expected to be called by current code state. + */ +static float RE_rayobject_octree_cost(RayObject *UNUSED(o)) +{ + return 1.0; +} + +static void RE_rayobject_octree_hint_bb(RayObject *UNUSED(o), RayHint *UNUSED(hint), + float *UNUSED(min), float *UNUSED(max)) +{ + return; +} + +static RayObjectAPI octree_api = +{ + RE_rayobject_octree_intersect, + RE_rayobject_octree_add, + RE_rayobject_octree_done, + RE_rayobject_octree_free, + RE_rayobject_octree_bb, + RE_rayobject_octree_cost, + RE_rayobject_octree_hint_bb +}; + +/* **************** ocval method ******************* */ +/* within one octree node, a set of 3x15 bits defines a 'boundbox' to OR with */ + +#define OCVALRES 15 +#define BROW16(min, max) \ + (((max) >= OCVALRES ? 0xFFFF : (1 << ((max) + 1)) - 1) - (((min) > 0) ? ((1 << (min)) - 1) : 0)) + +static void calc_ocval_face(float *v1, float *v2, float *v3, float *v4, short x, short y, short z, OcVal *ov) +{ + float min[3], max[3]; + int ocmin, ocmax; + + copy_v3_v3(min, v1); + copy_v3_v3(max, v1); + DO_MINMAX(v2, min, max); + DO_MINMAX(v3, min, max); + if (v4) { + DO_MINMAX(v4, min, max); + } + + ocmin = OCVALRES * (min[0] - x); + ocmax = OCVALRES * (max[0] - x); + ov->ocx = BROW16(ocmin, ocmax); + + ocmin = OCVALRES * (min[1] - y); + ocmax = OCVALRES * (max[1] - y); + ov->ocy = BROW16(ocmin, ocmax); + + ocmin = OCVALRES * (min[2] - z); + ocmax = OCVALRES * (max[2] - z); + ov->ocz = BROW16(ocmin, ocmax); + +} + +static void calc_ocval_ray(OcVal *ov, float xo, float yo, float zo, float *vec1, float *vec2) +{ + int ocmin, ocmax; + + if (vec1[0] < vec2[0]) { + ocmin = OCVALRES * (vec1[0] - xo); + ocmax = OCVALRES * (vec2[0] - xo); + } + else { + ocmin = OCVALRES * (vec2[0] - xo); + ocmax = OCVALRES * (vec1[0] - xo); + } + ov->ocx = BROW16(ocmin, ocmax); + + if (vec1[1] < vec2[1]) { + ocmin = OCVALRES * (vec1[1] - yo); + ocmax = OCVALRES * (vec2[1] - yo); + } + else { + ocmin = OCVALRES * (vec2[1] - yo); + ocmax = OCVALRES * (vec1[1] - yo); + } + ov->ocy = BROW16(ocmin, ocmax); + + if (vec1[2] < vec2[2]) { + ocmin = OCVALRES * (vec1[2] - zo); + ocmax = OCVALRES * (vec2[2] - zo); + } + else { + ocmin = OCVALRES * (vec2[2] - zo); + ocmax = OCVALRES * (vec1[2] - zo); + } + ov->ocz = BROW16(ocmin, ocmax); +} + +/* ************* octree ************** */ + +static Branch *addbranch(Octree *oc, Branch *br, short ocb) +{ + int index; + + if (br->b[ocb]) return br->b[ocb]; + + oc->branchcount++; + index = oc->branchcount >> 12; + + if (oc->adrbranch[index] == NULL) + oc->adrbranch[index] = (Branch *)MEM_callocN(4096 * sizeof(Branch), "new oc branch"); + + if (oc->branchcount >= BRANCH_ARRAY * 4096) { + printf("error; octree branches full\n"); + oc->branchcount = 0; + } + + return br->b[ocb] = oc->adrbranch[index] + (oc->branchcount & 4095); +} + +static Node *addnode(Octree *oc) +{ + int index; + + oc->nodecount++; + index = oc->nodecount >> 12; + + if (oc->adrnode[index] == NULL) + oc->adrnode[index] = (Node *)MEM_callocN(4096 * sizeof(Node), "addnode"); + + if (oc->nodecount > NODE_ARRAY * NODE_ARRAY) { + printf("error; octree nodes full\n"); + oc->nodecount = 0; + } + + return oc->adrnode[index] + (oc->nodecount & 4095); +} + +static bool face_in_node(RayFace *face, short x, short y, short z, float rtf[4][3]) +{ + static float nor[3], d; + float fx, fy, fz; + + // init static vars + if (face) { + normal_tri_v3(nor, rtf[0], rtf[1], rtf[2]); + d = -nor[0] * rtf[0][0] - nor[1] * rtf[0][1] - nor[2] * rtf[0][2]; + return 0; + } + + fx = x; + fy = y; + fz = z; + + if ((fx) * nor[0] + (fy) * nor[1] + (fz) * nor[2] + d > 0.0f) { + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz ) * nor[2] + d < 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d < 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d < 0.0f) return 1; + + if ((fx ) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + } + else { + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz ) * nor[2] + d > 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d > 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d > 0.0f) return 1; + + if ((fx ) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + } + + return 0; +} + +static void ocwrite(Octree *oc, RayFace *face, int quad, short x, short y, short z, float rtf[4][3]) +{ + Branch *br; + Node *no; + short a, oc0, oc1, oc2, oc3, oc4, oc5; + + x <<= 2; + y <<= 1; + + br = oc->adrbranch[0]; + + if (oc->ocres == 512) { + oc0 = ((x & 1024) + (y & 512) + (z & 256)) >> 8; + br = addbranch(oc, br, oc0); + } + if (oc->ocres >= 256) { + oc0 = ((x & 512) + (y & 256) + (z & 128)) >> 7; + br = addbranch(oc, br, oc0); + } + if (oc->ocres >= 128) { + oc0 = ((x & 256) + (y & 128) + (z & 64)) >> 6; + br = addbranch(oc, br, oc0); + } + + oc0 = ((x & 128) + (y & 64) + (z & 32)) >> 5; + oc1 = ((x & 64) + (y & 32) + (z & 16)) >> 4; + oc2 = ((x & 32) + (y & 16) + (z & 8)) >> 3; + oc3 = ((x & 16) + (y & 8) + (z & 4)) >> 2; + oc4 = ((x & 8) + (y & 4) + (z & 2)) >> 1; + oc5 = ((x & 4) + (y & 2) + (z & 1)); + + br = addbranch(oc, br, oc0); + br = addbranch(oc, br, oc1); + br = addbranch(oc, br, oc2); + br = addbranch(oc, br, oc3); + br = addbranch(oc, br, oc4); + no = (Node *)br->b[oc5]; + if (no == NULL) br->b[oc5] = (Branch *)(no = addnode(oc)); + + while (no->next) no = no->next; + + a = 0; + if (no->v[7]) { /* node full */ + no->next = addnode(oc); + no = no->next; + } + else { + while (no->v[a] != NULL) a++; + } + + no->v[a] = (RayFace *) RE_rayobject_align(face); + + if (quad) + calc_ocval_face(rtf[0], rtf[1], rtf[2], rtf[3], x >> 2, y >> 1, z, &no->ov[a]); + else + calc_ocval_face(rtf[0], rtf[1], rtf[2], NULL, x >> 2, y >> 1, z, &no->ov[a]); +} + +static void d2dda(Octree *oc, short b1, short b2, short c1, short c2, char *ocface, short rts[4][3], float rtf[4][3]) +{ + int ocx1, ocx2, ocy1, ocy2; + int x, y, dx = 0, dy = 0; + float ox1, ox2, oy1, oy2; + float lambda, lambda_o, lambda_x, lambda_y, ldx, ldy; + + ocx1 = rts[b1][c1]; + ocy1 = rts[b1][c2]; + ocx2 = rts[b2][c1]; + ocy2 = rts[b2][c2]; + + if (ocx1 == ocx2 && ocy1 == ocy2) { + ocface[oc->ocres * ocx1 + ocy1] = 1; + return; + } + + ox1 = rtf[b1][c1]; + oy1 = rtf[b1][c2]; + ox2 = rtf[b2][c1]; + oy2 = rtf[b2][c2]; + + if (ox1 != ox2) { + if (ox2 - ox1 > 0.0f) { + lambda_x = (ox1 - ocx1 - 1.0f) / (ox1 - ox2); + ldx = -1.0f / (ox1 - ox2); + dx = 1; + } + else { + lambda_x = (ox1 - ocx1) / (ox1 - ox2); + ldx = 1.0f / (ox1 - ox2); + dx = -1; + } + } + else { + lambda_x = 1.0f; + ldx = 0; + } + + if (oy1 != oy2) { + if (oy2 - oy1 > 0.0f) { + lambda_y = (oy1 - ocy1 - 1.0f) / (oy1 - oy2); + ldy = -1.0f / (oy1 - oy2); + dy = 1; + } + else { + lambda_y = (oy1 - ocy1) / (oy1 - oy2); + ldy = 1.0f / (oy1 - oy2); + dy = -1; + } + } + else { + lambda_y = 1.0f; + ldy = 0; + } + + x = ocx1; y = ocy1; + lambda = MIN2(lambda_x, lambda_y); + + while (true) { + + if (x < 0 || y < 0 || x >= oc->ocres || y >= oc->ocres) { + /* pass*/ + } + else { + ocface[oc->ocres * x + y] = 1; + } + + lambda_o = lambda; + if (lambda_x == lambda_y) { + lambda_x += ldx; + x += dx; + lambda_y += ldy; + y += dy; + } + else { + if (lambda_x < lambda_y) { + lambda_x += ldx; + x += dx; + } + else { + lambda_y += ldy; + y += dy; + } + } + lambda = MIN2(lambda_x, lambda_y); + if (lambda == lambda_o) break; + if (lambda >= 1.0f) break; + } + ocface[oc->ocres * ocx2 + ocy2] = 1; +} + +static void filltriangle(Octree *oc, short c1, short c2, char *ocface, short *ocmin, short *ocmax) +{ + int a, x, y, y1, y2; + + for (x = ocmin[c1]; x <= ocmax[c1]; x++) { + a = oc->ocres * x; + for (y = ocmin[c2]; y <= ocmax[c2]; y++) { + if (ocface[a + y]) { + y++; + while (ocface[a + y] && y != ocmax[c2]) y++; + for (y1 = ocmax[c2]; y1 > y; y1--) { + if (ocface[a + y1]) { + for (y2 = y; y2 <= y1; y2++) ocface[a + y2] = 1; + y1 = 0; + } + } + y = ocmax[c2]; + } + } + } +} + +static void RE_rayobject_octree_free(RayObject *tree) +{ + Octree *oc = (Octree *)tree; + +#if 0 + printf("branches %d nodes %d\n", oc->branchcount, oc->nodecount); + printf("raycount %d\n", raycount); + printf("ray coherent %d\n", coherent_ray); + printf("accepted %d rejected %d\n", accepted, rejected); +#endif + if (oc->ocface) + MEM_freeN(oc->ocface); + + if (oc->adrbranch) { + int a = 0; + while (oc->adrbranch[a]) { + MEM_freeN(oc->adrbranch[a]); + oc->adrbranch[a] = NULL; + a++; + } + MEM_freeN(oc->adrbranch); + oc->adrbranch = NULL; + } + oc->branchcount = 0; + + if (oc->adrnode) { + int a = 0; + while (oc->adrnode[a]) { + MEM_freeN(oc->adrnode[a]); + oc->adrnode[a] = NULL; + a++; + } + MEM_freeN(oc->adrnode); + oc->adrnode = NULL; + } + oc->nodecount = 0; + + MEM_freeN(oc); +} + + +RayObject *RE_rayobject_octree_create(int ocres, int size) +{ + Octree *oc = (Octree *)MEM_callocN(sizeof(Octree), "Octree"); + assert(RE_rayobject_isAligned(oc) ); /* RayObject API assumes real data to be 4-byte aligned */ + + oc->rayobj.api = &octree_api; + + oc->ocres = ocres; + + oc->ro_nodes = (RayFace **)MEM_callocN(sizeof(RayFace *) * size, "octree rayobject nodes"); + oc->ro_nodes_size = size; + oc->ro_nodes_used = 0; + + + return RE_rayobject_unalignRayAPI((RayObject *) oc); +} + + +static void RE_rayobject_octree_add(RayObject *tree, RayObject *node) +{ + Octree *oc = (Octree *)tree; + + assert(RE_rayobject_isRayFace(node) ); + assert(oc->ro_nodes_used < oc->ro_nodes_size); + oc->ro_nodes[oc->ro_nodes_used++] = (RayFace *)RE_rayobject_align(node); +} + +static void octree_fill_rayface(Octree *oc, RayFace *face) +{ + float ocfac[3], rtf[4][3]; + float co1[3], co2[3], co3[3], co4[3]; + short rts[4][3]; + short ocmin[3], ocmax[3]; + char *ocface = oc->ocface; // front, top, size view of face, to fill in + int a, b, c, oc1, oc2, oc3, oc4, x, y, z, ocres2; + + ocfac[0] = oc->ocfacx; + ocfac[1] = oc->ocfacy; + ocfac[2] = oc->ocfacz; + + ocres2 = oc->ocres * oc->ocres; + + copy_v3_v3(co1, face->v1); + copy_v3_v3(co2, face->v2); + copy_v3_v3(co3, face->v3); + if (RE_rayface_isQuad(face)) + copy_v3_v3(co4, face->v4); + + for (c = 0; c < 3; c++) { + rtf[0][c] = (co1[c] - oc->min[c]) * ocfac[c]; + rts[0][c] = (short)rtf[0][c]; + rtf[1][c] = (co2[c] - oc->min[c]) * ocfac[c]; + rts[1][c] = (short)rtf[1][c]; + rtf[2][c] = (co3[c] - oc->min[c]) * ocfac[c]; + rts[2][c] = (short)rtf[2][c]; + if (RE_rayface_isQuad(face)) { + rtf[3][c] = (co4[c] - oc->min[c]) * ocfac[c]; + rts[3][c] = (short)rtf[3][c]; + } + } + + for (c = 0; c < 3; c++) { + oc1 = rts[0][c]; + oc2 = rts[1][c]; + oc3 = rts[2][c]; + if (!RE_rayface_isQuad(face)) { + ocmin[c] = min_iii(oc1, oc2, oc3); + ocmax[c] = max_iii(oc1, oc2, oc3); + } + else { + oc4 = rts[3][c]; + ocmin[c] = min_iiii(oc1, oc2, oc3, oc4); + ocmax[c] = max_iiii(oc1, oc2, oc3, oc4); + } + if (ocmax[c] > oc->ocres - 1) ocmax[c] = oc->ocres - 1; + if (ocmin[c] < 0) ocmin[c] = 0; + } + + if (ocmin[0] == ocmax[0] && ocmin[1] == ocmax[1] && ocmin[2] == ocmax[2]) { + ocwrite(oc, face, RE_rayface_isQuad(face), ocmin[0], ocmin[1], ocmin[2], rtf); + } + else { + + d2dda(oc, 0, 1, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 0, 1, 0, 2, ocface, rts, rtf); + d2dda(oc, 0, 1, 1, 2, ocface + 2 * ocres2, rts, rtf); + d2dda(oc, 1, 2, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 1, 2, 0, 2, ocface, rts, rtf); + d2dda(oc, 1, 2, 1, 2, ocface + 2 * ocres2, rts, rtf); + if (!RE_rayface_isQuad(face)) { + d2dda(oc, 2, 0, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 2, 0, 0, 2, ocface, rts, rtf); + d2dda(oc, 2, 0, 1, 2, ocface + 2 * ocres2, rts, rtf); + } + else { + d2dda(oc, 2, 3, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 2, 3, 0, 2, ocface, rts, rtf); + d2dda(oc, 2, 3, 1, 2, ocface + 2 * ocres2, rts, rtf); + d2dda(oc, 3, 0, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 3, 0, 0, 2, ocface, rts, rtf); + d2dda(oc, 3, 0, 1, 2, ocface + 2 * ocres2, rts, rtf); + } + /* nothing todo with triangle..., just fills :) */ + filltriangle(oc, 0, 1, ocface + ocres2, ocmin, ocmax); + filltriangle(oc, 0, 2, ocface, ocmin, ocmax); + filltriangle(oc, 1, 2, ocface + 2 * ocres2, ocmin, ocmax); + + /* init static vars here */ + face_in_node(face, 0, 0, 0, rtf); + + for (x = ocmin[0]; x <= ocmax[0]; x++) { + a = oc->ocres * x; + for (y = ocmin[1]; y <= ocmax[1]; y++) { + if (ocface[a + y + ocres2]) { + b = oc->ocres * y + 2 * ocres2; + for (z = ocmin[2]; z <= ocmax[2]; z++) { + if (ocface[b + z] && ocface[a + z]) { + if (face_in_node(NULL, x, y, z, rtf)) + ocwrite(oc, face, RE_rayface_isQuad(face), x, y, z, rtf); + } + } + } + } + } + + /* same loops to clear octree, doubt it can be done smarter */ + for (x = ocmin[0]; x <= ocmax[0]; x++) { + a = oc->ocres * x; + for (y = ocmin[1]; y <= ocmax[1]; y++) { + /* x-y */ + ocface[a + y + ocres2] = 0; + + b = oc->ocres * y + 2 * ocres2; + for (z = ocmin[2]; z <= ocmax[2]; z++) { + /* y-z */ + ocface[b + z] = 0; + /* x-z */ + ocface[a + z] = 0; + } + } + } + } +} + +static void RE_rayobject_octree_done(RayObject *tree) +{ + Octree *oc = (Octree *)tree; + int c; + float t00, t01, t02; + int ocres2 = oc->ocres * oc->ocres; + + INIT_MINMAX(oc->min, oc->max); + + /* Calculate Bounding Box */ + for (c = 0; c < oc->ro_nodes_used; c++) + RE_rayobject_merge_bb(RE_rayobject_unalignRayFace(oc->ro_nodes[c]), oc->min, oc->max); + + /* Alloc memory */ + oc->adrbranch = (Branch **)MEM_callocN(sizeof(void *) * BRANCH_ARRAY, "octree branches"); + oc->adrnode = (Node **)MEM_callocN(sizeof(void *) * NODE_ARRAY, "octree nodes"); + + oc->adrbranch[0] = (Branch *)MEM_callocN(4096 * sizeof(Branch), "makeoctree"); + + /* the lookup table, per face, for which nodes to fill in */ + oc->ocface = (char *)MEM_callocN(3 * ocres2 + 8, "ocface"); + memset(oc->ocface, 0, 3 * ocres2); + + for (c = 0; c < 3; c++) { /* octree enlarge, still needed? */ + oc->min[c] -= 0.01f; + oc->max[c] += 0.01f; + } + + t00 = oc->max[0] - oc->min[0]; + t01 = oc->max[1] - oc->min[1]; + t02 = oc->max[2] - oc->min[2]; + + /* this minus 0.1 is old safety... seems to be needed? */ + oc->ocfacx = (oc->ocres - 0.1f) / t00; + oc->ocfacy = (oc->ocres - 0.1f) / t01; + oc->ocfacz = (oc->ocres - 0.1f) / t02; + + oc->ocsize = sqrtf(t00 * t00 + t01 * t01 + t02 * t02); /* global, max size octree */ + + for (c = 0; c < oc->ro_nodes_used; c++) { + octree_fill_rayface(oc, oc->ro_nodes[c]); + } + + MEM_freeN(oc->ocface); + oc->ocface = NULL; + MEM_freeN(oc->ro_nodes); + oc->ro_nodes = NULL; + +#if 0 + printf("%f %f - %f\n", oc->min[0], oc->max[0], oc->ocfacx); + printf("%f %f - %f\n", oc->min[1], oc->max[1], oc->ocfacy); + printf("%f %f - %f\n", oc->min[2], oc->max[2], oc->ocfacz); +#endif +} + +static void RE_rayobject_octree_bb(RayObject *tree, float *min, float *max) +{ + Octree *oc = (Octree *)tree; + DO_MINMAX(oc->min, min, max); + DO_MINMAX(oc->max, min, max); +} + +/* check all faces in this node */ +static int testnode(Octree *UNUSED(oc), Isect *is, Node *no, OcVal ocval) +{ + short nr = 0; + + /* return on any first hit */ + if (is->mode == RE_RAY_SHADOW) { + + for (; no; no = no->next) { + for (nr = 0; nr < 8; nr++) { + RayFace *face = no->v[nr]; + OcVal *ov = no->ov + nr; + + if (!face) break; + + if ( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) { + if (RE_rayobject_intersect(RE_rayobject_unalignRayFace(face), is) ) + return 1; + } + } + } + } + else { + /* else mirror or glass or shadowtra, return closest face */ + int found = 0; + + for (; no; no = no->next) { + for (nr = 0; nr < 8; nr++) { + RayFace *face = no->v[nr]; + OcVal *ov = no->ov + nr; + + if (!face) break; + + if ( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) { + if (RE_rayobject_intersect(RE_rayobject_unalignRayFace(face), is) ) { + found = 1; + } + } + } + } + + return found; + } + + return 0; +} + +/* find the Node for the octree coord x y z */ +static Node *ocread(Octree *oc, int x, int y, int z) +{ + Branch *br; + int oc1; + + x <<= 2; + y <<= 1; + + br = oc->adrbranch[0]; + + if (oc->ocres == 512) { + oc1 = ((x & 1024) + (y & 512) + (z & 256)) >> 8; + br = br->b[oc1]; + if (br == NULL) { + return NULL; + } + } + if (oc->ocres >= 256) { + oc1 = ((x & 512) + (y & 256) + (z & 128)) >> 7; + br = br->b[oc1]; + if (br == NULL) { + return NULL; + } + } + if (oc->ocres >= 128) { + oc1 = ((x & 256) + (y & 128) + (z & 64)) >> 6; + br = br->b[oc1]; + if (br == NULL) { + return NULL; + } + } + + oc1 = ((x & 128) + (y & 64) + (z & 32)) >> 5; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 64) + (y & 32) + (z & 16)) >> 4; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 32) + (y & 16) + (z & 8)) >> 3; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 16) + (y & 8) + (z & 4)) >> 2; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 8) + (y & 4) + (z & 2)) >> 1; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 4) + (y & 2) + (z & 1)); + return (Node *)br->b[oc1]; + } + } + } + } + } + + return NULL; +} + +static int cliptest(float p, float q, float *u1, float *u2) +{ + float r; + + if (p < 0.0f) { + if (q < p) return 0; + else if (q < 0.0f) { + r = q / p; + if (r > *u2) return 0; + else if (r > *u1) *u1 = r; + } + } + else { + if (p > 0.0f) { + if (q < 0.0f) return 0; + else if (q < p) { + r = q / p; + if (r < *u1) return 0; + else if (r < *u2) *u2 = r; + } + } + else if (q < 0.0f) return 0; + } + return 1; +} + +/* extensive coherence checks/storage cancels out the benefit of it, and gives errors... we + * need better methods, sample code commented out below (ton) */ + +#if 0 + +in top : static int coh_nodes[16 * 16 * 16][6]; +in makeoctree : memset(coh_nodes, 0, sizeof(coh_nodes)); + +static void add_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2) +{ + short *sp; + + sp = coh_nodes[(ocx2 & 15) + 16 * (ocy2 & 15) + 256 * (ocz2 & 15)]; + sp[0] = ocx1; sp[1] = ocy1; sp[2] = ocz1; + sp[3] = ocx2; sp[4] = ocy2; sp[5] = ocz2; + +} + +static int do_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2) +{ + short *sp; + + sp = coh_nodes[(ocx2 & 15) + 16 * (ocy2 & 15) + 256 * (ocz2 & 15)]; + if (sp[0] == ocx1 && sp[1] == ocy1 && sp[2] == ocz1 && + sp[3] == ocx2 && sp[4] == ocy2 && sp[5] == ocz2) return 1; + return 0; +} + +#endif + +/* return 1: found valid intersection */ +/* starts with is->orig.face */ +static int RE_rayobject_octree_intersect(RayObject *tree, Isect *is) +{ + Octree *oc = (Octree *)tree; + Node *no; + OcVal ocval; + float vec1[3], vec2[3], start[3], end[3]; + float u1, u2, ox1, ox2, oy1, oy2, oz1, oz2; + float lambda_o, lambda_x, ldx, lambda_y, ldy, lambda_z, ldz, dda_lambda; + float o_lambda = 0; + int dx, dy, dz; + int xo, yo, zo, c1 = 0; + int ocx1, ocx2, ocy1, ocy2, ocz1, ocz2; + + /* clip with octree */ + if (oc->branchcount == 0) return 0; + + /* do this before intersect calls */ +#if 0 + is->facecontr = NULL; /* to check shared edge */ + is->obcontr = 0; + is->faceisect = is->isect = 0; /* shared edge, quad half flag */ + is->userdata = oc->userdata; +#endif + + copy_v3_v3(start, is->start); + madd_v3_v3v3fl(end, is->start, is->dir, is->dist); + ldx = is->dir[0] * is->dist; + o_lambda = is->dist; + u1 = 0.0f; + u2 = 1.0f; + + /* clip with octree cube */ + if (cliptest(-ldx, start[0] - oc->min[0], &u1, &u2)) { + if (cliptest(ldx, oc->max[0] - start[0], &u1, &u2)) { + ldy = is->dir[1] * is->dist; + if (cliptest(-ldy, start[1] - oc->min[1], &u1, &u2)) { + if (cliptest(ldy, oc->max[1] - start[1], &u1, &u2)) { + ldz = is->dir[2] * is->dist; + if (cliptest(-ldz, start[2] - oc->min[2], &u1, &u2)) { + if (cliptest(ldz, oc->max[2] - start[2], &u1, &u2)) { + c1 = 1; + if (u2 < 1.0f) { + end[0] = start[0] + u2 * ldx; + end[1] = start[1] + u2 * ldy; + end[2] = start[2] + u2 * ldz; + } + + if (u1 > 0.0f) { + start[0] += u1 * ldx; + start[1] += u1 * ldy; + start[2] += u1 * ldz; + } + } + } + } + } + } + } + + if (c1 == 0) return 0; + + /* reset static variables in ocread */ + //ocread(oc, oc->ocres, 0, 0); + + /* setup 3dda to traverse octree */ + ox1 = (start[0] - oc->min[0]) * oc->ocfacx; + oy1 = (start[1] - oc->min[1]) * oc->ocfacy; + oz1 = (start[2] - oc->min[2]) * oc->ocfacz; + ox2 = (end[0] - oc->min[0]) * oc->ocfacx; + oy2 = (end[1] - oc->min[1]) * oc->ocfacy; + oz2 = (end[2] - oc->min[2]) * oc->ocfacz; + + ocx1 = (int)ox1; + ocy1 = (int)oy1; + ocz1 = (int)oz1; + ocx2 = (int)ox2; + ocy2 = (int)oy2; + ocz2 = (int)oz2; + + if (ocx1 == ocx2 && ocy1 == ocy2 && ocz1 == ocz2) { + no = ocread(oc, ocx1, ocy1, ocz1); + if (no) { + /* exact intersection with node */ + vec1[0] = ox1; vec1[1] = oy1; vec1[2] = oz1; + vec2[0] = ox2; vec2[1] = oy2; vec2[2] = oz2; + calc_ocval_ray(&ocval, (float)ocx1, (float)ocy1, (float)ocz1, vec1, vec2); + if (testnode(oc, is, no, ocval) ) return 1; + } + } + else { + int found = 0; + //static int coh_ocx1, coh_ocx2, coh_ocy1, coh_ocy2, coh_ocz1, coh_ocz2; + float dox, doy, doz; + int eqval; + + /* calc lambda en ld */ + dox = ox1 - ox2; + doy = oy1 - oy2; + doz = oz1 - oz2; + + if (dox < -FLT_EPSILON) { + ldx = -1.0f / dox; + lambda_x = (ocx1 - ox1 + 1.0f) * ldx; + dx = 1; + } + else if (dox > FLT_EPSILON) { + ldx = 1.0f / dox; + lambda_x = (ox1 - ocx1) * ldx; + dx = -1; + } + else { + lambda_x = 1.0f; + ldx = 0; + dx = 0; + } + + if (doy < -FLT_EPSILON) { + ldy = -1.0f / doy; + lambda_y = (ocy1 - oy1 + 1.0f) * ldy; + dy = 1; + } + else if (doy > FLT_EPSILON) { + ldy = 1.0f / doy; + lambda_y = (oy1 - ocy1) * ldy; + dy = -1; + } + else { + lambda_y = 1.0f; + ldy = 0; + dy = 0; + } + + if (doz < -FLT_EPSILON) { + ldz = -1.0f / doz; + lambda_z = (ocz1 - oz1 + 1.0f) * ldz; + dz = 1; + } + else if (doz > FLT_EPSILON) { + ldz = 1.0f / doz; + lambda_z = (oz1 - ocz1) * ldz; + dz = -1; + } + else { + lambda_z = 1.0f; + ldz = 0; + dz = 0; + } + + xo = ocx1; yo = ocy1; zo = ocz1; + dda_lambda = min_fff(lambda_x, lambda_y, lambda_z); + + vec2[0] = ox1; + vec2[1] = oy1; + vec2[2] = oz1; + + /* this loop has been constructed to make sure the first and last node of ray + * are always included, even when dda_lambda==1.0f or larger */ + + while (true) { + + no = ocread(oc, xo, yo, zo); + if (no) { + + /* calculate ray intersection with octree node */ + copy_v3_v3(vec1, vec2); + // dox, y, z is negative + vec2[0] = ox1 - dda_lambda * dox; + vec2[1] = oy1 - dda_lambda * doy; + vec2[2] = oz1 - dda_lambda * doz; + calc_ocval_ray(&ocval, (float)xo, (float)yo, (float)zo, vec1, vec2); + + //is->dist = (u1 + dda_lambda * (u2 - u1)) * o_lambda; + if (testnode(oc, is, no, ocval) ) + found = 1; + + if (is->dist < (u1 + dda_lambda * (u2 - u1)) * o_lambda) + return found; + } + + + lambda_o = dda_lambda; + + /* traversing octree nodes need careful detection of smallest values, with proper + * exceptions for equal lambdas */ + eqval = (lambda_x == lambda_y); + if (lambda_y == lambda_z) eqval += 2; + if (lambda_x == lambda_z) eqval += 4; + + if (eqval) { // only 4 cases exist! + if (eqval == 7) { // x=y=z + xo += dx; lambda_x += ldx; + yo += dy; lambda_y += ldy; + zo += dz; lambda_z += ldz; + } + else if (eqval == 1) { // x=y + if (lambda_y < lambda_z) { + xo += dx; lambda_x += ldx; + yo += dy; lambda_y += ldy; + } + else { + zo += dz; lambda_z += ldz; + } + } + else if (eqval == 2) { // y=z + if (lambda_x < lambda_y) { + xo += dx; lambda_x += ldx; + } + else { + yo += dy; lambda_y += ldy; + zo += dz; lambda_z += ldz; + } + } + else { // x=z + if (lambda_y < lambda_x) { + yo += dy; lambda_y += ldy; + } + else { + xo += dx; lambda_x += ldx; + zo += dz; lambda_z += ldz; + } + } + } + else { // all three different, just three cases exist + eqval = (lambda_x < lambda_y); + if (lambda_y < lambda_z) eqval += 2; + if (lambda_x < lambda_z) eqval += 4; + + if (eqval == 7 || eqval == 5) { // x smallest + xo += dx; lambda_x += ldx; + } + else if (eqval == 2 || eqval == 6) { // y smallest + yo += dy; lambda_y += ldy; + } + else { // z smallest + zo += dz; lambda_z += ldz; + } + + } + + dda_lambda = min_fff(lambda_x, lambda_y, lambda_z); + if (dda_lambda == lambda_o) break; + /* to make sure the last node is always checked */ + if (lambda_o >= 1.0f) break; + } + } + + /* reached end, no intersections found */ + return 0; +} + + + diff --git a/source/blender/render/intern/raytrace/rayobject_qbvh.cpp b/source/blender/render/intern/raytrace/rayobject_qbvh.cpp new file mode 100644 index 00000000000..8e3dd87efd1 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_qbvh.cpp @@ -0,0 +1,160 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_qbvh.cpp + * \ingroup render + */ + + +#include "MEM_guardedalloc.h" + +#include "BLI_utildefines.h" + +#include "vbvh.h" +#include "svbvh.h" +#include "reorganize.h" + +#ifdef __SSE__ + +#define DFS_STACK_SIZE 256 + +struct QBVHTree { + RayObject rayobj; + + SVBVHNode *root; + MemArena *node_arena; + + float cost; + RTBuilder *builder; +}; + + +template<> +void bvh_done<QBVHTree>(QBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "qbvh arena"); + BLI_memarena_use_malloc(arena1); + + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "qbvh arena 2"); + BLI_memarena_use_malloc(arena2); + BLI_memarena_use_align(arena2, 16); + + //Build and optimize the tree + //TODO do this in 1 pass (half memory usage during building) + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + if (root) { + pushup_simd<VBVHNode, 4>(root); + obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root); + } + else + obj->root = NULL; + + //Free data + BLI_memarena_free(arena1); + + obj->node_arena = arena2; + obj->cost = 1.0; + + rtbuild_free(obj->builder); + obj->builder = NULL; +} + +template<int StackSize> +static int intersect(QBVHTree *obj, Isect *isec) +{ + //TODO renable hint support + if (RE_rayobject_isAligned(obj->root)) { + if (isec->mode == RE_RAY_SHADOW) + return svbvh_node_stack_raycast<StackSize, true>(obj->root, isec); + else + return svbvh_node_stack_raycast<StackSize, false>(obj->root, isec); + } + else + return RE_rayobject_intersect((RayObject *)obj->root, isec); +} + +template<class Tree> +static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max)) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject *)tree->root; + } +} +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +static RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int (*)(Tree *, Isect *)) & intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void (*)(Tree *, RayObject *)) & bvh_add<Tree>), + (RE_rayobject_done_callback) ((void (*)(Tree *)) & bvh_done<Tree>), + (RE_rayobject_free_callback) ((void (*)(Tree *)) & bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void (*)(Tree *, float *, float *)) & bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float (*)(Tree *)) & bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI *bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree, 1024>(); + + if (maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return NULL; +} + +RayObject *RE_rayobject_qbvh_create(int size) +{ + return bvh_create_tree<QBVHTree, DFS_STACK_SIZE>(size); +} + +#else + +RayObject *RE_rayobject_qbvh_create(int UNUSED(size)) +{ + puts("WARNING: SSE disabled at compile time\n"); + return NULL; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_raycounter.cpp b/source/blender/render/intern/raytrace/rayobject_raycounter.cpp new file mode 100644 index 00000000000..429c47f1c0f --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_raycounter.cpp @@ -0,0 +1,91 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_raycounter.cpp + * \ingroup render + */ + + +#include "rayobject.h" +#include "raycounter.h" + +#ifdef RE_RAYCOUNTER + +void RE_RC_INFO(RayCounter *info) +{ + printf("----------- Raycast counter --------\n"); + printf("Rays total: %llu\n", info->raycast.test ); + printf("Rays hit: %llu\n", info->raycast.hit ); + printf("\n"); + printf("BB tests: %llu\n", info->bb.test ); + printf("BB hits: %llu\n", info->bb.hit ); + printf("\n"); + printf("SIMD BB tests: %llu\n", info->simd_bb.test ); + printf("SIMD BB hits: %llu\n", info->simd_bb.hit ); + printf("\n"); + printf("Primitives tests: %llu\n", info->faces.test ); + printf("Primitives hits: %llu\n", info->faces.hit ); + printf("------------------------------------\n"); + printf("Shadow last-hit tests per ray: %f\n", info->rayshadow_last_hit.test / ((float)info->raycast.test) ); + printf("Shadow last-hit hits per ray: %f\n", info->rayshadow_last_hit.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("Hint tests per ray: %f\n", info->raytrace_hint.test / ((float)info->raycast.test) ); + printf("Hint hits per ray: %f\n", info->raytrace_hint.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("BB tests per ray: %f\n", info->bb.test / ((float)info->raycast.test) ); + printf("BB hits per ray: %f\n", info->bb.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("SIMD tests per ray: %f\n", info->simd_bb.test / ((float)info->raycast.test) ); + printf("SIMD hits per ray: %f\n", info->simd_bb.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("Primitives tests per ray: %f\n", info->faces.test / ((float)info->raycast.test) ); + printf("Primitives hits per ray: %f\n", info->faces.hit / ((float)info->raycast.test) ); + printf("------------------------------------\n"); +} + +void RE_RC_MERGE(RayCounter *dest, RayCounter *tmp) +{ + dest->faces.test += tmp->faces.test; + dest->faces.hit += tmp->faces.hit; + + dest->bb.test += tmp->bb.test; + dest->bb.hit += tmp->bb.hit; + + dest->simd_bb.test += tmp->simd_bb.test; + dest->simd_bb.hit += tmp->simd_bb.hit; + + dest->raycast.test += tmp->raycast.test; + dest->raycast.hit += tmp->raycast.hit; + + dest->rayshadow_last_hit.test += tmp->rayshadow_last_hit.test; + dest->rayshadow_last_hit.hit += tmp->rayshadow_last_hit.hit; + + dest->raytrace_hint.test += tmp->raytrace_hint.test; + dest->raytrace_hint.hit += tmp->raytrace_hint.hit; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp new file mode 100644 index 00000000000..51f89784674 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp @@ -0,0 +1,531 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_rtbuild.cpp + * \ingroup render + */ + + +#include <assert.h> +#include <stdlib.h> +#include <algorithm> + +#if __cplusplus >= 201103L +#include <cmath> +using std::isfinite; +#else +#include <math.h> +#endif + +#include "rayobject_rtbuild.h" + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +static bool selected_node(RTBuilder::Object *node) +{ + return node->selected; +} + +static void rtbuild_init(RTBuilder *b) +{ + b->split_axis = -1; + b->primitives.begin = NULL; + b->primitives.end = NULL; + b->primitives.maxsize = 0; + b->depth = 0; + + for (int i = 0; i < RTBUILD_MAX_CHILDS; i++) + b->child_offset[i] = 0; + + for (int i = 0; i < 3; i++) + b->sorted_begin[i] = b->sorted_end[i] = NULL; + + INIT_MINMAX(b->bb, b->bb + 3); +} + +RTBuilder *rtbuild_create(int size) +{ + RTBuilder *builder = (RTBuilder *) MEM_mallocN(sizeof(RTBuilder), "RTBuilder"); + RTBuilder::Object *memblock = (RTBuilder::Object *)MEM_mallocN(sizeof(RTBuilder::Object) * size, "RTBuilder.objects"); + + + rtbuild_init(builder); + + builder->primitives.begin = builder->primitives.end = memblock; + builder->primitives.maxsize = size; + + for (int i = 0; i < 3; i++) { + builder->sorted_begin[i] = (RTBuilder::Object **)MEM_mallocN(sizeof(RTBuilder::Object *) * size, "RTBuilder.sorted_objects"); + builder->sorted_end[i] = builder->sorted_begin[i]; + } + + + return builder; +} + +void rtbuild_free(RTBuilder *b) +{ + if (b->primitives.begin) MEM_freeN(b->primitives.begin); + + for (int i = 0; i < 3; i++) + if (b->sorted_begin[i]) + MEM_freeN(b->sorted_begin[i]); + + MEM_freeN(b); +} + +void rtbuild_add(RTBuilder *b, RayObject *o) +{ + float bb[6]; + + assert(b->primitives.begin + b->primitives.maxsize != b->primitives.end); + + INIT_MINMAX(bb, bb + 3); + RE_rayobject_merge_bb(o, bb, bb + 3); + + /* skip objects with invalid bounding boxes, nan causes DO_MINMAX + * to do nothing, so we get these invalid values. this shouldn't + * happen usually, but bugs earlier in the pipeline can cause it. */ + if (bb[0] > bb[3] || bb[1] > bb[4] || bb[2] > bb[5]) + return; + /* skip objects with inf bounding boxes */ + if (!isfinite(bb[0]) || !isfinite(bb[1]) || !isfinite(bb[2])) + return; + if (!isfinite(bb[3]) || !isfinite(bb[4]) || !isfinite(bb[5])) + return; + /* skip objects with zero bounding box, they are of no use, and + * will give problems in rtbuild_heuristic_object_split later */ + if (bb[0] == bb[3] && bb[1] == bb[4] && bb[2] == bb[5]) + return; + + copy_v3_v3(b->primitives.end->bb, bb); + copy_v3_v3(b->primitives.end->bb + 3, bb + 3); + b->primitives.end->obj = o; + b->primitives.end->cost = RE_rayobject_cost(o); + + for (int i = 0; i < 3; i++) { + *(b->sorted_end[i]) = b->primitives.end; + b->sorted_end[i]++; + } + b->primitives.end++; +} + +int rtbuild_size(RTBuilder *b) +{ + return b->sorted_end[0] - b->sorted_begin[0]; +} + + +template<class Obj, int Axis> +static bool obj_bb_compare(const Obj &a, const Obj &b) +{ + if (a->bb[Axis] != b->bb[Axis]) + return a->bb[Axis] < b->bb[Axis]; + return a->obj < b->obj; +} + +template<class Item> +static void object_sort(Item *begin, Item *end, int axis) +{ + if (axis == 0) return std::sort(begin, end, obj_bb_compare<Item, 0> ); + if (axis == 1) return std::sort(begin, end, obj_bb_compare<Item, 1> ); + if (axis == 2) return std::sort(begin, end, obj_bb_compare<Item, 2> ); + assert(false); +} + +void rtbuild_done(RTBuilder *b, RayObjectControl *ctrl) +{ + for (int i = 0; i < 3; i++) { + if (b->sorted_begin[i]) { + if (RE_rayobjectcontrol_test_break(ctrl)) break; + object_sort(b->sorted_begin[i], b->sorted_end[i], i); + } + } +} + +RayObject *rtbuild_get_primitive(RTBuilder *b, int index) +{ + return b->sorted_begin[0][index]->obj; +} + +RTBuilder *rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp) +{ + rtbuild_init(tmp); + + tmp->depth = b->depth + 1; + + for (int i = 0; i < 3; i++) + if (b->sorted_begin[i]) { + tmp->sorted_begin[i] = b->sorted_begin[i] + b->child_offset[child]; + tmp->sorted_end[i] = b->sorted_begin[i] + b->child_offset[child + 1]; + } + else { + tmp->sorted_begin[i] = NULL; + tmp->sorted_end[i] = NULL; + } + + return tmp; +} + +static void rtbuild_calc_bb(RTBuilder *b) +{ + if (b->bb[0] == 1.0e30f) { + for (RTBuilder::Object **index = b->sorted_begin[0]; index != b->sorted_end[0]; index++) + RE_rayobject_merge_bb( (*index)->obj, b->bb, b->bb + 3); + } +} + +void rtbuild_merge_bb(RTBuilder *b, float min[3], float max[3]) +{ + rtbuild_calc_bb(b); + DO_MIN(b->bb, min); + DO_MAX(b->bb + 3, max); +} + +#if 0 +int rtbuild_get_largest_axis(RTBuilder *b) +{ + rtbuild_calc_bb(b); + return bb_largest_axis(b->bb, b->bb + 3); +} + +//Left balanced tree +int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis) +{ + int i; + int mleafs_per_child, Mleafs_per_child; + int tot_leafs = rtbuild_size(b); + int missing_leafs; + + long long s; + + assert(nchilds <= RTBUILD_MAX_CHILDS); + + //TODO optimize calc of leafs_per_child + for (s = nchilds; s < tot_leafs; s *= nchilds) ; + Mleafs_per_child = s / nchilds; + mleafs_per_child = Mleafs_per_child / nchilds; + + //split min leafs per child + b->child_offset[0] = 0; + for (i = 1; i <= nchilds; i++) + b->child_offset[i] = mleafs_per_child; + + //split remaining leafs + missing_leafs = tot_leafs - mleafs_per_child * nchilds; + for (i = 1; i <= nchilds; i++) + { + if (missing_leafs > Mleafs_per_child - mleafs_per_child) + { + b->child_offset[i] += Mleafs_per_child - mleafs_per_child; + missing_leafs -= Mleafs_per_child - mleafs_per_child; + } + else { + b->child_offset[i] += missing_leafs; + missing_leafs = 0; + break; + } + } + + //adjust for accumulative offsets + for (i = 1; i <= nchilds; i++) + b->child_offset[i] += b->child_offset[i - 1]; + + //Count created childs + for (i = nchilds; b->child_offset[i] == b->child_offset[i - 1]; i--) ; + split_leafs(b, b->child_offset, i, axis); + + assert(b->child_offset[0] == 0 && b->child_offset[i] == tot_leafs); + return i; +} + + +int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds) +{ + int axis = rtbuild_get_largest_axis(b); + return rtbuild_mean_split(b, nchilds, axis); +} +#endif + +/* + * "separators" is an array of dim NCHILDS-1 + * and indicates where to cut the childs + */ +#if 0 +int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis) +{ + int size = rtbuild_size(b); + + assert(nchilds <= RTBUILD_MAX_CHILDS); + if (size <= nchilds) + { + return rtbuild_mean_split(b, nchilds, axis); + } + else { + int i; + + b->split_axis = axis; + + //Calculate child offsets + b->child_offset[0] = 0; + for (i = 0; i < nchilds - 1; i++) + b->child_offset[i + 1] = split_leafs_by_plane(b, b->child_offset[i], size, separators[i]); + b->child_offset[nchilds] = size; + + for (i = 0; i < nchilds; i++) + if (b->child_offset[i + 1] - b->child_offset[i] == size) + return rtbuild_mean_split(b, nchilds, axis); + + return nchilds; + } +} + +int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds) +{ + int la, i; + float separators[RTBUILD_MAX_CHILDS]; + + rtbuild_calc_bb(b); + + la = bb_largest_axis(b->bb, b->bb + 3); + for (i = 1; i < nchilds; i++) + separators[i - 1] = (b->bb[la + 3] - b->bb[la]) * i / nchilds; + + return rtbuild_median_split(b, separators, nchilds, la); +} +#endif + +//Heuristics Object Splitter + + +struct SweepCost { + float bb[6]; + float cost; +}; + +/* Object Surface Area Heuristic splitter */ +int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds) +{ + int size = rtbuild_size(b); + assert(nchilds == 2); + assert(size > 1); + int baxis = -1, boffset = 0; + + if (size > nchilds) { + if (b->depth > RTBUILD_MAX_SAH_DEPTH) { + // for degenerate cases we avoid running out of stack space + // by simply splitting the children in the middle + b->child_offset[0] = 0; + b->child_offset[1] = (size+1)/2; + b->child_offset[2] = size; + return 2; + } + + float bcost = FLT_MAX; + baxis = -1; + boffset = size / 2; + + SweepCost *sweep = (SweepCost *)MEM_mallocN(sizeof(SweepCost) * size, "RTBuilder.HeuristicSweep"); + + for (int axis = 0; axis < 3; axis++) { + SweepCost sweep_left; + + RTBuilder::Object **obj = b->sorted_begin[axis]; + +// float right_cost = 0; + for (int i = size - 1; i >= 0; i--) { + if (i == size - 1) { + copy_v3_v3(sweep[i].bb, obj[i]->bb); + copy_v3_v3(sweep[i].bb + 3, obj[i]->bb + 3); + sweep[i].cost = obj[i]->cost; + } + else { + sweep[i].bb[0] = min_ff(obj[i]->bb[0], sweep[i + 1].bb[0]); + sweep[i].bb[1] = min_ff(obj[i]->bb[1], sweep[i + 1].bb[1]); + sweep[i].bb[2] = min_ff(obj[i]->bb[2], sweep[i + 1].bb[2]); + sweep[i].bb[3] = max_ff(obj[i]->bb[3], sweep[i + 1].bb[3]); + sweep[i].bb[4] = max_ff(obj[i]->bb[4], sweep[i + 1].bb[4]); + sweep[i].bb[5] = max_ff(obj[i]->bb[5], sweep[i + 1].bb[5]); + sweep[i].cost = obj[i]->cost + sweep[i + 1].cost; + } +// right_cost += obj[i]->cost; + } + + sweep_left.bb[0] = obj[0]->bb[0]; + sweep_left.bb[1] = obj[0]->bb[1]; + sweep_left.bb[2] = obj[0]->bb[2]; + sweep_left.bb[3] = obj[0]->bb[3]; + sweep_left.bb[4] = obj[0]->bb[4]; + sweep_left.bb[5] = obj[0]->bb[5]; + sweep_left.cost = obj[0]->cost; + +// right_cost -= obj[0]->cost; if (right_cost < 0) right_cost = 0; + + for (int i = 1; i < size; i++) { + //Worst case heuristic (cost of each child is linear) + float hcost, left_side, right_side; + + // not using log seems to have no impact on raytracing perf, but + // makes tree construction quicker, left out for now to test (brecht) + // left_side = bb_area(sweep_left.bb, sweep_left.bb + 3) * (sweep_left.cost + logf((float)i)); + // right_side = bb_area(sweep[i].bb, sweep[i].bb + 3) * (sweep[i].cost + logf((float)size - i)); + left_side = bb_area(sweep_left.bb, sweep_left.bb + 3) * (sweep_left.cost); + right_side = bb_area(sweep[i].bb, sweep[i].bb + 3) * (sweep[i].cost); + hcost = left_side + right_side; + + assert(left_side >= 0); + assert(right_side >= 0); + + if (left_side > bcost) break; //No way we can find a better heuristic in this axis + + assert(hcost >= 0); + // this makes sure the tree built is the same whatever is the order of the sorting axis + if (hcost < bcost || (hcost == bcost && axis < baxis)) { + bcost = hcost; + baxis = axis; + boffset = i; + } + DO_MIN(obj[i]->bb, sweep_left.bb); + DO_MAX(obj[i]->bb + 3, sweep_left.bb + 3); + + sweep_left.cost += obj[i]->cost; +// right_cost -= obj[i]->cost; if (right_cost < 0) right_cost = 0; + } + + //assert(baxis >= 0 && baxis < 3); + if (!(baxis >= 0 && baxis < 3)) + baxis = 0; + } + + + MEM_freeN(sweep); + } + else if (size == 2) { + baxis = 0; + boffset = 1; + } + else if (size == 1) { + b->child_offset[0] = 0; + b->child_offset[1] = 1; + return 1; + } + + b->child_offset[0] = 0; + b->child_offset[1] = boffset; + b->child_offset[2] = size; + + + /* Adjust sorted arrays for childs */ + for (int i = 0; i < boffset; i++) b->sorted_begin[baxis][i]->selected = true; + for (int i = boffset; i < size; i++) b->sorted_begin[baxis][i]->selected = false; + for (int i = 0; i < 3; i++) + std::stable_partition(b->sorted_begin[i], b->sorted_end[i], selected_node); + + return nchilds; +} + +/* + * Helper code + * PARTITION code / used on mean-split + * basically this a std::nth_element (like on C++ STL algorithm) + */ +#if 0 +static void split_leafs(RTBuilder *b, int *nth, int partitions, int split_axis) +{ + int i; + b->split_axis = split_axis; + + for (i = 0; i < partitions - 1; i++) + { + assert(nth[i] < nth[i + 1] && nth[i + 1] < nth[partitions]); + + if (split_axis == 0) std::nth_element(b, nth[i], nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 0>); + if (split_axis == 1) std::nth_element(b, nth[i], nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 1>); + if (split_axis == 2) std::nth_element(b, nth[i], nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 2>); + } +} +#endif + +/* + * Bounding Box utils + */ +float bb_volume(const float min[3], const float max[3]) +{ + return (max[0] - min[0]) * (max[1] - min[1]) * (max[2] - min[2]); +} + +float bb_area(const float min[3], const float max[3]) +{ + float sub[3], a; + sub[0] = max[0] - min[0]; + sub[1] = max[1] - min[1]; + sub[2] = max[2] - min[2]; + + a = (sub[0] * sub[1] + sub[0] * sub[2] + sub[1] * sub[2]) * 2.0f; + /* used to have an assert() here on negative results + * however, in this case its likely some overflow or ffast math error. + * so just return 0.0f instead. */ + return a < 0.0f ? 0.0f : a; +} + +int bb_largest_axis(const float min[3], const float max[3]) +{ + float sub[3]; + + sub[0] = max[0] - min[0]; + sub[1] = max[1] - min[1]; + sub[2] = max[2] - min[2]; + if (sub[0] > sub[1]) { + if (sub[0] > sub[2]) + return 0; + else + return 2; + } + else { + if (sub[1] > sub[2]) + return 1; + else + return 2; + } +} + +/* only returns 0 if merging inner and outerbox would create a box larger than outer box */ +int bb_fits_inside(const float outer_min[3], const float outer_max[3], + const float inner_min[3], const float inner_max[3]) +{ + int i; + for (i = 0; i < 3; i++) + if (outer_min[i] > inner_min[i]) return 0; + + for (i = 0; i < 3; i++) + if (outer_max[i] < inner_max[i]) return 0; + + return 1; +} diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.h b/source/blender/render/intern/raytrace/rayobject_rtbuild.h new file mode 100644 index 00000000000..fc42bc36d92 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.h @@ -0,0 +1,125 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_rtbuild.h + * \ingroup render + */ + +#ifndef __RAYOBJECT_RTBUILD_H__ +#define __RAYOBJECT_RTBUILD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "rayobject.h" + + +/* + * Ray Tree Builder + * this structs helps building any type of tree + * it contains several methods to organize/split nodes + * allowing to create a given tree on the fly. + * + * Idea is that other trees BVH, BIH can use this code to + * generate with simple calls, and then convert to the theirs + * specific structure on the fly. + */ +#define RTBUILD_MAX_CHILDS 32 +#define RTBUILD_MAX_SAH_DEPTH 256 + + +typedef struct RTBuilder { + struct Object { + RayObject *obj; + float cost; + float bb[6]; + int selected; + }; + + /* list to all primitives added in this tree */ + struct { + Object *begin, *end; + int maxsize; + } primitives; + + /* sorted list of rayobjects */ + struct Object **sorted_begin[3], **sorted_end[3]; + + /* axis used (if any) on the split method */ + int split_axis; + + /* child partitions calculated during splitting */ + int child_offset[RTBUILD_MAX_CHILDS + 1]; + +// int child_sorted_axis; /* -1 if not sorted */ + + float bb[6]; + + /* current depth */ + int depth; +} RTBuilder; + +/* used during creation */ +RTBuilder *rtbuild_create(int size); +void rtbuild_free(RTBuilder *b); +void rtbuild_add(RTBuilder *b, RayObject *o); +void rtbuild_done(RTBuilder *b, RayObjectControl *c); +void rtbuild_merge_bb(RTBuilder *b, float min[3], float max[3]); +int rtbuild_size(RTBuilder *b); + +RayObject *rtbuild_get_primitive(RTBuilder *b, int offset); + +/* used during tree reorganization */ +RTBuilder *rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp); + +/* Calculates child partitions and returns number of efectively needed partitions */ +int rtbuild_get_largest_axis(RTBuilder *b); + +//Object partition +int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis); +int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds); + +int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds); + +//Space partition +int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis); +int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds); + + +/* bb utils */ +float bb_area(const float min[3], const float max[3]); +float bb_volume(const float min[3], const float max[3]); +int bb_largest_axis(const float min[3], const float max[3]); +int bb_fits_inside(const float outer_min[3], const float outer_max[3], + const float inner_min[3], const float inner_max[3]); + +#ifdef __cplusplus +} +#endif + +#endif /* __RAYOBJECT_RTBUILD_H__ */ diff --git a/source/blender/render/intern/raytrace/rayobject_svbvh.cpp b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp new file mode 100644 index 00000000000..fcd692fac02 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp @@ -0,0 +1,192 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_svbvh.cpp + * \ingroup render + */ + + +#include "MEM_guardedalloc.h" + +#include "BLI_utildefines.h" + +#include "vbvh.h" +#include "svbvh.h" +#include "reorganize.h" + +#ifdef __SSE__ + +#define DFS_STACK_SIZE 256 + +struct SVBVHTree { + RayObject rayobj; + + SVBVHNode *root; + MemArena *node_arena; + + float cost; + RTBuilder *builder; +}; + +/* + * Cost to test N childs + */ +struct PackCost { + float operator()(int n) + { + return (n / 4) + ((n % 4) > 2 ? 1 : n % 4); + } +}; + + +template<> +void bvh_done<SVBVHTree>(SVBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "svbvh arena"); + BLI_memarena_use_malloc(arena1); + + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "svbvh arena2"); + BLI_memarena_use_malloc(arena2); + BLI_memarena_use_align(arena2, 16); + + //Build and optimize the tree + if (0) { + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + reorganize(root); + remove_useless(root, &root); + bvh_refit(root); + + pushup(root); + pushdown(root); + pushup_simd<VBVHNode, 4>(root); + + obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root); + } + else { + //Finds the optimal packing of this tree using a given cost model + //TODO this uses quite a lot of memory, find ways to reduce memory usage during building + OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + if (root) { + VBVH_optimalPackSIMD<OVBVHNode, PackCost>(PackCost()).transform(root); + obj->root = Reorganize_SVBVH<OVBVHNode>(arena2).transform(root); + } + else + obj->root = NULL; + } + + //Free data + BLI_memarena_free(arena1); + + obj->node_arena = arena2; + obj->cost = 1.0; + + rtbuild_free(obj->builder); + obj->builder = NULL; +} + +template<int StackSize> +static int intersect(SVBVHTree *obj, Isect *isec) +{ + //TODO renable hint support + if (RE_rayobject_isAligned(obj->root)) { + if (isec->mode == RE_RAY_SHADOW) + return svbvh_node_stack_raycast<StackSize, true>(obj->root, isec); + else + return svbvh_node_stack_raycast<StackSize, false>(obj->root, isec); + } + else + return RE_rayobject_intersect( (RayObject *) obj->root, isec); +} + +template<class Tree> +static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max)) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject *)tree->root; + } +} +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +static RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int (*)(Tree *, Isect *)) & intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void (*)(Tree *, RayObject *)) & bvh_add<Tree>), + (RE_rayobject_done_callback) ((void (*)(Tree *)) & bvh_done<Tree>), + (RE_rayobject_free_callback) ((void (*)(Tree *)) & bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void (*)(Tree *, float *, float *)) & bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float (*)(Tree *)) & bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +static RayObjectAPI *bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree, 1024>(); + + if (maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return NULL; +} + +RayObject *RE_rayobject_svbvh_create(int size) +{ + return bvh_create_tree<SVBVHTree, DFS_STACK_SIZE>(size); +} + +#else + +RayObject *RE_rayobject_svbvh_create(int UNUSED(size)) +{ + puts("WARNING: SSE disabled at compile time\n"); + return NULL; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_vbvh.cpp b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp new file mode 100644 index 00000000000..b63a11047dd --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp @@ -0,0 +1,206 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_vbvh.cpp + * \ingroup render + */ + + +int tot_pushup = 0; +int tot_pushdown = 0; +int tot_hints = 0; + +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_memarena.h" +#include "BLI_utildefines.h" + +#include "BKE_global.h" + +#include "rayintersection.h" +#include "rayobject.h" +#include "rayobject_rtbuild.h" + +#include "reorganize.h" +#include "bvh.h" +#include "vbvh.h" + +#include <queue> +#include <algorithm> + +#define DFS_STACK_SIZE 256 + +struct VBVHTree { + RayObject rayobj; + VBVHNode *root; + MemArena *node_arena; + float cost; + RTBuilder *builder; +}; + +/* + * Cost to test N childs + */ +struct PackCost { + float operator()(int n) + { + return n; + } +}; + +template<> +void bvh_done<VBVHTree>(VBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vbvh arena"); + BLI_memarena_use_malloc(arena1); + + //Build and optimize the tree + if (1) { + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + return; + } + + if (root) { + reorganize(root); + remove_useless(root, &root); + bvh_refit(root); + + pushup(root); + pushdown(root); + obj->root = root; + } + else + obj->root = NULL; + } + else { + /* TODO */ +#if 0 + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vbvh arena2"); + BLI_memarena_use_malloc(arena2); + + //Finds the optimal packing of this tree using a given cost model + //TODO this uses quite a lot of memory, find ways to reduce memory usage during building + OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena2).transform(obj->builder); + VBVH_optimalPackSIMD<OVBVHNode, PackCost>(PackCost()).transform(root); + obj->root = Reorganize_VBVH<OVBVHNode>(arena1).transform(root); + + BLI_memarena_free(arena2); +#endif + } + + //Cleanup + rtbuild_free(obj->builder); + obj->builder = NULL; + + obj->node_arena = arena1; + obj->cost = 1.0; +} + +template<int StackSize> +static int intersect(VBVHTree *obj, Isect *isec) +{ + //TODO renable hint support + if (RE_rayobject_isAligned(obj->root)) { + if (isec->mode == RE_RAY_SHADOW) + return bvh_node_stack_raycast<VBVHNode, StackSize, false, true>(obj->root, isec); + else + return bvh_node_stack_raycast<VBVHNode, StackSize, false, false>(obj->root, isec); + } + else + return RE_rayobject_intersect( (RayObject *) obj->root, isec); +} + +template<class Tree> +static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max)) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject *)tree->root; + } +} + +#if 0 /* UNUSED */ +static void bfree(VBVHTree *tree) +{ + if (tot_pushup + tot_pushdown + tot_hints + tot_moves) { + if (G.debug & G_DEBUG) { + printf("tot pushups: %d\n", tot_pushup); + printf("tot pushdowns: %d\n", tot_pushdown); + printf("tot moves: %d\n", tot_moves); + printf("tot hints created: %d\n", tot_hints); + } + + tot_pushup = 0; + tot_pushdown = 0; + tot_hints = 0; + tot_moves = 0; + } + bvh_free(tree); +} +#endif + +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +static RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int (*)(Tree *, Isect *)) & intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void (*)(Tree *, RayObject *)) & bvh_add<Tree>), + (RE_rayobject_done_callback) ((void (*)(Tree *)) & bvh_done<Tree>), + (RE_rayobject_free_callback) ((void (*)(Tree *)) & bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void (*)(Tree *, float *, float *)) & bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float (*)(Tree *)) & bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI *bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree, 1024>(); + + if (maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return 0; +} + +RayObject *RE_rayobject_vbvh_create(int size) +{ + return bvh_create_tree<VBVHTree, DFS_STACK_SIZE>(size); +} diff --git a/source/blender/render/intern/raytrace/reorganize.h b/source/blender/render/intern/raytrace/reorganize.h new file mode 100644 index 00000000000..3fdd3363edb --- /dev/null +++ b/source/blender/render/intern/raytrace/reorganize.h @@ -0,0 +1,513 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/reorganize.h + * \ingroup render + */ + + +#include <float.h> +#include <math.h> +#include <stdio.h> + +#include <algorithm> +#include <queue> +#include <vector> + +#include "BKE_global.h" + +#ifdef _WIN32 +# ifdef INFINITY +# undef INFINITY +# endif +# define INFINITY FLT_MAX // in mingw math.h: (1.0F/0.0F). This generates compile error, though. +#endif + +extern int tot_pushup; +extern int tot_pushdown; + +#if !defined(INFINITY) && defined(HUGE_VAL) +#define INFINITY HUGE_VAL +#endif + +template<class Node> +static bool node_fits_inside(Node *a, Node *b) +{ + return bb_fits_inside(b->bb, b->bb + 3, a->bb, a->bb + 3); +} + +template<class Node> +static void reorganize_find_fittest_parent(Node *tree, Node *node, std::pair<float, Node *> &cost) +{ + std::queue<Node *> q; + q.push(tree); + + while (!q.empty()) { + Node *parent = q.front(); + q.pop(); + + if (parent == node) continue; + if (node_fits_inside(node, parent) && RE_rayobject_isAligned(parent->child) ) { + float pcost = bb_area(parent->bb, parent->bb + 3); + cost = std::min(cost, std::make_pair(pcost, parent) ); + for (Node *child = parent->child; child; child = child->sibling) + q.push(child); + } + } +} + +template<class Node> +static void reorganize(Node *root) +{ + std::queue<Node *> q; + + q.push(root); + while (!q.empty()) { + Node *node = q.front(); + q.pop(); + + if (RE_rayobject_isAligned(node->child)) { + for (Node **prev = &node->child; *prev; ) { + assert(RE_rayobject_isAligned(*prev)); + q.push(*prev); + + std::pair<float, Node *> best(FLT_MAX, root); + reorganize_find_fittest_parent(root, *prev, best); + + if (best.second == node) { + //Already inside the fitnest BB + prev = &(*prev)->sibling; + } + else { + Node *tmp = *prev; + *prev = (*prev)->sibling; + + tmp->sibling = best.second->child; + best.second->child = tmp; + } + + + } + } + if (node != root) { + } + } +} + +/* + * Prunes useless nodes from trees: + * erases nodes with total amount of primitives = 0 + * prunes nodes with only one child (except if that child is a primitive) + */ +template<class Node> +static void remove_useless(Node *node, Node **new_node) +{ + if (RE_rayobject_isAligned(node->child) ) { + + for (Node **prev = &node->child; *prev; ) { + Node *next = (*prev)->sibling; + remove_useless(*prev, prev); + if (*prev == NULL) + *prev = next; + else { + (*prev)->sibling = next; + prev = &((*prev)->sibling); + } + } + } + if (node->child) { + if (RE_rayobject_isAligned(node->child) && node->child->sibling == 0) + *new_node = node->child; + } + else if (node->child == NULL) { + *new_node = NULL; + } +} + +/* + * Minimizes expected number of BBtest by colapsing nodes + * it uses surface area heuristic for determining whether a node should be colapsed + */ +template<class Node> +static void pushup(Node *parent) +{ + if (is_leaf(parent)) return; + + float p_area = bb_area(parent->bb, parent->bb + 3); + Node **prev = &parent->child; + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) { + const float c_area = bb_area(child->bb, child->bb + 3); + const int nchilds = count_childs(child); + float original_cost = ((p_area != 0.0f) ? (c_area / p_area) * nchilds : 1.0f) + 1; + float flatten_cost = nchilds; + if (flatten_cost < original_cost && nchilds >= 2) { + append_sibling(child, child->child); + child = child->sibling; + *prev = child; + +// *prev = child->child; +// append_sibling( *prev, child->sibling ); +// child = *prev; + tot_pushup++; + } + else { + *prev = child; + prev = &(*prev)->sibling; + child = *prev; + } + } + + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling) + pushup(child); +} + +/* + * try to optimize number of childs to be a multiple of SSize + */ +template<class Node, int SSize> +static void pushup_simd(Node *parent) +{ + if (is_leaf(parent)) return; + + int n = count_childs(parent); + + Node **prev = &parent->child; + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) { + int cn = count_childs(child); + if (cn - 1 <= (SSize - (n % SSize) ) % SSize && RE_rayobject_isAligned(child->child) ) { + n += (cn - 1); + append_sibling(child, child->child); + child = child->sibling; + *prev = child; + } + else { + *prev = child; + prev = &(*prev)->sibling; + child = *prev; + } + } + + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling) + pushup_simd<Node, SSize>(child); +} + + +/* + * Pushdown + * makes sure no child fits inside any of its sibling + */ +template<class Node> +static void pushdown(Node *parent) +{ + Node **s_child = &parent->child; + Node *child = parent->child; + + while (child && RE_rayobject_isAligned(child)) { + Node *next = child->sibling; + Node **next_s_child = &child->sibling; + + //assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3)); + + for (Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling) + if (child != i && bb_fits_inside(i->bb, i->bb + 3, child->bb, child->bb + 3) && RE_rayobject_isAligned(i->child)) { +// todo optimize (should the one with the smallest area?) +// float ia = bb_area(i->bb, i->bb+3) +// if (child->i) + *s_child = child->sibling; + child->sibling = i->child; + i->child = child; + next_s_child = s_child; + + tot_pushdown++; + break; + } + child = next; + s_child = next_s_child; + } + + for (Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling) { + pushdown(i); + } +} + + +/* + * BVH refit + * readjust nodes BB (useful if nodes childs where modified) + */ +template<class Node> +static float bvh_refit(Node *node) +{ + if (is_leaf(node)) return 0; + if (is_leaf(node->child)) return 0; + + float total = 0; + + for (Node *child = node->child; child; child = child->sibling) + total += bvh_refit(child); + + float old_area = bb_area(node->bb, node->bb + 3); + INIT_MINMAX(node->bb, node->bb + 3); + for (Node *child = node->child; child; child = child->sibling) { + DO_MIN(child->bb, node->bb); + DO_MAX(child->bb + 3, node->bb + 3); + } + total += old_area - bb_area(node->bb, node->bb + 3); + return total; +} + + +/* + * this finds the best way to packing a tree according to a given test cost function + * with the purpose to reduce the expected cost (eg.: number of BB tests). + */ +#include <vector> +#define MAX_CUT_SIZE 4 /* svbvh assumes max 4 children! */ +#define MAX_OPTIMIZE_CHILDS MAX_CUT_SIZE + +#define CUT_SIZE_IS_VALID(cut_size) ((cut_size) < MAX_CUT_SIZE && (cut_size) >= 0) +#define CUT_SIZE_INVALID -1 + + +struct OVBVHNode { + float bb[6]; + + OVBVHNode *child; + OVBVHNode *sibling; + + /* + * Returns min cost to represent the subtree starting at the given node, + * allowing it to have a given cutsize + */ + float cut_cost[MAX_CUT_SIZE]; + float get_cost(int cutsize) + { + assert(CUT_SIZE_IS_VALID(cutsize - 1)); + return cut_cost[cutsize - 1]; + } + + /* + * This saves the cut size of this child, when parent is reaching + * its minimum cut with the given cut size + */ + int cut_size[MAX_CUT_SIZE]; + int get_cut_size(int parent_cut_size) + { + assert(CUT_SIZE_IS_VALID(parent_cut_size - 1)); + return cut_size[parent_cut_size - 1]; + } + + /* + * Reorganize the node based on calculated cut costs + */ + int best_cutsize; + void set_cut(int cutsize, OVBVHNode ***cut) + { + if (cutsize == 1) { + **cut = this; + *cut = &(**cut)->sibling; + } + else { + if (cutsize > MAX_CUT_SIZE) { + for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) { + child->set_cut(1, cut); + cutsize--; + } + assert(cutsize == 0); + } + else { + for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) { + child->set_cut(child->get_cut_size(cutsize), cut); + } + } + } + } + + void optimize() + { + if (RE_rayobject_isAligned(this->child)) { + //Calc new childs + if (this->best_cutsize != CUT_SIZE_INVALID) { + OVBVHNode **cut = &(this->child); + set_cut(this->best_cutsize, &cut); + *cut = NULL; + } + + //Optimize new childs + for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) + child->optimize(); + } + } +}; + +/* + * Calculates an optimal SIMD packing + * + */ +template<class Node, class TestCost> +struct VBVH_optimalPackSIMD { + TestCost testcost; + + VBVH_optimalPackSIMD(TestCost testcost) + { + this->testcost = testcost; + } + + /* + * calc best cut on a node + */ + struct calc_best { + Node *child[MAX_OPTIMIZE_CHILDS]; + float child_hit_prob[MAX_OPTIMIZE_CHILDS]; + + calc_best(Node *node) + { + int nchilds = 0; + //Fetch childs and needed data + { + float parent_area = bb_area(node->bb, node->bb + 3); + for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) { + this->child[nchilds] = child; + this->child_hit_prob[nchilds] = (parent_area != 0.0f) ? bb_area(child->bb, child->bb + 3) / parent_area : 1.0f; + nchilds++; + } + + assert(nchilds >= 2 && nchilds <= MAX_OPTIMIZE_CHILDS); + } + + + //Build DP table to find minimum cost to represent this node with a given cutsize + int bt[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1]; //backtrace table + float cost[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1]; //cost table (can be reduced to float[2][MAX_CUT_COST]) + + for (int i = 0; i <= nchilds; i++) { + for (int j = 0; j <= MAX_CUT_SIZE; j++) { + cost[i][j] = INFINITY; + } + } + + cost[0][0] = 0; + + for (int i = 1; i <= nchilds; i++) { + for (int size = i - 1; size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; size++) { + for (int cut = 1; cut + size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; cut++) { + float new_cost = cost[i - 1][size] + child_hit_prob[i - 1] * child[i - 1]->get_cost(cut); + if (new_cost < cost[i][size + cut]) { + cost[i][size + cut] = new_cost; + bt[i][size + cut] = cut; + } + } + } + } + + /* Save the ways to archive the minimum cost with a given cutsize */ + for (int i = nchilds; i <= MAX_CUT_SIZE; i++) { + node->cut_cost[i - 1] = cost[nchilds][i]; + if (cost[nchilds][i] < INFINITY) { + int current_size = i; + for (int j = nchilds; j > 0; j--) { + child[j - 1]->cut_size[i - 1] = bt[j][current_size]; + current_size -= bt[j][current_size]; + } + } + } + } + }; + + void calc_costs(Node *node) + { + + if (RE_rayobject_isAligned(node->child) ) { + int nchilds = 0; + for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) { + calc_costs(child); + nchilds++; + } + + for (int i = 0; i < MAX_CUT_SIZE; i++) + node->cut_cost[i] = INFINITY; + + //We are not allowed to look on nodes with with so many childs + if (nchilds > MAX_CUT_SIZE) { + float cost = 0; + + float parent_area = bb_area(node->bb, node->bb + 3); + for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) { + cost += ((parent_area != 0.0f) ? (bb_area(child->bb, child->bb + 3) / parent_area) : 1.0f) * child->get_cost(1); + } + + cost += testcost(nchilds); + node->cut_cost[0] = cost; + node->best_cutsize = nchilds; + } + else { + calc_best calc(node); + + //calc expected cost if we optimaly pack this node + for (int cutsize = nchilds; cutsize <= MAX_CUT_SIZE; cutsize++) { + float m = node->get_cost(cutsize) + testcost(cutsize); + if (m < node->cut_cost[0]) { + node->cut_cost[0] = m; + node->best_cutsize = cutsize; + } + } + } + + if (node->cut_cost[0] == INFINITY) { + node->best_cutsize = CUT_SIZE_INVALID; + } + } + else { + node->cut_cost[0] = 1.0f; + for (int i = 1; i < MAX_CUT_SIZE; i++) + node->cut_cost[i] = INFINITY; + + /* node->best_cutsize can remain unset here */ + } + } + + Node *transform(Node *node) + { + if (RE_rayobject_isAligned(node->child)) { +#ifdef DEBUG + static int num = 0; + bool first = false; + if (num == 0) { num++; first = true; } +#endif + + calc_costs(node); + +#ifdef DEBUG + if (first && G.debug) { + printf("expected cost = %f (%d)\n", node->cut_cost[0], node->best_cutsize); + } +#endif + node->optimize(); + } + return node; + } +}; diff --git a/source/blender/render/intern/raytrace/svbvh.h b/source/blender/render/intern/raytrace/svbvh.h new file mode 100644 index 00000000000..0a5690deb46 --- /dev/null +++ b/source/blender/render/intern/raytrace/svbvh.h @@ -0,0 +1,317 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/svbvh.h + * \ingroup render + */ + +#ifndef __SVBVH_H__ +#define __SVBVH_H__ + +#ifdef __SSE__ + +#include "bvh.h" +#include "BLI_memarena.h" +#include <algorithm> + +struct SVBVHNode { + float child_bb[24]; + SVBVHNode *child[4]; + int nchilds; +}; + +static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group) +{ + const __m128 tmin0 = _mm_setzero_ps(); + const __m128 tmax0 = _mm_set_ps1(isec->dist); + + const __m128 start0 = _mm_set_ps1(isec->start[0]); + const __m128 start1 = _mm_set_ps1(isec->start[1]); + const __m128 start2 = _mm_set_ps1(isec->start[2]); + const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0); + const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0); + const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1); + const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1); + const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2); + const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2); + const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]); + const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]); + const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]); + const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0); + const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0); + const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1); + const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1); + const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2); + const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2); + const __m128 tmin1 = _mm_max_ps(tmin0, mul0); + const __m128 tmax1 = _mm_min_ps(tmax0, mul1); + const __m128 tmin2 = _mm_max_ps(tmin1, mul2); + const __m128 tmax2 = _mm_min_ps(tmax1, mul3); + const __m128 tmin3 = _mm_max_ps(tmin2, mul4); + const __m128 tmax3 = _mm_min_ps(tmax2, mul5); + + return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); +} + +static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb) +{ + const float *bb = _bb; + + float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; + float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; + float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; + float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; + float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; + float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; + + RE_RC_COUNT(isec->raycounter->bb.test); + + if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; + if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0; + if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; + + RE_RC_COUNT(isec->raycounter->bb.hit); + + return 1; +} + +static bool svbvh_node_is_leaf(const SVBVHNode *node) +{ + return !RE_rayobject_isAligned(node); +} + +template<int MAX_STACK_SIZE, bool SHADOW> +static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec) +{ + SVBVHNode *stack[MAX_STACK_SIZE], *node; + int hit = 0, stack_pos = 0; + + stack[stack_pos++] = root; + + while (stack_pos) { + node = stack[--stack_pos]; + + if (!svbvh_node_is_leaf(node)) { + int nchilds = node->nchilds; + + if (nchilds == 4) { + float *child_bb = node->child_bb; + int res = svbvh_bb_intersect_test_simd4(isec, ((__m128 *) (child_bb))); + SVBVHNode **child = node->child; + + RE_RC_COUNT(isec->raycounter->simd_bb.test); + + if (res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if (res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if (res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if (res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + } + else { + float *child_bb = node->child_bb; + SVBVHNode **child = node->child; + int i; + + for (i = 0; i < nchilds; i++) { + if (svbvh_bb_intersect_test(isec, (float *)child_bb + 6 * i)) { + stack[stack_pos++] = child[i]; + } + } + } + } + else { + hit |= RE_rayobject_intersect((RayObject *)node, isec); + if (SHADOW && hit) break; + } + } + + return hit; +} + + +template<> +inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float min[3], float max[3]) +{ + if (is_leaf(node)) { + RE_rayobject_merge_bb((RayObject *)node, min, max); + } + else { + int i; + for (i = 0; i + 4 <= node->nchilds; i += 4) { + float *res = node->child_bb + 6 * i; + for (int j = 0; j < 3; j++) { + min[j] = min_ff(min[j], + min_ffff(res[4 * j + 0], + res[4 * j + 1], + res[4 * j + 2], + res[4 * j + 3])); + } + for (int j = 0; j < 3; j++) { + max[j] = max_ff(max[j], + max_ffff(res[4 * (j + 3) + 0], + res[4 * (j + 3) + 1], + res[4 * (j + 3) + 2], + res[4 * (j + 3) + 3])); + } + } + + for (; i < node->nchilds; i++) { + DO_MIN(node->child_bb + 6 * i, min); + DO_MAX(node->child_bb + 3 + 6 * i, max); + } + } +} + + + +/* + * Builds a SVBVH tree form a VBVHTree + */ +template<class OldNode> +struct Reorganize_SVBVH { + MemArena *arena; + + float childs_per_node; + int nodes_with_childs[16]; + int useless_bb; + int nodes; + + Reorganize_SVBVH(MemArena *a) + { + arena = a; + nodes = 0; + childs_per_node = 0; + useless_bb = 0; + + for (int i = 0; i < 16; i++) { + nodes_with_childs[i] = 0; + } + } + + ~Reorganize_SVBVH() + { +#if 0 + { + printf("%f childs per node\n", childs_per_node / nodes); + printf("%d childs BB are useless\n", useless_bb); + for (int i = 0; i < 16; i++) { + printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, nodes_with_childs[i] / float(nodes)); + } + } +#endif + } + + SVBVHNode *create_node(int nchilds) + { + SVBVHNode *node = (SVBVHNode *)BLI_memarena_alloc(arena, sizeof(SVBVHNode)); + node->nchilds = nchilds; + + return node; + } + + void copy_bb(float bb[6], const float old_bb[6]) + { + std::copy(old_bb, old_bb + 6, bb); + } + + void prepare_for_simd(SVBVHNode *node) + { + int i = 0; + while (i + 4 <= node->nchilds) { + float vec_tmp[4 * 6]; + float *res = node->child_bb + 6 * i; + std::copy(res, res + 6 * 4, vec_tmp); + + for (int j = 0; j < 6; j++) { + res[4 * j + 0] = vec_tmp[6 * 0 + j]; + res[4 * j + 1] = vec_tmp[6 * 1 + j]; + res[4 * j + 2] = vec_tmp[6 * 2 + j]; + res[4 * j + 3] = vec_tmp[6 * 3 + j]; + } + + i += 4; + } + } + + /* amt must be power of two */ + inline int padup(int num, int amt) + { + return ((num + (amt - 1)) & ~(amt - 1)); + } + + SVBVHNode *transform(OldNode *old) + { + if (is_leaf(old)) + return (SVBVHNode *)old; + if (is_leaf(old->child)) + return (SVBVHNode *)old->child; + + int nchilds = count_childs(old); + int alloc_childs = nchilds; + if (nchilds % 4 > 2) + alloc_childs = padup(nchilds, 4); + + SVBVHNode *node = create_node(alloc_childs); + + childs_per_node += nchilds; + nodes++; + if (nchilds < 16) + nodes_with_childs[nchilds]++; + + useless_bb += alloc_childs - nchilds; + while (alloc_childs > nchilds) { + const static float def_bb[6] = {FLT_MAX, FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX}; + alloc_childs--; + node->child[alloc_childs] = NULL; + copy_bb(node->child_bb + alloc_childs * 6, def_bb); + } + + int i = nchilds; + for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling) { + i--; + node->child[i] = transform(o_child); + if (is_leaf(o_child)) { + float bb[6]; + INIT_MINMAX(bb, bb + 3); + RE_rayobject_merge_bb((RayObject *)o_child, bb, bb + 3); + copy_bb(node->child_bb + i * 6, bb); + break; + } + else { + copy_bb(node->child_bb + i * 6, o_child->bb); + } + } + assert(i == 0); + + prepare_for_simd(node); + + return node; + } +}; + +#endif /* __SSE__ */ + +#endif /* __SVBVH_H__ */ diff --git a/source/blender/render/intern/raytrace/vbvh.h b/source/blender/render/intern/raytrace/vbvh.h new file mode 100644 index 00000000000..0b0bbd19116 --- /dev/null +++ b/source/blender/render/intern/raytrace/vbvh.h @@ -0,0 +1,238 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/vbvh.h + * \ingroup render + */ + + +#include <assert.h> +#include <algorithm> + +#include "BLI_memarena.h" + +#include "rayobject_rtbuild.h" + +/* + * VBVHNode represents a BVHNode with support for a variable number of childrens + */ +struct VBVHNode { + float bb[6]; + + VBVHNode *child; + VBVHNode *sibling; +}; + + +/* + * Push nodes (used on dfs) + */ +template<class Node> +inline static void bvh_node_push_childs(Node *node, Isect *UNUSED(isec), Node **stack, int &stack_pos) +{ + Node *child = node->child; + + if (is_leaf(child)) { + stack[stack_pos++] = child; + } + else { + while (child) { + /* Skips BB tests on primitives */ +#if 0 + if (is_leaf(child->child)) { + stack[stack_pos++] = child->child; + } + else +#endif + { + stack[stack_pos++] = child; + } + + child = child->sibling; + } + } +} + + +template<class Node> +static int count_childs(Node *parent) +{ + int n = 0; + for (Node *i = parent->child; i; i = i->sibling) { + n++; + if (is_leaf(i)) + break; + } + + return n; +} + + +template<class Node> +static void append_sibling(Node *node, Node *sibling) +{ + while (node->sibling) + node = node->sibling; + + node->sibling = sibling; +} + + +/* + * Builds a binary VBVH from a rtbuild + */ +template<class Node> +struct BuildBinaryVBVH { + MemArena *arena; + RayObjectControl *control; + + void test_break() + { + if (RE_rayobjectcontrol_test_break(control)) + throw "Stop"; + } + + BuildBinaryVBVH(MemArena *a, RayObjectControl *c) + { + arena = a; + control = c; + } + + Node *create_node() + { + Node *node = (Node *)BLI_memarena_alloc(arena, sizeof(Node) ); + assert(RE_rayobject_isAligned(node)); + + node->sibling = NULL; + node->child = NULL; + + return node; + } + + int rtbuild_split(RTBuilder *builder) + { + return ::rtbuild_heuristic_object_split(builder, 2); + } + + Node *transform(RTBuilder *builder) + { + try + { + return _transform(builder); + + } catch (...) + { + } + return NULL; + } + + Node *_transform(RTBuilder *builder) + { + int size = rtbuild_size(builder); + + if (size == 0) { + return NULL; + } + else if (size == 1) { + Node *node = create_node(); + INIT_MINMAX(node->bb, node->bb + 3); + rtbuild_merge_bb(builder, node->bb, node->bb + 3); + node->child = (Node *) rtbuild_get_primitive(builder, 0); + return node; + } + else { + test_break(); + + Node *node = create_node(); + + Node **child = &node->child; + + int nc = rtbuild_split(builder); + INIT_MINMAX(node->bb, node->bb + 3); + + assert(nc == 2); + for (int i = 0; i < nc; i++) { + RTBuilder tmp; + rtbuild_get_child(builder, i, &tmp); + + *child = _transform(&tmp); + DO_MIN((*child)->bb, node->bb); + DO_MAX((*child)->bb + 3, node->bb + 3); + child = &((*child)->sibling); + } + + *child = NULL; + return node; + } + } +}; + +#if 0 +template<class Tree, class OldNode> +struct Reorganize_VBVH { + Tree *tree; + + Reorganize_VBVH(Tree *t) + { + tree = t; + } + + VBVHNode *create_node() + { + VBVHNode *node = (VBVHNode *)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode)); + return node; + } + + void copy_bb(VBVHNode *node, OldNode *old) + { + std::copy(old->bb, old->bb + 6, node->bb); + } + + VBVHNode *transform(OldNode *old) + { + if (is_leaf(old)) + return (VBVHNode *)old; + + VBVHNode *node = create_node(); + VBVHNode **child_ptr = &node->child; + node->sibling = 0; + + copy_bb(node, old); + + for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling) + { + VBVHNode *n_child = transform(o_child); + *child_ptr = n_child; + if (is_leaf(n_child)) return node; + child_ptr = &n_child->sibling; + } + *child_ptr = 0; + + return node; + } +}; +#endif |