diff options
Diffstat (limited to 'source/blender/render/intern/raytrace')
-rw-r--r-- | source/blender/render/intern/raytrace/bvh.h | 400 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject.cpp | 537 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_hint.h | 70 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_qbvh.cpp | 149 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_rtbuild.cpp | 496 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_rtbuild.h | 121 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_svbvh.cpp | 181 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_vbvh.cpp | 191 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/reorganize.h | 516 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/svbvh.h | 249 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/vbvh.h | 237 |
11 files changed, 3147 insertions, 0 deletions
diff --git a/source/blender/render/intern/raytrace/bvh.h b/source/blender/render/intern/raytrace/bvh.h new file mode 100644 index 00000000000..0e74bbc923b --- /dev/null +++ b/source/blender/render/intern/raytrace/bvh.h @@ -0,0 +1,400 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include "rayobject.h" +#include "raycounter.h" +#include "MEM_guardedalloc.h" +#include "rayobject_rtbuild.h" +#include "rayobject_hint.h" + +#include <assert.h> + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + +#ifndef RE_RAYTRACE_BVH_H +#define RE_RAYTRACE_BVH_H + +#ifdef __SSE__ +inline int test_bb_group4(__m128 *bb_group, const Isect *isec) +{ + + const __m128 tmin0 = _mm_setzero_ps(); + const __m128 tmax0 = _mm_load1_ps(&isec->labda); + + const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[0]], _mm_load1_ps(&isec->start[0]) ), _mm_load1_ps(&isec->idot_axis[0])) ); + const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[1]], _mm_load1_ps(&isec->start[0]) ), _mm_load1_ps(&isec->idot_axis[0])) ); + const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[2]], _mm_load1_ps(&isec->start[1]) ), _mm_load1_ps(&isec->idot_axis[1])) ); + const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[3]], _mm_load1_ps(&isec->start[1]) ), _mm_load1_ps(&isec->idot_axis[1])) ); + const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[4]], _mm_load1_ps(&isec->start[2]) ), _mm_load1_ps(&isec->idot_axis[2])) ); + const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[5]], _mm_load1_ps(&isec->start[2]) ), _mm_load1_ps(&isec->idot_axis[2])) ); + + return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); +} +#endif + + +/* bvh tree generics */ +template<class Tree> static int bvh_intersect(Tree *obj, Isect *isec); + +template<class Tree> static void bvh_add(Tree *obj, RayObject *ob) +{ + rtbuild_add( obj->builder, ob ); +} + +template<class Node> +inline bool is_leaf(Node *node) +{ + return !RE_rayobject_isAligned(node); +} + +template<class Tree> static void bvh_done(Tree *obj); + +template<class Tree> +static void bvh_free(Tree *obj) +{ + if(obj->builder) + rtbuild_free(obj->builder); + + if(obj->node_arena) + BLI_memarena_free(obj->node_arena); + + MEM_freeN(obj); +} + +template<class Tree> +static void bvh_bb(Tree *obj, float *min, float *max) +{ + bvh_node_merge_bb(obj->root, min, max); +} + + +template<class Tree> +static float bvh_cost(Tree *obj) +{ + assert(obj->cost >= 0.0); + return obj->cost; +} + + + +/* bvh tree nodes generics */ +template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec) +{ + return RE_rayobject_bb_intersect_test(isec, (const float*)node->bb); +} + + +template<class Node> +static void bvh_node_merge_bb(Node *node, float *min, float *max) +{ + if(is_leaf(node)) + { + RE_rayobject_merge_bb( (RayObject*)node, min, max); + } + else + { + DO_MIN(node->bb , min); + DO_MAX(node->bb+3, max); + } +} + + + +/* + * recursivly transverse a BVH looking for a rayhit using a local stack + */ +template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos); + +template<class Node,int MAX_STACK_SIZE,bool TEST_ROOT> +static int bvh_node_stack_raycast(Node *root, Isect *isec) +{ + Node *stack[MAX_STACK_SIZE]; + int hit = 0, stack_pos = 0; + + if(!TEST_ROOT && !is_leaf(root)) + bvh_node_push_childs(root, isec, stack, stack_pos); + else + stack[stack_pos++] = root; + + while(stack_pos) + { + Node *node = stack[--stack_pos]; + if(!is_leaf(node)) + { + if(bvh_node_hit_test(node,isec)) + { + bvh_node_push_childs(node, isec, stack, stack_pos); + assert(stack_pos <= MAX_STACK_SIZE); + } + } + else + { + hit |= RE_rayobject_intersect( (RayObject*)node, isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + return hit; +} + + +#ifdef __SSE__ +/* + * Generic SIMD bvh recursion + * this was created to be able to use any simd (with the cost of some memmoves) + * it can take advantage of any SIMD width and doens't needs any special tree care + */ +template<class Node,int MAX_STACK_SIZE,bool TEST_ROOT> +static int bvh_node_stack_raycast_simd(Node *root, Isect *isec) +{ + Node *stack[MAX_STACK_SIZE]; + + int hit = 0, stack_pos = 0; + + if(!TEST_ROOT) + { + if(!is_leaf(root)) + { + if(!is_leaf(root->child)) + bvh_node_push_childs(root, isec, stack, stack_pos); + else + return RE_rayobject_intersect( (RayObject*)root->child, isec); + } + else + return RE_rayobject_intersect( (RayObject*)root, isec); + } + else + { + if(!is_leaf(root)) + stack[stack_pos++] = root; + else + return RE_rayobject_intersect( (RayObject*)root, isec); + } + + while(true) + { + //Use SIMD 4 + if(stack_pos >= 4) + { + __m128 t_bb[6]; + Node * t_node[4]; + + stack_pos -= 4; + + /* prepare the 4BB for SIMD */ + t_node[0] = stack[stack_pos+0]->child; + t_node[1] = stack[stack_pos+1]->child; + t_node[2] = stack[stack_pos+2]->child; + t_node[3] = stack[stack_pos+3]->child; + + const float *bb0 = stack[stack_pos+0]->bb; + const float *bb1 = stack[stack_pos+1]->bb; + const float *bb2 = stack[stack_pos+2]->bb; + const float *bb3 = stack[stack_pos+3]->bb; + + const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1,0,1,0) ); + const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1,0,1,0) ); + t_bb[0] = _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ); + t_bb[1] = _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ); + + const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3,2,3,2) ); + const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3,2,3,2) ); + t_bb[2] = _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ); + t_bb[3] = _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ); + + const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_load_ps(bb0+4), _mm_load_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) ); + const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_load_ps(bb2+4), _mm_load_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) ); + t_bb[4] = _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ); + t_bb[5] = _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ); +/* + for(int i=0; i<4; i++) + { + Node *t = stack[stack_pos+i]; + assert(!is_leaf(t)); + + float *bb = ((float*)t_bb)+i; + bb[4*0] = t->bb[0]; + bb[4*1] = t->bb[1]; + bb[4*2] = t->bb[2]; + bb[4*3] = t->bb[3]; + bb[4*4] = t->bb[4]; + bb[4*5] = t->bb[5]; + t_node[i] = t->child; + } +*/ + RE_RC_COUNT(isec->raycounter->simd_bb.test); + int res = test_bb_group4( t_bb, isec ); + + for(int i=0; i<4; i++) + if(res & (1<<i)) + { + RE_RC_COUNT(isec->raycounter->simd_bb.hit); + if(!is_leaf(t_node[i])) + { + for(Node *t=t_node[i]; t; t=t->sibling) + { + assert(stack_pos < MAX_STACK_SIZE); + stack[stack_pos++] = t; + } + } + else + { + hit |= RE_rayobject_intersect( (RayObject*)t_node[i], isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + else if(stack_pos > 0) + { + Node *node = stack[--stack_pos]; + assert(!is_leaf(node)); + + if(bvh_node_hit_test(node,isec)) + { + if(!is_leaf(node->child)) + { + bvh_node_push_childs(node, isec, stack, stack_pos); + assert(stack_pos <= MAX_STACK_SIZE); + } + else + { + hit |= RE_rayobject_intersect( (RayObject*)node->child, isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + else break; + } + return hit; +} +#endif + +/* + * recursively transverse a BVH looking for a rayhit using system stack + */ +/* +template<class Node> +static int bvh_node_raycast(Node *node, Isect *isec) +{ + int hit = 0; + if(bvh_test_node(node, isec)) + { + if(isec->idot_axis[node->split_axis] > 0.0f) + { + int i; + for(i=0; i<BVH_NCHILDS; i++) + if(!is_leaf(node->child[i])) + { + if(node->child[i] == 0) break; + + hit |= bvh_node_raycast(node->child[i], isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + else + { + hit |= RE_rayobject_intersect( (RayObject*)node->child[i], isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + else + { + int i; + for(i=BVH_NCHILDS-1; i>=0; i--) + if(!is_leaf(node->child[i])) + { + if(node->child[i]) + { + hit |= dfs_raycast(node->child[i], isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + else + { + hit |= RE_rayobject_intersect( (RayObject*)node->child[i], isec); + if(hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + return hit; +} +*/ + +template<class Node,class HintObject> +void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject) +{ + assert( hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE ); + + if(is_leaf(node)) + { + hint->stack[hint->size++] = (RayObject*)node; + } + else + { + int childs = count_childs(node); + if(hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE) + { + int result = hint_test_bb(hintObject, node->bb, node->bb+3); + if(result == HINT_RECURSE) + { + /* We are 100% sure the ray will be pass inside this node */ + bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject); + } + else if(result == HINT_ACCEPT) + { + hint->stack[hint->size++] = (RayObject*)node; + } + } + else + { + hint->stack[hint->size++] = (RayObject*)node; + } + } +} + + +template<class Tree> +static RayObjectAPI* bvh_get_api(int maxstacksize); + + +template<class Tree, int DFS_STACK_SIZE> +static inline RayObject *bvh_create_tree(int size) +{ + Tree *obj= (Tree*)MEM_callocN(sizeof(Tree), "BVHTree" ); + assert( RE_rayobject_isAligned(obj) ); /* RayObject API assumes real data to be 4-byte aligned */ + + obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE); + obj->root = NULL; + + obj->node_arena = NULL; + obj->builder = rtbuild_create( size ); + + return RE_rayobject_unalignRayAPI((RayObject*) obj); +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject.cpp b/source/blender/render/intern/raytrace/rayobject.cpp new file mode 100644 index 00000000000..95387cf1ee4 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject.cpp @@ -0,0 +1,537 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include <assert.h> + +#include "BKE_utildefines.h" +#include "BLI_arithb.h" +#include "DNA_material_types.h" + +#include "RE_raytrace.h" +#include "render_types.h" +#include "rayobject.h" +#include "raycounter.h" + +/* + * Determines the distance that the ray must travel to hit the bounding volume of the given node + * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe + * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9] + */ +int RE_rayobject_bb_intersect_test(const Isect *isec, const float *_bb) +{ + const float *bb = _bb; + + float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; + float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; + float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; + float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; + float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; + float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; + + RE_RC_COUNT(isec->raycounter->bb.test); + + if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; + if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0; + if(t1x > isec->labda || t1y > isec->labda || t1z > isec->labda) return 0; + RE_RC_COUNT(isec->raycounter->bb.hit); + + return 1; +} + + +/* only for self-intersecting test with current render face (where ray left) */ +static int intersection2(VlakRen *face, float r0, float r1, float r2, float rx1, float ry1, float rz1) +{ + float co1[3], co2[3], co3[3], co4[3]; + float x0,x1,x2,t00,t01,t02,t10,t11,t12,t20,t21,t22; + float m0, m1, m2, divdet, det, det1; + float u1, v, u2; + + VECCOPY(co1, face->v1->co); + VECCOPY(co2, face->v2->co); + if(face->v4) + { + VECCOPY(co3, face->v4->co); + VECCOPY(co4, face->v3->co); + } + else + { + VECCOPY(co3, face->v3->co); + } + + t00= co3[0]-co1[0]; + t01= co3[1]-co1[1]; + t02= co3[2]-co1[2]; + t10= co3[0]-co2[0]; + t11= co3[1]-co2[1]; + t12= co3[2]-co2[2]; + + x0= t11*r2-t12*r1; + x1= t12*r0-t10*r2; + x2= t10*r1-t11*r0; + + divdet= t00*x0+t01*x1+t02*x2; + + m0= rx1-co3[0]; + m1= ry1-co3[1]; + m2= rz1-co3[2]; + det1= m0*x0+m1*x1+m2*x2; + + if(divdet!=0.0f) { + u1= det1/divdet; + + if(u1<ISECT_EPSILON) { + det= t00*(m1*r2-m2*r1); + det+= t01*(m2*r0-m0*r2); + det+= t02*(m0*r1-m1*r0); + v= det/divdet; + + if(v<ISECT_EPSILON && (u1 + v) > -(1.0f+ISECT_EPSILON)) { + return 1; + } + } + } + + if(face->v4) { + + t20= co3[0]-co4[0]; + t21= co3[1]-co4[1]; + t22= co3[2]-co4[2]; + + divdet= t20*x0+t21*x1+t22*x2; + if(divdet!=0.0f) { + u2= det1/divdet; + + if(u2<ISECT_EPSILON) { + det= t20*(m1*r2-m2*r1); + det+= t21*(m2*r0-m0*r2); + det+= t22*(m0*r1-m1*r0); + v= det/divdet; + + if(v<ISECT_EPSILON && (u2 + v) >= -(1.0f+ISECT_EPSILON)) { + return 2; + } + } + } + } + return 0; +} + +static inline int vlr_check_intersect(Isect *is, ObjectInstanceRen *obi, VlakRen *vlr) +{ + /* for baking selected to active non-traceable materials might still + * be in the raytree */ + if(!(vlr->mat->mode & MA_TRACEBLE)) + return 0; + + /* I know... cpu cycle waste, might do smarter once */ + if(is->mode==RE_RAY_MIRROR) + return !(vlr->mat->mode & MA_ONLYCAST); + else + return (is->lay & obi->lay); +} + +static inline int vlr_check_intersect_solid(Isect *is, ObjectInstanceRen* obi, VlakRen *vlr) +{ + /* solid material types only */ + if (vlr->mat->material_type == MA_TYPE_SURFACE) + return 1; + else + return 0; +} + +static inline int rayface_check_cullface(RayFace *face, Isect *is) +{ + float nor[3]; + + /* don't intersect if the ray faces along the face normal */ + if(face->quad) CalcNormFloat4(face->v1, face->v2, face->v3, face->v4, nor); + else CalcNormFloat(face->v1, face->v2, face->v3, nor); + + return (INPR(nor, is->vec) < 0); +} + +/* ray - triangle or quad intersection */ +/* this function shall only modify Isect if it detects an hit */ +static int intersect_rayface(RayObject *hit_obj, RayFace *face, Isect *is) +{ + float co1[3],co2[3],co3[3],co4[3]; + float x0,x1,x2,t00,t01,t02,t10,t11,t12,t20,t21,t22,r0,r1,r2; + float m0, m1, m2, divdet, det1; + float labda, u, v; + short ok=0; + + if(is->orig.ob == face->ob && is->orig.face == face->face) + return 0; + + + if(is->skip & RE_SKIP_VLR_RENDER_CHECK) + { + if(vlr_check_intersect(is, (ObjectInstanceRen*)face->ob, (VlakRen*)face->face ) == 0) + return 0; + } + if(is->skip & RE_SKIP_VLR_NON_SOLID_MATERIAL) + { + if(vlr_check_intersect_solid(is, (ObjectInstanceRen*)face->ob, (VlakRen*)face->face) == 0) + return 0; + } + if(is->skip & RE_SKIP_CULLFACE) + { + if(rayface_check_cullface(face, is) == 0) + return 0; + } + + RE_RC_COUNT(is->raycounter->faces.test); + + //Load coords + VECCOPY(co1, face->v1); + VECCOPY(co2, face->v2); + if(RE_rayface_isQuad(face)) + { + VECCOPY(co3, face->v4); + VECCOPY(co4, face->v3); + } + else + { + VECCOPY(co3, face->v3); + } + + t00= co3[0]-co1[0]; + t01= co3[1]-co1[1]; + t02= co3[2]-co1[2]; + t10= co3[0]-co2[0]; + t11= co3[1]-co2[1]; + t12= co3[2]-co2[2]; + + r0= is->vec[0]; + r1= is->vec[1]; + r2= is->vec[2]; + + x0= t12*r1-t11*r2; + x1= t10*r2-t12*r0; + x2= t11*r0-t10*r1; + + divdet= t00*x0+t01*x1+t02*x2; + + m0= is->start[0]-co3[0]; + m1= is->start[1]-co3[1]; + m2= is->start[2]-co3[2]; + det1= m0*x0+m1*x1+m2*x2; + + if(divdet!=0.0f) { + + divdet= 1.0f/divdet; + u= det1*divdet; + if(u<ISECT_EPSILON && u>-(1.0f+ISECT_EPSILON)) { + float cros0, cros1, cros2; + + cros0= m1*t02-m2*t01; + cros1= m2*t00-m0*t02; + cros2= m0*t01-m1*t00; + v= divdet*(cros0*r0 + cros1*r1 + cros2*r2); + + if(v<ISECT_EPSILON && (u + v) > -(1.0f+ISECT_EPSILON)) { + labda= divdet*(cros0*t10 + cros1*t11 + cros2*t12); + + if(labda>-ISECT_EPSILON && labda<is->labda) { + ok= 1; + } + } + } + } + + if(ok==0 && RE_rayface_isQuad(face)) { + + t20= co3[0]-co4[0]; + t21= co3[1]-co4[1]; + t22= co3[2]-co4[2]; + + divdet= t20*x0+t21*x1+t22*x2; + if(divdet!=0.0f) { + divdet= 1.0f/divdet; + u = det1*divdet; + + if(u<ISECT_EPSILON && u>-(1.0f+ISECT_EPSILON)) { + float cros0, cros1, cros2; + cros0= m1*t22-m2*t21; + cros1= m2*t20-m0*t22; + cros2= m0*t21-m1*t20; + v= divdet*(cros0*r0 + cros1*r1 + cros2*r2); + + if(v<ISECT_EPSILON && (u + v) >-(1.0f+ISECT_EPSILON)) { + labda= divdet*(cros0*t10 + cros1*t11 + cros2*t12); + + if(labda>-ISECT_EPSILON && labda<is->labda) { + ok= 2; + } + } + } + } + } + + if(ok) { + + /* when a shadow ray leaves a face, it can be little outside the edges of it, causing + intersection to be detected in its neighbour face */ + if(is->skip & RE_SKIP_VLR_NEIGHBOUR) + { + if(labda < 0.1f && is->orig.ob == face->ob) + { + VlakRen * a = (VlakRen*)is->orig.face; + VlakRen * b = (VlakRen*)face->face; + + /* so there's a shared edge or vertex, let's intersect ray with face + itself, if that's true we can safely return 1, otherwise we assume + the intersection is invalid, 0 */ + if(a->v1==b->v1 || a->v2==b->v1 || a->v3==b->v1 || a->v4==b->v1 + || a->v1==b->v2 || a->v2==b->v2 || a->v3==b->v2 || a->v4==b->v2 + || a->v1==b->v3 || a->v2==b->v3 || a->v3==b->v3 || a->v4==b->v3 + || (b->v4 && (a->v1==b->v4 || a->v2==b->v4 || a->v3==b->v4 || a->v4==b->v4))) + if(!intersection2((VlakRen*)a, -r0, -r1, -r2, is->start[0], is->start[1], is->start[2])) + { + return 0; + } + } + } + + RE_RC_COUNT(is->raycounter->faces.hit); + + is->isect= ok; // wich half of the quad + is->labda= labda; + is->u= u; is->v= v; + + is->hit.ob = face->ob; + is->hit.face = face->face; +#ifdef RT_USE_LAST_HIT + is->last_hit = hit_obj; +#endif + return 1; + } + + return 0; +} + +RayObject* RE_rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr) +{ + return RE_rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? vlr->v4->co : 0 ); +} + +RayObject* RE_rayface_from_coords(RayFace *rayface, void *ob, void *face, float *v1, float *v2, float *v3, float *v4) +{ + rayface->ob = ob; + rayface->face = face; + + VECCOPY(rayface->v1, v1); + VECCOPY(rayface->v2, v2); + VECCOPY(rayface->v3, v3); + if(v4) + { + VECCOPY(rayface->v4, v4); + rayface->quad = 1; + } + else + { + rayface->quad = 0; + } + + return RE_rayobject_unalignRayFace(rayface); +} + +RayObject* RE_vlakprimitive_from_vlak(VlakPrimitive *face, struct ObjectInstanceRen *obi, struct VlakRen *vlr) +{ + face->ob = obi; + face->face = vlr; + return RE_rayobject_unalignVlakPrimitive(face); +} + + +int RE_rayobject_raycast(RayObject *r, Isect *isec) +{ + int i; + RE_RC_COUNT(isec->raycounter->raycast.test); + + /* Setup vars used on raycast */ + isec->labda *= Normalize(isec->vec); + isec->dist = VecLength(isec->vec); + + for(i=0; i<3; i++) + { + isec->idot_axis[i] = 1.0f / isec->vec[i]; + + isec->bv_index[2*i] = isec->idot_axis[i] < 0.0 ? 1 : 0; + isec->bv_index[2*i+1] = 1 - isec->bv_index[2*i]; + + isec->bv_index[2*i] = i+3*isec->bv_index[2*i]; + isec->bv_index[2*i+1] = i+3*isec->bv_index[2*i+1]; + } + +#ifdef RT_USE_LAST_HIT + /* Last hit heuristic */ + if(isec->mode==RE_RAY_SHADOW && isec->last_hit) + { + RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.test); + + if(RE_rayobject_intersect(isec->last_hit, isec)) + { + RE_RC_COUNT(isec->raycounter->raycast.hit); + RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.hit); + return 1; + } + } +#endif + +#ifdef RT_USE_HINT + isec->hit_hint = 0; +#endif + + if(RE_rayobject_intersect(r, isec)) + { + RE_RC_COUNT(isec->raycounter->raycast.hit); + +#ifdef RT_USE_HINT + isec->hint = isec->hit_hint; +#endif + return 1; + } + return 0; +} + +int RE_rayobject_intersect(RayObject *r, Isect *i) +{ + if(RE_rayobject_isRayFace(r)) + { + return intersect_rayface(r, (RayFace*) RE_rayobject_align(r), i); + } + else if(RE_rayobject_isVlakPrimitive(r)) + { + //TODO optimize (useless copy to RayFace to avoid duplicate code) + VlakPrimitive *face = (VlakPrimitive*) RE_rayobject_align(r); + RayFace nface; + RE_rayface_from_vlak(&nface, face->ob, face->face); + + if(face->ob->transform_primitives) + { + Mat4MulVecfl(face->ob->mat, nface.v1); + Mat4MulVecfl(face->ob->mat, nface.v2); + Mat4MulVecfl(face->ob->mat, nface.v3); + if(RE_rayface_isQuad(&nface)) + Mat4MulVecfl(face->ob->mat, nface.v4); + } + + return intersect_rayface(r, &nface, i); + } + else if(RE_rayobject_isRayAPI(r)) + { + r = RE_rayobject_align( r ); + return r->api->raycast( r, i ); + } + else assert(0); +} + +void RE_rayobject_add(RayObject *r, RayObject *o) +{ + r = RE_rayobject_align( r ); + return r->api->add( r, o ); +} + +void RE_rayobject_done(RayObject *r) +{ + r = RE_rayobject_align( r ); + r->api->done( r ); +} + +void RE_rayobject_free(RayObject *r) +{ + r = RE_rayobject_align( r ); + r->api->free( r ); +} + +void RE_rayobject_merge_bb(RayObject *r, float *min, float *max) +{ + if(RE_rayobject_isRayFace(r)) + { + RayFace *face = (RayFace*) RE_rayobject_align(r); + + DO_MINMAX( face->v1, min, max ); + DO_MINMAX( face->v2, min, max ); + DO_MINMAX( face->v3, min, max ); + if(RE_rayface_isQuad(face)) DO_MINMAX( face->v4, min, max ); + } + else if(RE_rayobject_isVlakPrimitive(r)) + { + VlakPrimitive *face = (VlakPrimitive*) RE_rayobject_align(r); + VlakRen *vlr = face->face; + + DO_MINMAX( vlr->v1->co, min, max ); + DO_MINMAX( vlr->v2->co, min, max ); + DO_MINMAX( vlr->v3->co, min, max ); + if(vlr->v4) DO_MINMAX( vlr->v4->co, min, max ); + } + else if(RE_rayobject_isRayAPI(r)) + { + r = RE_rayobject_align( r ); + r->api->bb( r, min, max ); + } + else assert(0); +} + +float RE_rayobject_cost(RayObject *r) +{ + if(RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r)) + { + return 1.0; + } + else if(RE_rayobject_isRayAPI(r)) + { + r = RE_rayobject_align( r ); + return r->api->cost( r ); + } + else assert(0); +} + +void RE_rayobject_hint_bb(RayObject *r, RayHint *hint, float *min, float *max) +{ + if(RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r)) + { + return; + } + else if(RE_rayobject_isRayAPI(r)) + { + r = RE_rayobject_align( r ); + return r->api->hint_bb( r, hint, min, max ); + } + else assert(0); +} + +int RE_rayobjectcontrol_test_break(RayObjectControl *control) +{ + if(control->test_break) + return control->test_break( control->data ); + + return 0; +} diff --git a/source/blender/render/intern/raytrace/rayobject_hint.h b/source/blender/render/intern/raytrace/rayobject_hint.h new file mode 100644 index 00000000000..d85465aec66 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_hint.h @@ -0,0 +1,70 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#ifndef RE_RAYTRACE_RAYOBJECT_HINT_H +#define RE_RAYTRACE_RAYOBJECT_HINT_H + +#define HINT_RECURSE 1 +#define HINT_ACCEPT 0 +#define HINT_DISCARD -1 + +struct HintBB +{ + float bb[6]; +}; + +inline int hint_test_bb(HintBB *obj, float *Nmin, float *Nmax) +{ + if(bb_fits_inside( Nmin, Nmax, obj->bb, obj->bb+3 ) ) + return HINT_RECURSE; + else + return HINT_ACCEPT; +} +/* +struct HintFrustum +{ + float co[3]; + float no[4][3]; +}; + +inline int hint_test_bb(HintFrustum &obj, float *Nmin, float *Nmax) +{ + //if frustum inside BB + { + return HINT_RECURSE; + } + //if BB outside frustum + { + return HINT_DISCARD; + } + + return HINT_ACCEPT; +} +*/ + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_qbvh.cpp b/source/blender/render/intern/raytrace/rayobject_qbvh.cpp new file mode 100644 index 00000000000..8d35848c9ec --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_qbvh.cpp @@ -0,0 +1,149 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include "vbvh.h" +#include "svbvh.h" +#include "reorganize.h" + +#ifdef __SSE__ + +#define DFS_STACK_SIZE 256 + +struct QBVHTree +{ + RayObject rayobj; + + SVBVHNode *root; + MemArena *node_arena; + + float cost; + RTBuilder *builder; +}; + + +template<> +void bvh_done<QBVHTree>(QBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE); + BLI_memarena_use_malloc(arena1); + + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE); + BLI_memarena_use_malloc(arena2); + BLI_memarena_use_align(arena2, 16); + + //Build and optimize the tree + //TODO do this in 1 pass (half memory usage during building) + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if(RE_rayobjectcontrol_test_break(&obj->rayobj.control)) + { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + pushup_simd<VBVHNode,4>(root); + obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root); + + //Cleanup + BLI_memarena_free(arena1); + + rtbuild_free( obj->builder ); + obj->builder = NULL; + + obj->node_arena = arena2; + obj->cost = 1.0; +} + + +template<int StackSize> +int intersect(QBVHTree *obj, Isect* isec) +{ + //TODO renable hint support + if(RE_rayobject_isAligned(obj->root)) + return bvh_node_stack_raycast<SVBVHNode,StackSize,false>( obj->root, isec); + else + return RE_rayobject_intersect( (RayObject*) obj->root, isec ); +} + +template<class Tree> +void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *min, float *max) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject*)tree->root; + } +} +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int(*)(Tree*,Isect*)) &intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void(*)(Tree*,RayObject*)) &bvh_add<Tree>), + (RE_rayobject_done_callback) ((void(*)(Tree*)) &bvh_done<Tree>), + (RE_rayobject_free_callback) ((void(*)(Tree*)) &bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void(*)(Tree*,float*,float*)) &bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float(*)(Tree*)) &bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void(*)(Tree*,LCTSHint*,float*,float*)) &bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI* bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree,1024>(); + + if(maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return 0; +} + + +RayObject *RE_rayobject_qbvh_create(int size) +{ + return bvh_create_tree<QBVHTree,DFS_STACK_SIZE>(size); +} + + +#else + +RayObject *RE_rayobject_qbvh_create(int size) +{ + puts("WARNING: SSE disabled at compile time\n"); + return NULL; +} + +#endif
\ No newline at end of file diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp new file mode 100644 index 00000000000..9523e725893 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp @@ -0,0 +1,496 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include <assert.h> +#include <math.h> +#include <stdlib.h> +#include <algorithm> + +#include "rayobject_rtbuild.h" +#include "MEM_guardedalloc.h" +#include "BLI_arithb.h" +#include "BKE_utildefines.h" + +static bool selected_node(RTBuilder::Object *node) +{ + return node->selected; +} + +static void rtbuild_init(RTBuilder *b) +{ + b->split_axis = -1; + b->primitives.begin = 0; + b->primitives.end = 0; + b->primitives.maxsize = 0; + + for(int i=0; i<RTBUILD_MAX_CHILDS; i++) + b->child_offset[i] = 0; + + for(int i=0; i<3; i++) + b->sorted_begin[i] = b->sorted_end[i] = 0; + + INIT_MINMAX(b->bb, b->bb+3); +} + +RTBuilder* rtbuild_create(int size) +{ + RTBuilder *builder = (RTBuilder*) MEM_mallocN( sizeof(RTBuilder), "RTBuilder" ); + RTBuilder::Object *memblock= (RTBuilder::Object*)MEM_mallocN( sizeof(RTBuilder::Object)*size,"RTBuilder.objects"); + + + rtbuild_init(builder); + + builder->primitives.begin = builder->primitives.end = memblock; + builder->primitives.maxsize = size; + + for(int i=0; i<3; i++) + { + builder->sorted_begin[i] = (RTBuilder::Object**)MEM_mallocN( sizeof(RTBuilder::Object*)*size,"RTBuilder.sorted_objects"); + builder->sorted_end[i] = builder->sorted_begin[i]; + } + + + return builder; +} + +void rtbuild_free(RTBuilder *b) +{ + if(b->primitives.begin) MEM_freeN(b->primitives.begin); + + for(int i=0; i<3; i++) + if(b->sorted_begin[i]) + MEM_freeN(b->sorted_begin[i]); + + MEM_freeN(b); +} + +void rtbuild_add(RTBuilder *b, RayObject *o) +{ + assert( b->primitives.begin + b->primitives.maxsize != b->primitives.end ); + + b->primitives.end->obj = o; + b->primitives.end->cost = RE_rayobject_cost(o); + + INIT_MINMAX(b->primitives.end->bb, b->primitives.end->bb+3); + RE_rayobject_merge_bb(o, b->primitives.end->bb, b->primitives.end->bb+3); + + for(int i=0; i<3; i++) + { + *(b->sorted_end[i]) = b->primitives.end; + b->sorted_end[i]++; + } + b->primitives.end++; +} + +int rtbuild_size(RTBuilder *b) +{ + return b->sorted_end[0] - b->sorted_begin[0]; +} + + +template<class Obj,int Axis> +static bool obj_bb_compare(const Obj &a, const Obj &b) +{ + if(a->bb[Axis] != b->bb[Axis]) + return a->bb[Axis] < b->bb[Axis]; + return a->obj < b->obj; +} + +template<class Item> +static void object_sort(Item *begin, Item *end, int axis) +{ + if(axis == 0) return std::sort(begin, end, obj_bb_compare<Item,0> ); + if(axis == 1) return std::sort(begin, end, obj_bb_compare<Item,1> ); + if(axis == 2) return std::sort(begin, end, obj_bb_compare<Item,2> ); + assert(false); +} + +void rtbuild_done(RTBuilder *b, RayObjectControl* ctrl) +{ + for(int i=0; i<3; i++) + if(b->sorted_begin[i]) + { + if(RE_rayobjectcontrol_test_break(ctrl)) break; + object_sort( b->sorted_begin[i], b->sorted_end[i], i ); + } +} + +RayObject* rtbuild_get_primitive(RTBuilder *b, int index) +{ + return b->sorted_begin[0][index]->obj; +} + +RTBuilder* rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp) +{ + rtbuild_init( tmp ); + + for(int i=0; i<3; i++) + if(b->sorted_begin[i]) + { + tmp->sorted_begin[i] = b->sorted_begin[i] + b->child_offset[child ]; + tmp->sorted_end [i] = b->sorted_begin[i] + b->child_offset[child+1]; + } + else + { + tmp->sorted_begin[i] = 0; + tmp->sorted_end [i] = 0; + } + + return tmp; +} + +void rtbuild_calc_bb(RTBuilder *b) +{ + if(b->bb[0] == 1.0e30f) + { + for(RTBuilder::Object **index = b->sorted_begin[0]; index != b->sorted_end[0]; index++) + RE_rayobject_merge_bb( (*index)->obj , b->bb, b->bb+3); + } +} + +void rtbuild_merge_bb(RTBuilder *b, float *min, float *max) +{ + rtbuild_calc_bb(b); + DO_MIN(b->bb, min); + DO_MAX(b->bb+3, max); +} + +/* +int rtbuild_get_largest_axis(RTBuilder *b) +{ + rtbuild_calc_bb(b); + return bb_largest_axis(b->bb, b->bb+3); +} + +//Left balanced tree +int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis) +{ + int i; + int mleafs_per_child, Mleafs_per_child; + int tot_leafs = rtbuild_size(b); + int missing_leafs; + + long long s; + + assert(nchilds <= RTBUILD_MAX_CHILDS); + + //TODO optimize calc of leafs_per_child + for(s=nchilds; s<tot_leafs; s*=nchilds); + Mleafs_per_child = s/nchilds; + mleafs_per_child = Mleafs_per_child/nchilds; + + //split min leafs per child + b->child_offset[0] = 0; + for(i=1; i<=nchilds; i++) + b->child_offset[i] = mleafs_per_child; + + //split remaining leafs + missing_leafs = tot_leafs - mleafs_per_child*nchilds; + for(i=1; i<=nchilds; i++) + { + if(missing_leafs > Mleafs_per_child - mleafs_per_child) + { + b->child_offset[i] += Mleafs_per_child - mleafs_per_child; + missing_leafs -= Mleafs_per_child - mleafs_per_child; + } + else + { + b->child_offset[i] += missing_leafs; + missing_leafs = 0; + break; + } + } + + //adjust for accumulative offsets + for(i=1; i<=nchilds; i++) + b->child_offset[i] += b->child_offset[i-1]; + + //Count created childs + for(i=nchilds; b->child_offset[i] == b->child_offset[i-1]; i--); + split_leafs(b, b->child_offset, i, axis); + + assert( b->child_offset[0] == 0 && b->child_offset[i] == tot_leafs ); + return i; +} + + +int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds) +{ + int axis = rtbuild_get_largest_axis(b); + return rtbuild_mean_split(b, nchilds, axis); +} +*/ + +/* + * "separators" is an array of dim NCHILDS-1 + * and indicates where to cut the childs + */ +/* +int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis) +{ + int size = rtbuild_size(b); + + assert(nchilds <= RTBUILD_MAX_CHILDS); + if(size <= nchilds) + { + return rtbuild_mean_split(b, nchilds, axis); + } + else + { + int i; + + b->split_axis = axis; + + //Calculate child offsets + b->child_offset[0] = 0; + for(i=0; i<nchilds-1; i++) + b->child_offset[i+1] = split_leafs_by_plane(b, b->child_offset[i], size, separators[i]); + b->child_offset[nchilds] = size; + + for(i=0; i<nchilds; i++) + if(b->child_offset[i+1] - b->child_offset[i] == size) + return rtbuild_mean_split(b, nchilds, axis); + + return nchilds; + } +} + +int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds) +{ + int la, i; + float separators[RTBUILD_MAX_CHILDS]; + + rtbuild_calc_bb(b); + + la = bb_largest_axis(b->bb,b->bb+3); + for(i=1; i<nchilds; i++) + separators[i-1] = (b->bb[la+3]-b->bb[la])*i / nchilds; + + return rtbuild_median_split(b, separators, nchilds, la); +} +*/ + +//Heuristics Object Splitter + + +struct SweepCost +{ + float bb[6]; + float cost; +}; + +/* Object Surface Area Heuristic splitter */ +int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds) +{ + int size = rtbuild_size(b); + assert(nchilds == 2); + assert(size > 1); + int baxis = -1, boffset = 0; + + if(size > nchilds) + { + float bcost = FLT_MAX; + baxis = -1, boffset = size/2; + + SweepCost *sweep = (SweepCost*)MEM_mallocN( sizeof(SweepCost)*size, "RTBuilder.HeuristicSweep" ); + + for(int axis=0; axis<3; axis++) + { + SweepCost sweep_left; + + RTBuilder::Object **obj = b->sorted_begin[axis]; + +// float right_cost = 0; + for(int i=size-1; i>=0; i--) + { + if(i == size-1) + { + VECCOPY(sweep[i].bb, obj[i]->bb); + VECCOPY(sweep[i].bb+3, obj[i]->bb+3); + sweep[i].cost = obj[i]->cost; + } + else + { + sweep[i].bb[0] = MIN2(obj[i]->bb[0], sweep[i+1].bb[0]); + sweep[i].bb[1] = MIN2(obj[i]->bb[1], sweep[i+1].bb[1]); + sweep[i].bb[2] = MIN2(obj[i]->bb[2], sweep[i+1].bb[2]); + sweep[i].bb[3] = MAX2(obj[i]->bb[3], sweep[i+1].bb[3]); + sweep[i].bb[4] = MAX2(obj[i]->bb[4], sweep[i+1].bb[4]); + sweep[i].bb[5] = MAX2(obj[i]->bb[5], sweep[i+1].bb[5]); + sweep[i].cost = obj[i]->cost + sweep[i+1].cost; + } +// right_cost += obj[i]->cost; + } + + sweep_left.bb[0] = obj[0]->bb[0]; + sweep_left.bb[1] = obj[0]->bb[1]; + sweep_left.bb[2] = obj[0]->bb[2]; + sweep_left.bb[3] = obj[0]->bb[3]; + sweep_left.bb[4] = obj[0]->bb[4]; + sweep_left.bb[5] = obj[0]->bb[5]; + sweep_left.cost = obj[0]->cost; + +// right_cost -= obj[0]->cost; if(right_cost < 0) right_cost = 0; + + for(int i=1; i<size; i++) + { + //Worst case heuristic (cost of each child is linear) + float hcost, left_side, right_side; + + left_side = bb_area(sweep_left.bb, sweep_left.bb+3)*(sweep_left.cost+logf(i)); + right_side= bb_area(sweep[i].bb, sweep[i].bb+3)*(sweep[i].cost+logf(size-i)); + hcost = left_side+right_side; + + assert(left_side >= 0); + assert(right_side >= 0); + + if(left_side > bcost) break; //No way we can find a better heuristic in this axis + + assert(hcost >= 0); + if( hcost < bcost + || (hcost == bcost && axis < baxis)) //this makes sure the tree built is the same whatever is the order of the sorting axis + { + bcost = hcost; + baxis = axis; + boffset = i; + } + DO_MIN( obj[i]->bb, sweep_left.bb ); + DO_MAX( obj[i]->bb+3, sweep_left.bb+3 ); + + sweep_left.cost += obj[i]->cost; +// right_cost -= obj[i]->cost; if(right_cost < 0) right_cost = 0; + } + + assert(baxis >= 0 && baxis < 3); + } + + + MEM_freeN(sweep); + } + else if(size == 2) + { + baxis = 0; + boffset = 1; + } + else if(size == 1) + { + b->child_offset[0] = 0; + b->child_offset[1] = 1; + return 1; + } + + b->child_offset[0] = 0; + b->child_offset[1] = boffset; + b->child_offset[2] = size; + + + /* Adjust sorted arrays for childs */ + for(int i=0; i<boffset; i++) b->sorted_begin[baxis][i]->selected = true; + for(int i=boffset; i<size; i++) b->sorted_begin[baxis][i]->selected = false; + for(int i=0; i<3; i++) + std::stable_partition( b->sorted_begin[i], b->sorted_end[i], selected_node ); + + return nchilds; +} + +/* + * Helper code + * PARTITION code / used on mean-split + * basicly this a std::nth_element (like on C++ STL algorithm) + */ +/* +static void split_leafs(RTBuilder *b, int *nth, int partitions, int split_axis) +{ + int i; + b->split_axis = split_axis; + + for(i=0; i < partitions-1; i++) + { + assert(nth[i] < nth[i+1] && nth[i+1] < nth[partitions]); + + if(split_axis == 0) std::nth_element(b, nth[i], nth[i+1], nth[partitions], obj_bb_compare<RTBuilder::Object,0>); + if(split_axis == 1) std::nth_element(b, nth[i], nth[i+1], nth[partitions], obj_bb_compare<RTBuilder::Object,1>); + if(split_axis == 2) std::nth_element(b, nth[i], nth[i+1], nth[partitions], obj_bb_compare<RTBuilder::Object,2>); + } +} +*/ + +/* + * Bounding Box utils + */ +float bb_volume(float *min, float *max) +{ + return (max[0]-min[0])*(max[1]-min[1])*(max[2]-min[2]); +} + +float bb_area(float *min, float *max) +{ + float sub[3], a; + sub[0] = max[0]-min[0]; + sub[1] = max[1]-min[1]; + sub[2] = max[2]-min[2]; + + a = (sub[0]*sub[1] + sub[0]*sub[2] + sub[1]*sub[2])*2; + assert(a >= 0.0); + return a; +} + +int bb_largest_axis(float *min, float *max) +{ + float sub[3]; + + sub[0] = max[0]-min[0]; + sub[1] = max[1]-min[1]; + sub[2] = max[2]-min[2]; + if(sub[0] > sub[1]) + { + if(sub[0] > sub[2]) + return 0; + else + return 2; + } + else + { + if(sub[1] > sub[2]) + return 1; + else + return 2; + } +} + +int bb_fits_inside(float *outer_min, float *outer_max, float *inner_min, float *inner_max) +{ + int i; + for(i=0; i<3; i++) + if(outer_min[i] > inner_min[i]) return 0; + + for(i=0; i<3; i++) + if(outer_max[i] < inner_max[i]) return 0; + + return 1; +} diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.h b/source/blender/render/intern/raytrace/rayobject_rtbuild.h new file mode 100644 index 00000000000..71665681586 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.h @@ -0,0 +1,121 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#ifndef RE_RAYOBJECT_RTBUILD_H +#define RE_RAYOBJECT_RTBUILD_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "rayobject.h" + + +/* + * Ray Tree Builder + * this structs helps building any type of tree + * it contains several methods to organiza/split nodes + * allowing to create a given tree on the fly. + * + * Idea is that other trees BVH, BIH can use this code to + * generate with simple calls, and then convert to the theirs + * specific structure on the fly. + */ +#define RTBUILD_MAX_CHILDS 32 + + +typedef struct RTBuilder +{ + struct Object + { + RayObject *obj; + float cost; + float bb[6]; + int selected; + }; + + /* list to all primitives added in this tree */ + struct + { + Object *begin, *end; + int maxsize; + } primitives; + + /* sorted list of rayobjects */ + struct Object **sorted_begin[3], **sorted_end[3]; + + /* axis used (if any) on the split method */ + int split_axis; + + /* child partitions calculated during splitting */ + int child_offset[RTBUILD_MAX_CHILDS+1]; + +// int child_sorted_axis; /* -1 if not sorted */ + + float bb[6]; + +} RTBuilder; + +/* used during creation */ +RTBuilder* rtbuild_create(int size); +void rtbuild_free(RTBuilder *b); +void rtbuild_add(RTBuilder *b, RayObject *o); +void rtbuild_done(RTBuilder *b, RayObjectControl *c); +void rtbuild_merge_bb(RTBuilder *b, float *min, float *max); +int rtbuild_size(RTBuilder *b); + +RayObject* rtbuild_get_primitive(RTBuilder *b, int offset); + +/* used during tree reorganization */ +RTBuilder* rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp); + +/* Calculates child partitions and returns number of efectively needed partitions */ +int rtbuild_get_largest_axis(RTBuilder *b); + +//Object partition +int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis); +int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds); + +int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds); + +//Space partition +int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis); +int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds); + + +/* bb utils */ +float bb_area(float *min, float *max); +float bb_volume(float *min, float *max); +int bb_largest_axis(float *min, float *max); +int bb_fits_inside(float *outer_min, float *outer_max, float *inner_min, float *inner_max); /* only returns 0 if merging inner and outerbox would create a box larger than outer box */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_svbvh.cpp b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp new file mode 100644 index 00000000000..a877b91e2ff --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp @@ -0,0 +1,181 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include "vbvh.h" +#include "svbvh.h" +#include "reorganize.h" + +#ifdef __SSE__ + +#define DFS_STACK_SIZE 256 + +struct SVBVHTree +{ + RayObject rayobj; + + SVBVHNode *root; + MemArena *node_arena; + + float cost; + RTBuilder *builder; +}; + +/* + * Cost to test N childs + */ +struct PackCost +{ + float operator()(int n) + { + return (n / 4) + ((n % 4) > 2 ? 1 : n%4); + } +}; + + +template<> +void bvh_done<SVBVHTree>(SVBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE); + BLI_memarena_use_malloc(arena1); + + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE); + BLI_memarena_use_malloc(arena2); + BLI_memarena_use_align(arena2, 16); + + //Build and optimize the tree + if(0) + { + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1,&obj->rayobj.control).transform(obj->builder); + if(RE_rayobjectcontrol_test_break(&obj->rayobj.control)) + { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + reorganize(root); + remove_useless(root, &root); + bvh_refit(root); + + pushup(root); + pushdown(root); + pushup_simd<VBVHNode,4>(root); + + obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root); + } + else + { + //Finds the optimal packing of this tree using a given cost model + //TODO this uses quite a lot of memory, find ways to reduce memory usage during building + OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena1,&obj->rayobj.control).transform(obj->builder); + if(RE_rayobjectcontrol_test_break(&obj->rayobj.control)) + { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + VBVH_optimalPackSIMD<OVBVHNode,PackCost>(PackCost()).transform(root); + obj->root = Reorganize_SVBVH<OVBVHNode>(arena2).transform(root); + } + + + //Free data + BLI_memarena_free(arena1); + + obj->node_arena = arena2; + obj->cost = 1.0; + + + rtbuild_free( obj->builder ); + obj->builder = NULL; +} + +template<int StackSize> +int intersect(SVBVHTree *obj, Isect* isec) +{ + //TODO renable hint support + if(RE_rayobject_isAligned(obj->root)) + return bvh_node_stack_raycast<SVBVHNode,StackSize,false>( obj->root, isec); + else + return RE_rayobject_intersect( (RayObject*) obj->root, isec ); +} + +template<class Tree> +void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *min, float *max) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject*)tree->root; + } +} +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int(*)(Tree*,Isect*)) &intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void(*)(Tree*,RayObject*)) &bvh_add<Tree>), + (RE_rayobject_done_callback) ((void(*)(Tree*)) &bvh_done<Tree>), + (RE_rayobject_free_callback) ((void(*)(Tree*)) &bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void(*)(Tree*,float*,float*)) &bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float(*)(Tree*)) &bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void(*)(Tree*,LCTSHint*,float*,float*)) &bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI* bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree,1024>(); + + if(maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return 0; +} + +RayObject *RE_rayobject_svbvh_create(int size) +{ + return bvh_create_tree<SVBVHTree,DFS_STACK_SIZE>(size); +} +#else + +RayObject *RE_rayobject_svbvh_create(int size) +{ + puts("WARNING: SSE disabled at compile time\n"); + return NULL; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_vbvh.cpp b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp new file mode 100644 index 00000000000..11f04c04141 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp @@ -0,0 +1,191 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +int tot_pushup = 0; +int tot_pushdown = 0; +int tot_hints = 0; + + +#include <assert.h> +#include "rayobject.h" +#include "rayobject_rtbuild.h" +#include "RE_raytrace.h" +#include "BLI_memarena.h" +#include "MEM_guardedalloc.h" +#include "BKE_utildefines.h" +#include "BLI_arithb.h" + +#include "reorganize.h" +#include "bvh.h" +#include "vbvh.h" +#include "svbvh.h" +#include <queue> +#include <algorithm> + +#define DFS_STACK_SIZE 256 + +struct VBVHTree +{ + RayObject rayobj; + VBVHNode *root; + MemArena *node_arena; + float cost; + RTBuilder *builder; +}; + +/* + * Cost to test N childs + */ +struct PackCost +{ + float operator()(int n) + { + return n; + } +}; + +template<> +void bvh_done<VBVHTree>(VBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE); + BLI_memarena_use_malloc(arena1); + + //Build and optimize the tree + if(1) + { + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1,&obj->rayobj.control).transform(obj->builder); + if(RE_rayobjectcontrol_test_break(&obj->rayobj.control)) + { + BLI_memarena_free(arena1); + return; + } + + reorganize(root); + remove_useless(root, &root); + bvh_refit(root); + + pushup(root); + pushdown(root); + obj->root = root; + } + else + { +/* + TODO + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE); + BLI_memarena_use_malloc(arena2); + + //Finds the optimal packing of this tree using a given cost model + //TODO this uses quite a lot of memory, find ways to reduce memory usage during building + OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena2).transform(obj->builder); + VBVH_optimalPackSIMD<OVBVHNode,PackCost>(PackCost()).transform(root); + obj->root = Reorganize_VBVH<OVBVHNode>(arena1).transform(root); + + BLI_memarena_free(arena2); + */ + } + + //Cleanup + rtbuild_free( obj->builder ); + obj->builder = NULL; + + obj->node_arena = arena1; + obj->cost = 1.0; +} + +template<int StackSize> +int intersect(VBVHTree *obj, Isect* isec) +{ + //TODO renable hint support + if(RE_rayobject_isAligned(obj->root)) + return bvh_node_stack_raycast<VBVHNode,StackSize,false>( obj->root, isec); + else + return RE_rayobject_intersect( (RayObject*) obj->root, isec ); +} + +template<class Tree> +void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *min, float *max) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject*)tree->root; + } +} + +void bfree(VBVHTree *tree) +{ + if(tot_pushup + tot_pushdown + tot_hints + tot_moves) + { + printf("tot pushups: %d\n", tot_pushup); + printf("tot pushdowns: %d\n", tot_pushdown); + printf("tot moves: %d\n", tot_moves); + printf("tot hints created: %d\n", tot_hints); + tot_pushup = 0; + tot_pushdown = 0; + tot_hints = 0; + tot_moves = 0; + } + bvh_free(tree); +} + +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int(*)(Tree*,Isect*)) &intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void(*)(Tree*,RayObject*)) &bvh_add<Tree>), + (RE_rayobject_done_callback) ((void(*)(Tree*)) &bvh_done<Tree>), + (RE_rayobject_free_callback) ((void(*)(Tree*)) &bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void(*)(Tree*,float*,float*)) &bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float(*)(Tree*)) &bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void(*)(Tree*,LCTSHint*,float*,float*)) &bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI* bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree,1024>(); + + if(maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return 0; +} + +RayObject *RE_rayobject_vbvh_create(int size) +{ + return bvh_create_tree<VBVHTree,DFS_STACK_SIZE>(size); +} diff --git a/source/blender/render/intern/raytrace/reorganize.h b/source/blender/render/intern/raytrace/reorganize.h new file mode 100644 index 00000000000..f0335b769d5 --- /dev/null +++ b/source/blender/render/intern/raytrace/reorganize.h @@ -0,0 +1,516 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include <stdio.h> +#include <math.h> +#include <algorithm> +#include <vector> +#include <queue> + +extern int tot_pushup; +extern int tot_pushdown; + +template<class Node> +bool node_fits_inside(Node *a, Node *b) +{ + return bb_fits_inside(b->bb, b->bb+3, a->bb, a->bb+3); +} + +template<class Node> +void reorganize_find_fittest_parent(Node *tree, Node *node, std::pair<float,Node*> &cost) +{ + std::queue<Node*> q; + q.push(tree); + + while(!q.empty()) + { + Node *parent = q.front(); + q.pop(); + + if(parent == node) continue; + if(node_fits_inside(node, parent) && RE_rayobject_isAligned(parent->child) ) + { + float pcost = bb_area(parent->bb, parent->bb+3); + cost = std::min( cost, std::make_pair(pcost,parent) ); + for(Node *child = parent->child; child; child = child->sibling) + q.push(child); + } + } +} + +static int tot_moves = 0; +template<class Node> +void reorganize(Node *root) +{ + std::queue<Node*> q; + + q.push(root); + while(!q.empty()) + { + Node * node = q.front(); + q.pop(); + + if( RE_rayobject_isAligned(node->child) ) + { + for(Node **prev = &node->child; *prev; ) + { + assert( RE_rayobject_isAligned(*prev) ); + q.push(*prev); + + std::pair<float,Node*> best(FLT_MAX, root); + reorganize_find_fittest_parent( root, *prev, best ); + + if(best.second == node) + { + //Already inside the fitnest BB + prev = &(*prev)->sibling; + } + else + { + Node *tmp = *prev; + *prev = (*prev)->sibling; + + tmp->sibling = best.second->child; + best.second->child = tmp; + + tot_moves++; + } + + + } + } + if(node != root) + { + } + } +} + +/* + * Prunes useless nodes from trees: + * erases nodes with total ammount of primitives = 0 + * prunes nodes with only one child (except if that child is a primitive) + */ +template<class Node> +void remove_useless(Node *node, Node **new_node) +{ + if( RE_rayobject_isAligned(node->child) ) + { + + for(Node **prev = &node->child; *prev; ) + { + Node *next = (*prev)->sibling; + remove_useless(*prev, prev); + if(*prev == 0) + *prev = next; + else + { + (*prev)->sibling = next; + prev = &((*prev)->sibling); + } + } + } + if(node->child) + { + if(RE_rayobject_isAligned(node->child) && node->child->sibling == 0) + *new_node = node->child; + } + else if(node->child == 0) + *new_node = 0; +} + +/* + * Minimizes expected number of BBtest by colapsing nodes + * it uses surface area heuristic for determining whether a node should be colapsed + */ +template<class Node> +void pushup(Node *parent) +{ + if(is_leaf(parent)) return; + + float p_area = bb_area(parent->bb, parent->bb+3); + Node **prev = &parent->child; + for(Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) + { + float c_area = bb_area(child->bb, child->bb+3) ; + int nchilds = count_childs(child); + float original_cost = (c_area / p_area)*nchilds + 1; + float flatten_cost = nchilds; + if(flatten_cost < original_cost && nchilds >= 2) + { + append_sibling(child, child->child); + child = child->sibling; + *prev = child; + +// *prev = child->child; +// append_sibling( *prev, child->sibling ); +// child = *prev; + tot_pushup++; + } + else + { + *prev = child; + prev = &(*prev)->sibling; + child = *prev; + } + } + + for(Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling) + pushup(child); +} + +/* + * try to optimize number of childs to be a multiple of SSize + */ +template<class Node, int SSize> +void pushup_simd(Node *parent) +{ + if(is_leaf(parent)) return; + + int n = count_childs(parent); + + Node **prev = &parent->child; + for(Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) + { + int cn = count_childs(child); + if(cn-1 <= (SSize - (n%SSize) ) % SSize && RE_rayobject_isAligned(child->child) ) + { + n += (cn - 1); + append_sibling(child, child->child); + child = child->sibling; + *prev = child; + } + else + { + *prev = child; + prev = &(*prev)->sibling; + child = *prev; + } + } + + for(Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling) + pushup_simd<Node,SSize>(child); +} + + +/* + * Pushdown + * makes sure no child fits inside any of its sibling + */ +template<class Node> +void pushdown(Node *parent) +{ + Node **s_child = &parent->child; + Node * child = parent->child; + + while(child && RE_rayobject_isAligned(child)) + { + Node *next = child->sibling; + Node **next_s_child = &child->sibling; + + //assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3)); + + for(Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling) + if(child != i && bb_fits_inside(i->bb, i->bb+3, child->bb, child->bb+3) && RE_rayobject_isAligned(i->child)) + { +// todo optimize (should the one with the smallest area?) +// float ia = bb_area(i->bb, i->bb+3) +// if(child->i) + *s_child = child->sibling; + child->sibling = i->child; + i->child = child; + next_s_child = s_child; + + tot_pushdown++; + break; + } + child = next; + s_child = next_s_child; + } + + for(Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling) + pushdown( i ); +} + + +/* + * BVH refit + * reajust nodes BB (useful if nodes childs where modified) + */ +template<class Node> +float bvh_refit(Node *node) +{ + if(is_leaf(node)) return 0; + if(is_leaf(node->child)) return 0; + + float total = 0; + + for(Node *child = node->child; child; child = child->sibling) + total += bvh_refit(child); + + float old_area = bb_area(node->bb, node->bb+3); + INIT_MINMAX(node->bb, node->bb+3); + for(Node *child = node->child; child; child = child->sibling) + { + DO_MIN(child->bb, node->bb); + DO_MAX(child->bb+3, node->bb+3); + } + total += old_area - bb_area(node->bb, node->bb+3); + return total; +} + + +/* + * this finds the best way to packing a tree acording to a given test cost function + * with the purpose to reduce the expected cost (eg.: number of BB tests). + */ +#include <vector> +#include <cmath> +#define MAX_CUT_SIZE 16 +#define MAX_OPTIMIZE_CHILDS MAX_CUT_SIZE + +struct OVBVHNode +{ + float bb[6]; + + OVBVHNode *child; + OVBVHNode *sibling; + + /* + * Returns min cost to represent the subtree starting at the given node, + * allowing it to have a given cutsize + */ + float cut_cost[MAX_CUT_SIZE]; + float get_cost(int cutsize) + { + return cut_cost[cutsize-1]; + } + + /* + * This saves the cut size of this child, when parent is reaching + * its minimum cut with the given cut size + */ + int cut_size[MAX_CUT_SIZE]; + int get_cut_size(int parent_cut_size) + { + return cut_size[parent_cut_size-1]; + } + + /* + * Reorganize the node based on calculated cut costs + */ + int best_cutsize; + void set_cut(int cutsize, OVBVHNode ***cut) + { + if(cutsize == 1) + { + **cut = this; + *cut = &(**cut)->sibling; + } + else + { + if(cutsize > MAX_CUT_SIZE) + { + for(OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) + { + child->set_cut( 1, cut ); + cutsize--; + } + assert(cutsize == 0); + } + else + for(OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) + child->set_cut( child->get_cut_size( cutsize ), cut ); + } + } + + void optimize() + { + if(RE_rayobject_isAligned(this->child)) + { + //Calc new childs + { + OVBVHNode **cut = &(this->child); + set_cut( best_cutsize, &cut ); + *cut = NULL; + } + + //Optimize new childs + for(OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) + child->optimize(); + } + } +}; + +/* + * Calculates an optimal SIMD packing + * + */ +template<class Node, class TestCost> +struct VBVH_optimalPackSIMD +{ + TestCost testcost; + + VBVH_optimalPackSIMD(TestCost testcost) + { + this->testcost = testcost; + } + + /* + * calc best cut on a node + */ + struct calc_best + { + Node *child[MAX_OPTIMIZE_CHILDS]; + float child_hit_prob[MAX_OPTIMIZE_CHILDS]; + + calc_best(Node *node) + { + int nchilds = 0; + //Fetch childs and needed data + { + float parent_area = bb_area(node->bb, node->bb+3); + for(Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) + { + this->child[nchilds] = child; + this->child_hit_prob[nchilds] = bb_area(child->bb, child->bb+3) / parent_area; + nchilds++; + } + + assert(nchilds >= 2 && nchilds <= MAX_OPTIMIZE_CHILDS); + } + + + //Build DP table to find minimum cost to represent this node with a given cutsize + int bt [MAX_OPTIMIZE_CHILDS+1][MAX_CUT_SIZE+1]; //backtrace table + float cost[MAX_OPTIMIZE_CHILDS+1][MAX_CUT_SIZE+1]; //cost table (can be reduced to float[2][MAX_CUT_COST]) + + for(int i=0; i<=nchilds; i++) + for(int j=0; j<=MAX_CUT_SIZE; j++) + cost[i][j] = INFINITY; + + cost[0][0] = 0; + + for(int i=1; i<=nchilds; i++) + for(int size=i-1; size/*+(nchilds-i)*/<=MAX_CUT_SIZE; size++) + for(int cut=1; cut+size/*+(nchilds-i)*/<=MAX_CUT_SIZE; cut++) + { + float new_cost = cost[i-1][size] + child_hit_prob[i-1]*child[i-1]->get_cost(cut); + if(new_cost < cost[i][size+cut]) + { + cost[i][size+cut] = new_cost; + bt[i][size+cut] = cut; + } + } + + //Save the ways to archieve the minimum cost with a given cutsize + for(int i = nchilds; i <= MAX_CUT_SIZE; i++) + { + node->cut_cost[i-1] = cost[nchilds][i]; + if(cost[nchilds][i] < INFINITY) + { + int current_size = i; + for(int j=nchilds; j>0; j--) + { + child[j-1]->cut_size[i-1] = bt[j][current_size]; + current_size -= bt[j][current_size]; + } + } + } + } + }; + + void calc_costs(Node *node) + { + + if( RE_rayobject_isAligned(node->child) ) + { + int nchilds = 0; + for(Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) + { + calc_costs(child); + nchilds++; + } + + for(int i=0; i<MAX_CUT_SIZE; i++) + node->cut_cost[i] = INFINITY; + + //We are not allowed to look on nodes with with so many childs + if(nchilds > MAX_CUT_SIZE) + { + float cost = 0; + + float parent_area = bb_area(node->bb, node->bb+3); + for(Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) + { + cost += ( bb_area(child->bb, child->bb+3) / parent_area ) * child->get_cost(1); + } + + cost += testcost(nchilds); + node->cut_cost[0] = cost; + node->best_cutsize = nchilds; + } + else + { + calc_best calc(node); + + //calc expected cost if we optimaly pack this node + for(int cutsize=nchilds; cutsize<=MAX_CUT_SIZE; cutsize++) + { + float m = node->get_cost(cutsize) + testcost(cutsize); + if(m < node->cut_cost[0]) + { + node->cut_cost[0] = m; + node->best_cutsize = cutsize; + } + } + } + assert(node->cut_cost[0] != INFINITY); + } + else + { + node->cut_cost[0] = 1.0f; + for(int i=1; i<MAX_CUT_SIZE; i++) + node->cut_cost[i] = INFINITY; + } + } + + Node *transform(Node *node) + { + if(RE_rayobject_isAligned(node->child)) + { + static int num = 0; + bool first = false; + if(num == 0) { num++; first = true; } + + calc_costs(node); + if(first) printf("expected cost = %f (%d)\n", node->cut_cost[0], node->best_cutsize ); + node->optimize(); + } + return node; + } +}; diff --git a/source/blender/render/intern/raytrace/svbvh.h b/source/blender/render/intern/raytrace/svbvh.h new file mode 100644 index 00000000000..3c733b6b402 --- /dev/null +++ b/source/blender/render/intern/raytrace/svbvh.h @@ -0,0 +1,249 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#ifdef __SSE__ + +#ifndef RE_RAYTRACE_SVBVH_H +#define RE_RAYTRACE_SVBVH_H + +#include "bvh.h" +#include "BLI_memarena.h" +#include <stdio.h> +#include <algorithm> + +struct SVBVHNode +{ + int nchilds; + + //Array of bb, array of childs + float *child_bb; + SVBVHNode **child; +}; + +template<> +inline int bvh_node_hit_test<SVBVHNode>(SVBVHNode *node, Isect *isec) +{ + return 1; +} + +template<> +inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHNode **stack, int &stack_pos) +{ + int i=0; + while(i+4 <= node->nchilds) + { + int res = test_bb_group4( (__m128*) (node->child_bb+6*i), isec ); + RE_RC_COUNT(isec->raycounter->simd_bb.test); + + if(res & 1) { stack[stack_pos++] = node->child[i+0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if(res & 2) { stack[stack_pos++] = node->child[i+1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if(res & 4) { stack[stack_pos++] = node->child[i+2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if(res & 8) { stack[stack_pos++] = node->child[i+3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + + i += 4; + } + while(i < node->nchilds) + { + if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i)) + stack[stack_pos++] = node->child[i]; + i++; + } +} + +template<> +void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max) +{ + if(is_leaf(node)) + { + RE_rayobject_merge_bb( (RayObject*)node, min, max); + } + else + { + int i=0; + while(i+4 <= node->nchilds) + { + float *res = node->child_bb + 6*i; + for(int j=0; j<3; j++) + { + min[j] = MIN2(min[j], res[4*j+0]); + min[j] = MIN2(min[j], res[4*j+1]); + min[j] = MIN2(min[j], res[4*j+2]); + min[j] = MIN2(min[j], res[4*j+3]); + } + for(int j=0; j<3; j++) + { + max[j] = MAX2(max[j], res[4*(j+3)+0]); + max[j] = MAX2(max[j], res[4*(j+3)+1]); + max[j] = MAX2(max[j], res[4*(j+3)+2]); + max[j] = MAX2(max[j], res[4*(j+3)+3]); + } + + i += 4; + } + + for(; i<node->nchilds; i++) + { + DO_MIN(node->child_bb+6*i , min); + DO_MAX(node->child_bb+3+6*i, max); + } + } +} + + + +/* + * Builds a SVBVH tree form a VBVHTree + */ +template<class OldNode> +struct Reorganize_SVBVH +{ + MemArena *arena; + + float childs_per_node; + int nodes_with_childs[16]; + int useless_bb; + int nodes; + + Reorganize_SVBVH(MemArena *a) + { + arena = a; + nodes = 0; + childs_per_node = 0; + useless_bb = 0; + + for(int i=0; i<16; i++) + nodes_with_childs[i] = 0; + } + + ~Reorganize_SVBVH() + { + printf("%f childs per node\n", childs_per_node / nodes); + printf("%d childs BB are useless\n", useless_bb); + for(int i=0; i<16; i++) + printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, nodes_with_childs[i]/float(nodes)); + } + + SVBVHNode *create_node(int nchilds) + { + SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode)); + node->nchilds = nchilds; + node->child_bb = (float*)BLI_memarena_alloc(arena, sizeof(float)*6*nchilds); + node->child= (SVBVHNode**)BLI_memarena_alloc(arena, sizeof(SVBVHNode*)*nchilds); + + return node; + } + + void copy_bb(float *bb, const float *old_bb) + { + std::copy( old_bb, old_bb+6, bb ); + } + + void prepare_for_simd(SVBVHNode *node) + { + int i=0; + while(i+4 <= node->nchilds) + { + float vec_tmp[4*6]; + float *res = node->child_bb+6*i; + std::copy( res, res+6*4, vec_tmp); + + for(int j=0; j<6; j++) + { + res[4*j+0] = vec_tmp[6*0+j]; + res[4*j+1] = vec_tmp[6*1+j]; + res[4*j+2] = vec_tmp[6*2+j]; + res[4*j+3] = vec_tmp[6*3+j]; + } + + i += 4; + } + } + + /* amt must be power of two */ + inline int padup(int num, int amt) + { + return ((num+(amt-1))&~(amt-1)); + } + + SVBVHNode *transform(OldNode *old) + { + if(is_leaf(old)) + return (SVBVHNode*)old; + if(is_leaf(old->child)) + return (SVBVHNode*)old->child; + + int nchilds = count_childs(old); + int alloc_childs = nchilds; + if(nchilds % 4 > 2) + alloc_childs = padup(nchilds, 4); + + SVBVHNode *node = create_node(alloc_childs); + + childs_per_node += nchilds; + nodes++; + if(nchilds < 16) + nodes_with_childs[nchilds]++; + + useless_bb += alloc_childs-nchilds; + while(alloc_childs > nchilds) + { + const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN }; + alloc_childs--; + node->child[alloc_childs] = 0; + copy_bb(node->child_bb+alloc_childs*6, def_bb); + } + + int i=nchilds; + for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling) + { + i--; + node->child[i] = transform(o_child); + if(is_leaf(o_child)) + { + float bb[6]; + INIT_MINMAX(bb, bb+3); + RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3); + copy_bb(node->child_bb+i*6, bb); + break; + } + else + { + copy_bb(node->child_bb+i*6, o_child->bb); + } + } + assert( i == 0 ); + + prepare_for_simd(node); + + return node; + } +}; + +#endif + +#endif //__SSE__
\ No newline at end of file diff --git a/source/blender/render/intern/raytrace/vbvh.h b/source/blender/render/intern/raytrace/vbvh.h new file mode 100644 index 00000000000..1ff51786e52 --- /dev/null +++ b/source/blender/render/intern/raytrace/vbvh.h @@ -0,0 +1,237 @@ +/** + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ +#include <assert.h> + +#include <algorithm> +#include "rayobject_rtbuild.h" +#include "BLI_memarena.h" + + +/* + * VBVHNode represents a BVHNode with support for a variable number of childrens + */ +struct VBVHNode +{ + float bb[6]; + + VBVHNode *child; + VBVHNode *sibling; +}; + + +/* + * Push nodes (used on dfs) + */ +template<class Node> +inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos) +{ + Node *child = node->child; + + if(is_leaf(child)) + { + stack[stack_pos++] = child; + } + else + { + while(child) + { + //Skips BB tests on primitives +/* + if(is_leaf(child->child)) + stack[stack_pos++] = child->child; + else +*/ + stack[stack_pos++] = child; + + child = child->sibling; + } + } +} + + +template<class Node> +int count_childs(Node *parent) +{ + int n = 0; + for(Node *i = parent->child; i; i = i->sibling) + { + n++; + if(is_leaf(i)) + break; + } + + return n; +} + + +template<class Node> +void append_sibling(Node *node, Node *sibling) +{ + while(node->sibling) + node = node->sibling; + + node->sibling = sibling; +} + + +/* + * Builds a binary VBVH from a rtbuild + */ +template<class Node> +struct BuildBinaryVBVH +{ + MemArena *arena; + RayObjectControl *control; + + void test_break() + { + if(RE_rayobjectcontrol_test_break(control)) + throw "Stop"; + } + + BuildBinaryVBVH(MemArena *a, RayObjectControl *c) + { + arena = a; + control = c; + } + + Node *create_node() + { + Node *node = (Node*)BLI_memarena_alloc( arena, sizeof(Node) ); + assert( RE_rayobject_isAligned(node) ); + + node->sibling = NULL; + node->child = NULL; + + return node; + } + + int rtbuild_split(RTBuilder *builder) + { + return ::rtbuild_heuristic_object_split(builder, 2); + } + + Node *transform(RTBuilder *builder) + { + try + { + return _transform(builder); + + } catch(...) + { + } + return NULL; + } + + Node *_transform(RTBuilder *builder) + { + + int size = rtbuild_size(builder); + if(size == 1) + { + Node *node = create_node(); + INIT_MINMAX(node->bb, node->bb+3); + rtbuild_merge_bb(builder, node->bb, node->bb+3); + node->child = (Node*) rtbuild_get_primitive( builder, 0 ); + return node; + } + else + { + test_break(); + + Node *node = create_node(); + + INIT_MINMAX(node->bb, node->bb+3); + rtbuild_merge_bb(builder, node->bb, node->bb+3); + + Node **child = &node->child; + + int nc = rtbuild_split(builder); + assert(nc == 2); + for(int i=0; i<nc; i++) + { + RTBuilder tmp; + rtbuild_get_child(builder, i, &tmp); + + *child = _transform(&tmp); + child = &((*child)->sibling); + } + + *child = 0; + return node; + } + } +}; + +/* +template<class Tree,class OldNode> +struct Reorganize_VBVH +{ + Tree *tree; + + Reorganize_VBVH(Tree *t) + { + tree = t; + } + + VBVHNode *create_node() + { + VBVHNode *node = (VBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode)); + return node; + } + + void copy_bb(VBVHNode *node, OldNode *old) + { + std::copy( old->bb, old->bb+6, node->bb ); + } + + VBVHNode *transform(OldNode *old) + { + if(is_leaf(old)) + return (VBVHNode*)old; + + VBVHNode *node = create_node(); + VBVHNode **child_ptr = &node->child; + node->sibling = 0; + + copy_bb(node,old); + + for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling) + { + VBVHNode *n_child = transform(o_child); + *child_ptr = n_child; + if(is_leaf(n_child)) return node; + child_ptr = &n_child->sibling; + } + *child_ptr = 0; + + return node; + } +}; +*/
\ No newline at end of file |