/* * $Id$ * * ***** BEGIN GPL LICENSE BLOCK ***** * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * The Original Code is Copyright (C) 2009 Blender Foundation. * All rights reserved. * * The Original Code is: all of this file. * * Contributor(s): André Pinto. * * ***** END GPL LICENSE BLOCK ***** */ /** \file blender/render/intern/raytrace/svbvh.h * \ingroup render */ #ifdef __SSE__ #ifndef RE_RAYTRACE_SVBVH_H #define RE_RAYTRACE_SVBVH_H #include "bvh.h" #include "BLI_memarena.h" #include "BKE_global.h" #include #include struct SVBVHNode { float child_bb[24]; SVBVHNode *child[4]; int nchilds; }; static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group) { const __m128 tmin0 = _mm_setzero_ps(); const __m128 tmax0 = _mm_set_ps1(isec->dist); const __m128 start0 = _mm_set_ps1(isec->start[0]); const __m128 start1 = _mm_set_ps1(isec->start[1]); const __m128 start2 = _mm_set_ps1(isec->start[2]); const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0); const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0); const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1); const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1); const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2); const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2); const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]); const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]); const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]); const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0); const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0); const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1); const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1); const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2); const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2); const __m128 tmin1 = _mm_max_ps(tmin0, mul0); const __m128 tmax1 = _mm_min_ps(tmax0, mul1); const __m128 tmin2 = _mm_max_ps(tmin1, mul2); const __m128 tmax2 = _mm_min_ps(tmax1, mul3); const __m128 tmin3 = _mm_max_ps(tmin2, mul4); const __m128 tmax3 = _mm_min_ps(tmax2, mul5); return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); } static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb) { const float *bb = _bb; float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; RE_RC_COUNT(isec->raycounter->bb.test); if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0; if(t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; RE_RC_COUNT(isec->raycounter->bb.hit); return 1; } static bool svbvh_node_is_leaf(const SVBVHNode *node) { return !RE_rayobject_isAligned(node); } template static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec) { SVBVHNode *stack[MAX_STACK_SIZE], *node; int hit = 0, stack_pos = 0; stack[stack_pos++] = root; while(stack_pos) { node = stack[--stack_pos]; if(!svbvh_node_is_leaf(node)) { int nchilds= node->nchilds; if(nchilds == 4) { float *child_bb= node->child_bb; int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb))); SVBVHNode **child= node->child; RE_RC_COUNT(isec->raycounter->simd_bb.test); if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } } else { float *child_bb= node->child_bb; SVBVHNode **child= node->child; int i; for(i=0; i inline void bvh_node_merge_bb(SVBVHNode *node, float *min, float *max) { if(is_leaf(node)) { RE_rayobject_merge_bb((RayObject*)node, min, max); } else { int i=0; while(i+4 <= node->nchilds) { float *res = node->child_bb + 6*i; for(int j=0; j<3; j++) { min[j] = MIN2(min[j], res[4*j+0]); min[j] = MIN2(min[j], res[4*j+1]); min[j] = MIN2(min[j], res[4*j+2]); min[j] = MIN2(min[j], res[4*j+3]); } for(int j=0; j<3; j++) { max[j] = MAX2(max[j], res[4*(j+3)+0]); max[j] = MAX2(max[j], res[4*(j+3)+1]); max[j] = MAX2(max[j], res[4*(j+3)+2]); max[j] = MAX2(max[j], res[4*(j+3)+3]); } i += 4; } for(; inchilds; i++) { DO_MIN(node->child_bb+6*i , min); DO_MAX(node->child_bb+3+6*i, max); } } } /* * Builds a SVBVH tree form a VBVHTree */ template struct Reorganize_SVBVH { MemArena *arena; float childs_per_node; int nodes_with_childs[16]; int useless_bb; int nodes; Reorganize_SVBVH(MemArena *a) { arena = a; nodes = 0; childs_per_node = 0; useless_bb = 0; for(int i=0; i<16; i++) nodes_with_childs[i] = 0; } ~Reorganize_SVBVH() { if(G.f & G_DEBUG) { printf("%f childs per node\n", childs_per_node / nodes); printf("%d childs BB are useless\n", useless_bb); for(int i=0; i<16; i++) printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, nodes_with_childs[i]/float(nodes)); } } SVBVHNode *create_node(int nchilds) { SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode)); node->nchilds = nchilds; return node; } void copy_bb(float *bb, const float *old_bb) { std::copy(old_bb, old_bb+6, bb); } void prepare_for_simd(SVBVHNode *node) { int i=0; while(i+4 <= node->nchilds) { float vec_tmp[4*6]; float *res = node->child_bb+6*i; std::copy(res, res+6*4, vec_tmp); for(int j=0; j<6; j++) { res[4*j+0] = vec_tmp[6*0+j]; res[4*j+1] = vec_tmp[6*1+j]; res[4*j+2] = vec_tmp[6*2+j]; res[4*j+3] = vec_tmp[6*3+j]; } i += 4; } } /* amt must be power of two */ inline int padup(int num, int amt) { return ((num+(amt-1))&~(amt-1)); } SVBVHNode *transform(OldNode *old) { if(is_leaf(old)) return (SVBVHNode*)old; if(is_leaf(old->child)) return (SVBVHNode*)old->child; int nchilds = count_childs(old); int alloc_childs = nchilds; if(nchilds % 4 > 2) alloc_childs = padup(nchilds, 4); SVBVHNode *node = create_node(alloc_childs); childs_per_node += nchilds; nodes++; if(nchilds < 16) nodes_with_childs[nchilds]++; useless_bb += alloc_childs-nchilds; while(alloc_childs > nchilds) { const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN }; alloc_childs--; node->child[alloc_childs] = 0; copy_bb(node->child_bb+alloc_childs*6, def_bb); } int i=nchilds; for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling) { i--; node->child[i] = transform(o_child); if(is_leaf(o_child)) { float bb[6]; INIT_MINMAX(bb, bb+3); RE_rayobject_merge_bb((RayObject*)o_child, bb, bb+3); copy_bb(node->child_bb+i*6, bb); break; } else { copy_bb(node->child_bb+i*6, o_child->bb); } } assert(i == 0); prepare_for_simd(node); return node; } }; #endif #endif //__SSE__