diff options
author | Andre Susano Pinto <andresusanopinto@gmail.com> | 2009-08-11 21:28:58 +0400 |
---|---|---|
committer | Andre Susano Pinto <andresusanopinto@gmail.com> | 2009-08-11 21:28:58 +0400 |
commit | 495ef8a6a24f516826175f78cb6cb60facd82592 (patch) | |
tree | 6182f5ed10fcd06c9aea01e8964d79db6e0e5c7c /source/blender/render/intern/raytrace/svbvh.h | |
parent | 27da1ec383bb0de09ab808bcaef410319bc351f4 (diff) |
fix instance support when using SIMD
Diffstat (limited to 'source/blender/render/intern/raytrace/svbvh.h')
-rw-r--r-- | source/blender/render/intern/raytrace/svbvh.h | 69 |
1 files changed, 54 insertions, 15 deletions
diff --git a/source/blender/render/intern/raytrace/svbvh.h b/source/blender/render/intern/raytrace/svbvh.h
index f537aa79dac..cc875ad6dba 100644
--- a/source/blender/render/intern/raytrace/svbvh.h
+++ b/source/blender/render/intern/raytrace/svbvh.h
@@ -39,7 +39,7 @@ struct SVBVHNode
 	int nchilds;
 	//Array of bb, array of childs
-	float *bb;
+	float *child_bb;
 	SVBVHNode **child;
 };
@@ -57,7 +57,7 @@ inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHN
 		int i=0;
 		while(i+4 <= node->nchilds)
 		{
-			int res = test_bb_group4( (__m128*) (node->bb+6*i), isec );
+			int res = test_bb_group4( (__m128*) (node->child_bb+6*i), isec );
 			RE_RC_COUNT(isec->raycounter->bb.test);
 			RE_RC_COUNT(isec->raycounter->bb.test);
 			RE_RC_COUNT(isec->raycounter->bb.test);
@@ -72,7 +72,7 @@ inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHN
 		}
 		while(i < node->nchilds)
 		{
-			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
+			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i))
 				stack[stack_pos++] = node->child[i];
 			i++;
 		}
@@ -81,12 +81,51 @@ inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHN
 	{
 		for(int i=0; i<node->nchilds; i++)
 		{
-			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
+			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i))
 				stack[stack_pos++] = node->child[i];
 		}
 	}
 }
+template<>
+void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max)
+{
+	if(is_leaf(node))
+	{
+		RE_rayobject_merge_bb( (RayObject*)node, min, max);
+	}
+	else
+	{
+		int i=0;
+		while(SVBVH_SIMD && i+4 <= node->nchilds)
+		{
+			float *res = node->child_bb + 6*i;
+			for(int j=0; j<3; j++)
+			{
+				min[j] = MIN2(min[j], res[4*j+0]);
+				min[j] = MIN2(min[j], res[4*j+1]);
+				min[j] = MIN2(min[j], res[4*j+2]);
+				min[j] = MIN2(min[j], res[4*j+3]);
+			}
+			for(int j=0; j<3; j++)
+			{
+				max[j] = MAX2(max[j], res[4*(j+3)+0]);
+				max[j] = MAX2(max[j], res[4*(j+3)+1]);
+				max[j] = MAX2(max[j], res[4*(j+3)+2]);
+				max[j] = MAX2(max[j], res[4*(j+3)+3]);
+			}
+
+			i += 4;
+		}
+
+		for(; i<node->nchilds; i++)
+		{
+			DO_MIN(node->child_bb+6*i , min);
+			DO_MAX(node->child_bb+3+6*i, max);
+		}
+	}
+}
+
 struct SVBVHTree
 {
 	RayObject rayobj;
@@ -131,7 +170,7 @@ struct Reorganize_SVBVH
 	{
 		SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode));
 		node->nchilds = nchilds;
-		node->bb = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
+		node->child_bb = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
 		node->child= (SVBVHNode**)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode*)*nchilds);
 		return node;
@@ -148,8 +187,8 @@ struct Reorganize_SVBVH
 		while(i+4 <= node->nchilds)
 		{
 			float vec_tmp[4*6];
-			float *res = node->bb+6*i;
-			std::copy( node->bb+6*i, node->bb+6*(i+4), vec_tmp);
+			float *res = node->child_bb+6*i;
+			std::copy( res, res+6*4, vec_tmp);
 			for(int j=0; j<6; j++)
 			{
@@ -167,18 +206,18 @@ struct Reorganize_SVBVH
 			//memmoves could be memory alligned
 			const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
 			const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
-			_mm_store_ps( node->bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
-			_mm_store_ps( node->bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
+			_mm_store_ps( node->child_bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
+			_mm_store_ps( node->child_bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
 			const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
 			const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
-			_mm_store_ps( node->bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
-			_mm_store_ps( node->bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
+			_mm_store_ps( node->child_bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
+			_mm_store_ps( node->child_bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
 			const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
 			const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
-			_mm_store_ps( node->bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
-			_mm_store_ps( node->bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
+			_mm_store_ps( node->child_bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
+			_mm_store_ps( node->child_bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
 			*/
 			i += 4;
@@ -210,12 +249,12 @@ struct Reorganize_SVBVH
 				float bb[6];
 				INIT_MINMAX(bb, bb+3);
 				RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3);
-				copy_bb(node->bb+i*6, bb);
+				copy_bb(node->child_bb+i*6, bb);
 				break;
 			}
 			else
 			{
-				copy_bb(node->bb+i*6, o_child->bb);
+				copy_bb(node->child_bb+i*6, o_child->bb);
 			}
 		}
 		assert( i == 0 );