diff options
author | Andre Susano Pinto <andresusanopinto@gmail.com> | 2009-08-07 17:49:15 +0400 |
---|---|---|
committer | Andre Susano Pinto <andresusanopinto@gmail.com> | 2009-08-07 17:49:15 +0400 |
commit | 51cad12120a673959abfbb1d4445edb782b5f13b (patch) | |
tree | b4947b13ef8e1e0082634c0edf07f7ef65a52ba4 /source/blender | |
parent | 20c9f2e8abf5d81e5b8a340b12ff8f6f66eee2ca (diff) |
* BLI_memarena: support for any power-of-two alignment
* some SIMD stuff on bvh
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/blenlib/BLI_memarena.h | 2 | ||||
-rw-r--r-- | source/blender/blenlib/intern/BLI_memarena.c | 23 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/bvh.h | 12 | ||||
-rw-r--r-- | source/blender/render/intern/raytrace/rayobject_vbvh.cpp | 13 |
4 files changed, 39 insertions, 11 deletions
diff --git a/source/blender/blenlib/BLI_memarena.h b/source/blender/blenlib/BLI_memarena.h index 4b955e9fa20..fcf6ae02900 100644 --- a/source/blender/blenlib/BLI_memarena.h +++ b/source/blender/blenlib/BLI_memarena.h @@ -57,6 +57,8 @@ void BLI_memarena_free (struct MemArena *ma); void BLI_memarena_use_malloc (struct MemArena *ma); void BLI_memarena_use_calloc (struct MemArena *ma); +void BLI_memarena_use_align(struct MemArena *ma, int align); + void* BLI_memarena_alloc (struct MemArena *ma, int size); #ifdef __cplusplus diff --git a/source/blender/blenlib/intern/BLI_memarena.c b/source/blender/blenlib/intern/BLI_memarena.c index 6312cbb22ca..275ab12540b 100644 --- a/source/blender/blenlib/intern/BLI_memarena.c +++ b/source/blender/blenlib/intern/BLI_memarena.c @@ -45,6 +45,7 @@ struct MemArena { int bufsize, cursize; int use_calloc; + int align; LinkNode *bufs; }; @@ -52,6 +53,7 @@ struct MemArena { MemArena *BLI_memarena_new(int bufsize) { MemArena *ma= MEM_callocN(sizeof(*ma), "memarena"); ma->bufsize= bufsize; + ma->align = 8; return ma; } @@ -64,6 +66,11 @@ void BLI_memarena_use_malloc(MemArena *ma) { ma->use_calloc= 0; } +void BLI_memarena_use_align(struct MemArena *ma, int align) { + /* align should be a power of two */ + ma->align = align; +} + void BLI_memarena_free(MemArena *ma) { BLI_linklist_free(ma->bufs, (void(*)(void*)) MEM_freeN); MEM_freeN(ma); @@ -77,16 +84,28 @@ void *BLI_memarena_alloc(MemArena *ma, int size) { /* ensure proper alignment by rounding * size up to multiple of 8 */ - size= PADUP(size, 8); + size= PADUP(size, ma->align); if (size>ma->cursize) { - ma->cursize= (size>ma->bufsize)?size:ma->bufsize; + unsigned char *tmp; + + if(size > ma->bufsize - (ma->align - 1)) + { + ma->cursize = PADUP(size+1, ma->align); + } + else ma->cursize = ma->bufsize; + if(ma->use_calloc) ma->curbuf= MEM_callocN(ma->cursize, "memarena calloc"); else ma->curbuf= MEM_mallocN(ma->cursize, "memarena malloc"); BLI_linklist_prepend(&ma->bufs, ma->curbuf); + + /* 
align alloc'ed memory (needed if align > 8) */ + tmp = (unsigned char*)PADUP( (intptr_t) ma->curbuf, ma->align); + ma->cursize -= (tmp - ma->curbuf); + ma->curbuf = tmp; } ptr= ma->curbuf; diff --git a/source/blender/render/intern/raytrace/bvh.h b/source/blender/render/intern/raytrace/bvh.h index b17a072f3a6..2d75cd800a7 100644 --- a/source/blender/render/intern/raytrace/bvh.h +++ b/source/blender/render/intern/raytrace/bvh.h @@ -196,18 +196,18 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec) const float *bb2 = stack[stack_pos+2]->bb; const float *bb3 = stack[stack_pos+3]->bb; - const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) ); - const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) ); + const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1,0,1,0) ); + const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1,0,1,0) ); t_bb[0] = _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ); t_bb[1] = _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ); - const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+2), _mm_loadu_ps(bb1+2), _MM_SHUFFLE(1,0,1,0) ); - const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+2), _mm_loadu_ps(bb3+2), _MM_SHUFFLE(1,0,1,0) ); + const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3,1,3,1) ); + const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3,1,3,1) ); t_bb[2] = _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ); t_bb[3] = _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ); - const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) ); - const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) ); + const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_load_ps(bb0+4), 
_mm_load_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) ); + const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_load_ps(bb2+4), _mm_load_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) ); t_bb[4] = _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ); t_bb[5] = _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ); /* diff --git a/source/blender/render/intern/raytrace/rayobject_vbvh.cpp b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp index 9a67f490aeb..2f0efa80445 100644 --- a/source/blender/render/intern/raytrace/rayobject_vbvh.cpp +++ b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp @@ -56,14 +56,14 @@ extern "C" struct BVHNode { - BVHNode *child; - BVHNode *sibling; - #ifdef DYNAMIC_ALLOC_BB float *bb; #else float bb[6]; #endif + + BVHNode *child; + BVHNode *sibling; }; struct BVHTree @@ -114,6 +114,12 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i static BVHNode *bvh_new_node(BVHTree *tree) { BVHNode *node = (BVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(BVHNode)); + + if( (((intptr_t)node) & (0x0f)) != 0 ) + { + puts("WRONG!"); + printf("%08x\n", (intptr_t)node); + } node->sibling = NULL; node->child = NULL; @@ -317,6 +323,7 @@ void bvh_done<BVHTree>(BVHTree *obj) obj->node_arena = BLI_memarena_new(needed_nodes); BLI_memarena_use_malloc(obj->node_arena); + BLI_memarena_use_align(obj->node_arena, 16); obj->root = bvh_rearrange<BVHTree,BVHNode,RTBuilder>( obj, obj->builder ); |