/* * * ***** BEGIN GPL LICENSE BLOCK ***** * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * The Original Code is Copyright (C) 2008 Blender Foundation. * All rights reserved. * * The Original Code is: all of this file. * * Contributor(s): Brecht Van Lommel. * * ***** END GPL LICENSE BLOCK ***** */ /** \file blender/render/intern/source/occlusion.c * \ingroup render */ #include #include #include #include #include "MEM_guardedalloc.h" #include "DNA_material_types.h" #include "BLI_math.h" #include "BLI_blenlib.h" #include "BLI_memarena.h" #include "BLI_threads.h" #include "BLI_utildefines.h" #include "BKE_global.h" #include "BKE_scene.h" #include "RE_shader_ext.h" /* local includes */ #include "occlusion.h" #include "render_types.h" #include "rendercore.h" #include "renderdatabase.h" #include "pixelshading.h" #include "shading.h" #include "zbuf.h" /* ------------------------- Declarations --------------------------- */ #define INVALID_INDEX ((int)(~0)) #define INVPI 0.31830988618379069f #define TOTCHILD 8 #define CACHE_STEP 3 typedef struct OcclusionCacheSample { float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2; int x, y, filled; } OcclusionCacheSample; typedef struct OcclusionCache { OcclusionCacheSample *sample; int x, y, w, h, step; } OcclusionCache; typedef struct OccFace { int obi; int facenr; } OccFace; typedef struct OccNode { float co[3], area; float sh[9], dco; float occlusion, rad[3]; int childflag; union { //OccFace face; int face; struct OccNode *node; } child[TOTCHILD]; } OccNode; typedef struct OcclusionTree { MemArena *arena; float (*co)[3]; /* temporary during build */ OccFace *face; /* instance and face indices */ float *occlusion; /* occlusion for faces */ float (*rad)[3]; /* radiance for faces */ OccNode *root; OccNode **stack[BLENDER_MAX_THREADS]; int maxdepth; int totface; float error; float distfac; int dothreadedbuild; int totbuildthread; int doindirect; OcclusionCache *cache; } OcclusionTree; typedef struct OcclusionThread { Render *re; StrandSurface *mesh; float (*faceao)[3]; float (*faceenv)[3]; float (*faceindirect)[3]; int begin, end; int thread; } OcclusionThread; typedef struct OcclusionBuildThread { OcclusionTree *tree; int begin, end, depth; OccNode *node; } OcclusionBuildThread; /* ------------------------- Shading --------------------------- */ extern Render R; // meh static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad) { ShadeInput *shi= ssamp->shi; ShadeResult *shr= ssamp->shr; float l, u, v, *v1, *v2, *v3; /* init */ if(vlr->v4) { shi->u= u= 0.5f; shi->v= v= 0.5f; } else { shi->u= u= 1.0f/3.0f; shi->v= v= 1.0f/3.0f; } /* setup render coordinates */ v1= vlr->v1->co; v2= vlr->v2->co; v3= vlr->v3->co; /* renderco */ l= 1.0f-u-v; shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0]; shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1]; shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2]; shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); /* set up view vector */ copy_v3_v3(shi->view, shi->co); normalize_v3(shi->view); /* cache for shadow */ shi->samplenr++; shi->xs= 0; // TODO shi->ys= 0; shade_input_set_normals(shi); /* no normal flip */ if(shi->flippednor) shade_input_flip_normals(shi); madd_v3_v3fl(shi->co, shi->vn, 0.0001f); /* ugly.. */ /* not a pretty solution, but fixes common cases */ if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) { negate_v3(shi->vn); negate_v3(shi->vno); negate_v3(shi->nmapnorm); } /* init material vars */ // note, keep this synced with render_types.h memcpy(&shi->r, &shi->mat->r, 23*sizeof(float)); shi->har= shi->mat->har; /* render */ shade_input_set_shade_texco(shi); shade_material_loop(shi, shr); /* todo: nodes */ copy_v3_v3(rad, shr->combined); } static void occ_build_shade(Render *re, OcclusionTree *tree) { ShadeSample ssamp; ObjectInstanceRen *obi; VlakRen *vlr; int a; R= *re; /* setup shade sample with correct passes */ memset(&ssamp, 0, sizeof(ShadeSample)); ssamp.shi[0].lay= re->lay; ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA; ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC); ssamp.tot= 1; for(a=0; atotface; a++) { obi= &R.objectinstance[tree->face[a].obi]; vlr= RE_findOrAddVlak(obi->obr, tree->face[a].facenr); occ_shade(&ssamp, obi, vlr, tree->rad[a]); } } /* ------------------------- Spherical Harmonics --------------------------- */ /* Use 2nd order SH => 9 coefficients, stored in this order: 0 = (0,0), 1 = (1,-1), 2 = (1,0), 3 = (1,1), 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */ static void sh_copy(float *shresult, float *sh) { memcpy(shresult, sh, sizeof(float)*9); } static void sh_mul(float *sh, float f) { int i; for(i=0; i<9; i++) sh[i] *= f; } static void sh_add(float *shresult, float *sh1, float *sh2) { int i; for(i=0; i<9; i++) shresult[i]= sh1[i] + sh2[i]; } static void sh_from_disc(float *n, float area, float *shresult) { /* See formula (3) in: "An Efficient Representation for Irradiance Environment Maps" */ float sh[9], x, y, z; x= n[0]; y= n[1]; z= n[2]; sh[0]= 0.282095f; sh[1]= 0.488603f*y; sh[2]= 0.488603f*z; sh[3]= 0.488603f*x; sh[4]= 1.092548f*x*y; sh[5]= 1.092548f*y*z; sh[6]= 0.315392f*(3.0f*z*z - 1.0f); sh[7]= 1.092548f*x*z; sh[8]= 0.546274f*(x*x - y*y); sh_mul(sh, area); sh_copy(shresult, sh); } static float sh_eval(float *sh, float *v) { /* See formula (13) in: "An Efficient Representation for Irradiance Environment Maps" */ static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f; static const float c4 = 0.886227f, c5 = 0.247708f; float x, y, z, sum; x= v[0]; y= v[1]; z= v[2]; sum= c1*sh[8]*(x*x - y*y); sum += c3*sh[6]*z*z; sum += c4*sh[0]; sum += -c5*sh[6]; sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z); sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z); return sum; } /* ------------------------------ Building --------------------------------- */ static void occ_face(const OccFace *face, float co[3], float normal[3], float *area) { ObjectInstanceRen *obi; VlakRen *vlr; float v1[3], v2[3], v3[3], v4[3]; obi= &R.objectinstance[face->obi]; vlr= RE_findOrAddVlak(obi->obr, face->facenr); if(co) { if(vlr->v4) interp_v3_v3v3(co, vlr->v1->co, vlr->v3->co, 0.5f); else cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co); if(obi->flag & R_TRANSFORMED) mul_m4_v3(obi->mat, co); } if(normal) { normal[0]= -vlr->n[0]; normal[1]= -vlr->n[1]; normal[2]= -vlr->n[2]; if(obi->flag & R_TRANSFORMED) mul_m3_v3(obi->nmat, normal); } if(area) { copy_v3_v3(v1, vlr->v1->co); copy_v3_v3(v2, vlr->v2->co); copy_v3_v3(v3, vlr->v3->co); if(vlr->v4) copy_v3_v3(v4, vlr->v4->co); if(obi->flag & R_TRANSFORMED) { mul_m4_v3(obi->mat, v1); mul_m4_v3(obi->mat, v2); mul_m4_v3(obi->mat, v3); if(vlr->v4) mul_m4_v3(obi->mat, v4); } /* todo: correct area for instances */ if(vlr->v4) *area= area_quad_v3(v1, v2, v3, v4); else *area= area_tri_v3(v1, v2, v3); } } static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node) { OccNode *child; float occ, area, totarea, rad[3]; int a, b, indirect= tree->doindirect; occ= 0.0f; totarea= 0.0f; if(indirect) zero_v3(rad); for(b=0; bchildflag & (1<child[b].face; occ_face(&tree->face[a], 0, 0, &area); occ += area*tree->occlusion[a]; if(indirect) madd_v3_v3fl(rad, tree->rad[a], area); totarea += area; } else if(node->child[b].node) { child= node->child[b].node; occ_sum_occlusion(tree, child); occ += child->area*child->occlusion; if(indirect) madd_v3_v3fl(rad, child->rad, child->area); totarea += child->area; } } if(totarea != 0.0f) { occ /= totarea; if(indirect) mul_v3_fl(rad, 1.0f/totarea); } node->occlusion= occ; if(indirect) copy_v3_v3(node->rad, rad); } static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max) { float len, maxlen= -1.0f; int a, axis = 0; INIT_MINMAX(min, max); for(a=begin; aco[a], min, max) for(a=0; a<3; a++) { len= max[a] - min[a]; if(len > maxlen) { maxlen= len; axis= a; } } return axis; } static void occ_node_from_face(OccFace *face, OccNode *node) { float n[3]; occ_face(face, node->co, n, &node->area); node->dco= 0.0f; sh_from_disc(n, node->area, node->sh); } static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco) { int b; for(b=0; bchildflag & (1<face+node->child[b].face, nco, NULL, NULL); } else if(node->child[b].node) { OccNode *child= node->child[b].node; occ_build_dco(tree, child, co, dco); copy_v3_v3(nco, child->co); } else { continue; } sub_v3_v3v3(d, nco, co); dist= dot_v3v3(d, d); if(dist > *dco) *dco= dist; } } static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split) { float min[3], max[3], mid; int axis, a, enda; /* split in middle of boundbox. this seems faster than median split * on complex scenes, possibly since it avoids two distant faces to * be in the same node better? */ axis= occ_find_bbox_axis(tree, begin, end, min, max); mid= 0.5f*(min[axis]+max[axis]); a= begin; enda= end; while(aco[a][axis] > mid) { enda--; SWAP(OccFace, tree->face[a], tree->face[enda]); SWAP(float, tree->co[a][0], tree->co[enda][0]); SWAP(float, tree->co[a][1], tree->co[enda][1]); SWAP(float, tree->co[a][2], tree->co[enda][2]); } else a++; } *split= enda; } static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count) { /* split faces into eight groups */ int b, splitx, splity[2], splitz[4]; occ_build_split(tree, begin, end, &splitx); /* force split if none found, to deal with degenerate geometry */ if(splitx == begin || splitx == end) splitx= (begin+end)/2; occ_build_split(tree, begin, splitx, &splity[0]); occ_build_split(tree, splitx, end, &splity[1]); occ_build_split(tree, begin, splity[0], &splitz[0]); occ_build_split(tree, splity[0], splitx, &splitz[1]); occ_build_split(tree, splitx, splity[1], &splitz[2]); occ_build_split(tree, splity[1], end, &splitz[3]); offset[0]= begin; offset[1]= splitz[0]; offset[2]= splity[0]; offset[3]= splitz[1]; offset[4]= splitx; offset[5]= splitz[2]; offset[6]= splity[1]; offset[7]= splitz[3]; for(b=0; b<7; b++) count[b]= offset[b+1] - offset[b]; count[7]= end - offset[7]; } static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth); static void *exec_occ_build(void *data) { OcclusionBuildThread *othread= (OcclusionBuildThread*)data; occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth); return 0; } static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth) { ListBase threads; OcclusionBuildThread othreads[BLENDER_MAX_THREADS]; OccNode *child, tmpnode; /* OccFace *face; */ int a, b, totthread=0, offset[TOTCHILD], count[TOTCHILD]; /* add a new node */ node->occlusion= 1.0f; /* leaf node with only children */ if(end - begin <= TOTCHILD) { for(a=begin, b=0; aface[a]; */ node->child[b].face= a; node->childflag |= (1<dothreadedbuild) BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread); for(b=0; bchild[b].node= NULL; } else if(count[b] == 1) { /* face= &tree->face[offset[b]]; */ node->child[b].face= offset[b]; node->childflag |= (1<dothreadedbuild) BLI_lock_thread(LOCK_CUSTOM1); child= BLI_memarena_alloc(tree->arena, sizeof(OccNode)); node->child[b].node= child; /* keep track of maximum depth for stack */ if(depth+1 > tree->maxdepth) tree->maxdepth= depth+1; if(tree->dothreadedbuild) BLI_unlock_thread(LOCK_CUSTOM1); if(depth == 1 && tree->dothreadedbuild) { othreads[totthread].tree= tree; othreads[totthread].node= child; othreads[totthread].begin= offset[b]; othreads[totthread].end= offset[b]+count[b]; othreads[totthread].depth= depth+1; BLI_insert_thread(&threads, &othreads[totthread]); totthread++; } else occ_build_recursive(tree, child, offset[b], offset[b]+count[b], depth+1); } } if(depth == 1 && tree->dothreadedbuild) BLI_end_threads(&threads); } /* combine area, position and sh */ for(b=0; bchildflag & (1<face+node->child[b].face, &tmpnode); } else { child= node->child[b].node; } if(child) { node->area += child->area; sh_add(node->sh, node->sh, child->sh); madd_v3_v3fl(node->co, child->co, child->area); } } if(node->area != 0.0f) mul_v3_fl(node->co, 1.0f/node->area); /* compute maximum distance from center */ node->dco= 0.0f; if(node->area > 0.0f) occ_build_dco(tree, node, node->co, &node->dco); } static void occ_build_sh_normalize(OccNode *node) { /* normalize spherical harmonics to not include area, so * we can clamp the dot product and then mutliply by area */ int b; if(node->area != 0.0f) sh_mul(node->sh, 1.0f/node->area); for(b=0; bchildflag & (1<child[b].node) occ_build_sh_normalize(node->child[b].node); } } static OcclusionTree *occ_tree_build(Render *re) { OcclusionTree *tree; ObjectInstanceRen *obi; ObjectRen *obr; Material *ma; VlakRen *vlr= NULL; int a, b, c, totface; /* count */ totface= 0; for(obi=re->instancetable.first; obi; obi=obi->next) { obr= obi->obr; for(a=0; atotvlak; a++) { if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; else vlr++; ma= vlr->mat; if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) totface++; } } if(totface == 0) return NULL; tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree"); tree->totface= totface; /* parameters */ tree->error= get_render_aosss_error(&re->r, re->wrld.ao_approx_error); tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f; tree->doindirect= (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0); /* allocation */ tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena"); BLI_memarena_use_calloc(tree->arena); if(re->wrld.aomode & WO_AOCACHE) tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache"); tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace"); tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo"); tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion"); if(tree->doindirect) tree->rad= MEM_callocN(sizeof(float)*3*totface, "OcclusionRad"); /* make array of face pointers */ for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) { obr= obi->obr; for(a=0; atotvlak; a++) { if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; else vlr++; ma= vlr->mat; if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) { tree->face[b].obi= c; tree->face[b].facenr= a; tree->occlusion[b]= 1.0f; occ_face(&tree->face[b], tree->co[b], NULL, NULL); b++; } } } /* threads */ tree->totbuildthread= (re->r.threads > 1 && totface > 10000)? 8: 1; tree->dothreadedbuild= (tree->totbuildthread > 1); /* recurse */ tree->root= BLI_memarena_alloc(tree->arena, sizeof(OccNode)); tree->maxdepth= 1; occ_build_recursive(tree, tree->root, 0, totface, 1); if(tree->doindirect) { occ_build_shade(re, tree); occ_sum_occlusion(tree, tree->root); } MEM_freeN(tree->co); tree->co= NULL; occ_build_sh_normalize(tree->root); for(a=0; astack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack"); return tree; } static void occ_free_tree(OcclusionTree *tree) { int a; if(tree) { if(tree->arena) BLI_memarena_free(tree->arena); for(a=0; astack[a]) MEM_freeN(tree->stack[a]); if(tree->occlusion) MEM_freeN(tree->occlusion); if(tree->cache) MEM_freeN(tree->cache); if(tree->face) MEM_freeN(tree->face); if(tree->rad) MEM_freeN(tree->rad); MEM_freeN(tree); } } /* ------------------------- Traversal --------------------------- */ static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3]) { float dotreceive, dotemit; float ev[3]; ev[0]= -v[0]*invd2; ev[1]= -v[1]*invd2; ev[2]= -v[2]*invd2; dotemit= sh_eval(node->sh, ev); dotreceive= dot_v3v3(receivenormal, v)*invd2; CLAMP(dotemit, 0.0f, 1.0f); CLAMP(dotreceive, 0.0f, 1.0f); return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI; } static void VecAddDir(float result[3], const float v1[3], const float v2[3], const float fac) { result[0]= v1[0] + fac*(v2[0] - v1[0]); result[1]= v1[1] + fac*(v2[1] - v1[1]); result[2]= v1[2] + fac*(v2[2] - v1[2]); } static int occ_visible_quad(float *p, const float n[3], const float v0[3], const float *v1, const float *v2, float q0[3], float q1[3], float q2[3], float q3[3]) { static const float epsilon = 1e-6f; float c, sd[3]; c= dot_v3v3(n, p); /* signed distances from the vertices to the plane. */ sd[0]= dot_v3v3(n, v0) - c; sd[1]= dot_v3v3(n, v1) - c; sd[2]= dot_v3v3(n, v2) - c; if(fabsf(sd[0]) < epsilon) sd[0] = 0.0f; if(fabsf(sd[1]) < epsilon) sd[1] = 0.0f; if(fabsf(sd[2]) < epsilon) sd[2] = 0.0f; if(sd[0] > 0) { if(sd[1] > 0) { if(sd[2] > 0) { // +++ copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // ++- copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); } else { // ++0 copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } } else if(sd[1] < 0) { if(sd[2] > 0) { // +-+ copy_v3_v3(q0, v0); VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); copy_v3_v3(q3, v2); } else if(sd[2] < 0) { // +-- copy_v3_v3(q0, v0); VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); copy_v3_v3(q3, q2); } else { // +-0 copy_v3_v3(q0, v0); VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } } else { if(sd[2] > 0) { // +0+ copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // +0- copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); copy_v3_v3(q3, q2); } else { // +00 copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } } } else if(sd[0] < 0) { if(sd[1] > 0) { if(sd[2] > 0) { // -++ VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); } else if(sd[2] < 0) { // -+- VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); copy_v3_v3(q1, v1); VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); copy_v3_v3(q3, q2); } else { // -+0 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } } else if(sd[1] < 0) { if(sd[2] > 0) { // --+ VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // --- return 0; } else { // --0 return 0; } } else { if(sd[2] > 0) { // -0+ VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // -0- return 0; } else { // -00 return 0; } } } else { if(sd[1] > 0) { if(sd[2] > 0) { // 0++ copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // 0+- copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); copy_v3_v3(q3, q2); } else { // 0+0 copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } } else if(sd[1] < 0) { if(sd[2] > 0) { // 0-+ copy_v3_v3(q0, v0); VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // 0-- return 0; } else { // 0-0 return 0; } } else { if(sd[2] > 0) { // 00+ copy_v3_v3(q0, v0); copy_v3_v3(q1, v1); copy_v3_v3(q2, v2); copy_v3_v3(q3, q2); } else if(sd[2] < 0) { // 00- return 0; } else { // 000 return 0; } } } return 1; } /* altivec optimization, this works, but is unused */ #if 0 #include typedef union { vFloat v; float f[4]; } vFloatResult; static vFloat vec_splat_float(float val) { return (vFloat){val, val, val, val}; } static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) { vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle; vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3}; vFloatResult vresult; float result; /* compute r* */ vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]); vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]); vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]); /* normalize r* */ rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f)); vrx = vrx*rlen; vry = vry*rlen; vrz = vrz*rlen; /* rotate r* for cross and dot */ vsrx= vec_perm(vrx, vrx, rotate); vsry= vec_perm(vry, vry, rotate); vsrz= vec_perm(vrz, vrz, rotate); /* cross product */ gx = vsry*vrz - vsrz*vry; gy = vsrz*vrx - vsrx*vrz; gz = vsrx*vry - vsry*vrx; /* normalize */ rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f)); gx = gx*rlen; gy = gy*rlen; gz = gz*rlen; /* angle */ vcos = vrx*vsrx + vry*vsry + vrz*vsrz; vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f)); vangle= vacosf(vcos); /* dot */ vresult.v = (vec_splat_float(n[0])*gx + vec_splat_float(n[1])*gy + vec_splat_float(n[2])*gz)*vangle; result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI); result= MAX2(result, 0.0f); return result; } #endif /* SSE optimization, acos code doesn't work */ #if 0 #include static __m128 sse_approx_acos(__m128 x) { /* needs a better approximation than taylor expansion of acos, since that * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */ return _mm_set_ps1(1.0f); } static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) { float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; float fresult[4] __attribute__((aligned(16))); __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult; /* compute r */ qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]); qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]); qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]); rx = qx - _mm_set_ps1(p[0]); ry = qy - _mm_set_ps1(p[1]); rz = qz - _mm_set_ps1(p[2]); /* normalize r */ rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f)); rx = rx*rlen; ry = ry*rlen; rz = rz*rlen; /* cross product */ srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1)); sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1)); srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1)); gx = sry*rz - srz*ry; gy = srz*rx - srx*rz; gz = srx*ry - sry*rx; /* normalize g */ glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f)); gx = gx*glen; gy = gy*glen; gz = gz*glen; /* compute angle */ rcos = rx*srx + ry*sry + rz*srz; rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f)); angle = sse_approx_cos(rcos); aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle; /* sum together */ result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI); result= MAX2(result, 0.0f); return result; } #endif static void normalizef(float *n) { float d; d= dot_v3v3(n, n); if(d > 1.0e-35F) { d= 1.0f/sqrtf(d); n[0] *= d; n[1] *= d; n[2] *= d; } } static float occ_quad_form_factor(const float p[3], const float n[3], const float q0[3], const float q1[3], const float q2[3], const float q3[3]) { float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; sub_v3_v3v3(r0, q0, p); sub_v3_v3v3(r1, q1, p); sub_v3_v3v3(r2, q2, p); sub_v3_v3v3(r3, q3, p); normalizef(r0); normalizef(r1); normalizef(r2); normalizef(r3); cross_v3_v3v3(g0, r1, r0); normalizef(g0); cross_v3_v3v3(g1, r2, r1); normalizef(g1); cross_v3_v3v3(g2, r3, r2); normalizef(g2); cross_v3_v3v3(g3, r0, r3); normalizef(g3); a1= saacosf(dot_v3v3(r0, r1)); a2= saacosf(dot_v3v3(r1, r2)); a3= saacosf(dot_v3v3(r2, r3)); a4= saacosf(dot_v3v3(r3, r0)); dot1= dot_v3v3(n, g0); dot2= dot_v3v3(n, g1); dot3= dot_v3v3(n, g2); dot4= dot_v3v3(n, g3); result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI; result= MAX2(result, 0.0f); return result; } static float occ_form_factor(OccFace *face, float *p, float *n) { ObjectInstanceRen *obi; VlakRen *vlr; float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f; obi= &R.objectinstance[face->obi]; vlr= RE_findOrAddVlak(obi->obr, face->facenr); copy_v3_v3(v1, vlr->v1->co); copy_v3_v3(v2, vlr->v2->co); copy_v3_v3(v3, vlr->v3->co); if(obi->flag & R_TRANSFORMED) { mul_m4_v3(obi->mat, v1); mul_m4_v3(obi->mat, v2); mul_m4_v3(obi->mat, v3); } if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3)) contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3); if(vlr->v4) { copy_v3_v3(v4, vlr->v4->co); if(obi->flag & R_TRANSFORMED) mul_m4_v3(obi->mat, v4); if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3)) contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3); } return contrib; } static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float rad[3], float bentn[3]) { OccNode *node, **stack; OccFace *face; float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2; float distfac, fac, error, d2, weight, emitarea; int b, f, totstack; /* init variables */ copy_v3_v3(p, pp); copy_v3_v3(n, pn); madd_v3_v3fl(p, n, 1e-4f); if(bentn) copy_v3_v3(bentn, n); error= tree->error; distfac= tree->distfac; resultocc= 0.0f; zero_v3(resultrad); /* init stack */ stack= tree->stack[thread]; stack[0]= tree->root; totstack= 1; while(totstack) { /* pop point off the stack */ node= stack[--totstack]; sub_v3_v3v3(v, node->co, p); d2= dot_v3v3(v, v) + 1e-16f; emitarea= MAX2(node->area, node->dco); if(d2*error > emitarea) { if(distfac != 0.0f) { fac= 1.0f/(1.0f + distfac*d2); if(fac < 0.01f) continue; } else fac= 1.0f; /* accumulate occlusion from spherical harmonics */ invd2 = 1.0f/sqrtf(d2); weight= occ_solid_angle(node, v, d2, invd2, n); if(rad) madd_v3_v3fl(resultrad, node->rad, weight*fac); weight *= node->occlusion; if(bentn) { bentn[0] -= weight*invd2*v[0]; bentn[1] -= weight*invd2*v[1]; bentn[2] -= weight*invd2*v[2]; } resultocc += weight*fac; } else { /* traverse into children */ for(b=0; bchildflag & (1<child[b].face; face= &tree->face[f]; /* accumulate occlusion with face form factor */ if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) { if(bentn || distfac != 0.0f) { occ_face(face, co, NULL, NULL); sub_v3_v3v3(v, co, p); d2= dot_v3v3(v, v) + 1e-16f; fac= (distfac == 0.0f)? 1.0f: 1.0f/(1.0f + distfac*d2); if(fac < 0.01f) continue; } else fac= 1.0f; weight= occ_form_factor(face, p, n); if(rad) madd_v3_v3fl(resultrad, tree->rad[f], weight*fac); weight *= tree->occlusion[f]; if(bentn) { invd2= 1.0f/sqrtf(d2); bentn[0] -= weight*invd2*v[0]; bentn[1] -= weight*invd2*v[1]; bentn[2] -= weight*invd2*v[2]; } resultocc += weight*fac; } } else if(node->child[b].node) { /* push child on the stack */ stack[totstack++]= node->child[b].node; } } } } if(occ) *occ= resultocc; if(rad) copy_v3_v3(rad, resultrad); /*if(rad && exclude) { int a; for(a=0; atotface; a++) if((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr)) copy_v3_v3(rad, tree->rad[a]); }*/ if(bentn) normalize_v3(bentn); } static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce) { float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ; int bounce, i; rad= MEM_callocN(sizeof(float)*3*tree->totface, "OcclusionBounceRad"); sum= MEM_dupallocN(tree->rad); for(bounce=1; bouncetotface; i++) { occ_face(&tree->face[i], co, n, NULL); madd_v3_v3fl(co, n, 1e-8f); occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL); rad[i][0]= MAX2(rad[i][0], 0.0f); rad[i][1]= MAX2(rad[i][1], 0.0f); rad[i][2]= MAX2(rad[i][2], 0.0f); add_v3_v3(sum[i], rad[i]); if(re->test_break(re->tbh)) break; } if(re->test_break(re->tbh)) break; tmp= tree->rad; tree->rad= rad; rad= tmp; occ_sum_occlusion(tree, tree->root); } MEM_freeN(rad); MEM_freeN(tree->rad); tree->rad= sum; if(!re->test_break(re->tbh)) occ_sum_occlusion(tree, tree->root); } static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass) { float *occ, co[3], n[3]; int pass, i; occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc"); for(pass=0; passtotface; i++) { occ_face(&tree->face[i], co, n, NULL); negate_v3(n); madd_v3_v3fl(co, n, 1e-8f); occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL); if(re->test_break(re->tbh)) break; } if(re->test_break(re->tbh)) break; for(i=0; itotface; i++) { tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f); if(tree->occlusion[i] < 0.0f) tree->occlusion[i]= 0.0f; } occ_sum_occlusion(tree, tree->root); } MEM_freeN(occ); } static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *ao, float *env, float *indirect) { float nn[3], bn[3], fac, occ, occlusion, correction, rad[3]; int envcolor; envcolor= re->wrld.aocolor; if(onlyshadow) envcolor= WO_AOPLAIN; negate_v3_v3(nn, n); occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect)? rad: NULL, (env && envcolor)? bn: NULL); correction= re->wrld.ao_approx_correction; occlusion= (1.0f-correction)*(1.0f-occ); CLAMP(occlusion, 0.0f, 1.0f); if(correction != 0.0f) occlusion += correction*exp(-occ); if(env) { /* sky shading using bent normal */ if(ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) { fac= 0.5f*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]); env[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr; env[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng; env[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb; mul_v3_fl(env, occlusion); } else { env[0]= occlusion; env[1]= occlusion; env[2]= occlusion; } #if 0 else { /* WO_AOSKYTEX */ float dxyview[3]; bn[0]= -bn[0]; bn[1]= -bn[1]; bn[2]= -bn[2]; dxyview[0]= 1.0f; dxyview[1]= 1.0f; dxyview[2]= 0.0f; shadeSkyView(ao, co, bn, dxyview); } #endif } if(ao) { ao[0]= occlusion; ao[1]= occlusion; ao[2]= occlusion; } if(tree->doindirect) copy_v3_v3(indirect, rad); else zero_v3(indirect); } /* ---------------------------- Caching ------------------------------- */ static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y) { x -= cache->x; y -= cache->y; x /= cache->step; y /= cache->step; x *= cache->step; y *= cache->step; if(x < 0 || x >= cache->w || y < 0 || y >= cache->h) return NULL; else return &cache->sample[y*cache->w + x]; } static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect) { OcclusionCache *cache; OcclusionCacheSample *samples[4], *sample; float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo; float d[3], dist2; int i, x1, y1, x2, y2; if(!tree->cache) return 0; /* first try to find a sample in the same pixel */ cache= &tree->cache[thread]; if(cache->sample && cache->step) { sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)]; if(sample->filled) { sub_v3_v3v3(d, sample->co, co); dist2= dot_v3v3(d, d); if(dist2 < 0.5f*sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) { copy_v3_v3(ao, sample->ao); copy_v3_v3(env, sample->env); copy_v3_v3(indirect, sample->indirect); return 1; } } } else return 0; /* try to interpolate between 4 neighbouring pixels */ samples[0]= find_occ_sample(cache, x, y); samples[1]= find_occ_sample(cache, x+cache->step, y); samples[2]= find_occ_sample(cache, x, y+cache->step); samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step); for(i=0; i<4; i++) if(!samples[i] || !samples[i]->filled) return 0; /* require intensities not being too different */ mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); if(maxo - mino > 0.05f) return 0; /* compute weighted interpolation between samples */ zero_v3(ao); zero_v3(env); zero_v3(indirect); totw= 0.0f; x1= samples[0]->x; y1= samples[0]->y; x2= samples[3]->x; y2= samples[3]->y; tx= (float)(x2 - x)/(float)(x2 - x1); ty= (float)(y2 - y)/(float)(y2 - y1); wb[3]= (1.0f-tx)*(1.0f-ty); wb[2]= (tx)*(1.0f-ty); wb[1]= (1.0f-tx)*(ty); wb[0]= tx*ty; for(i=0; i<4; i++) { sub_v3_v3v3(d, samples[i]->co, co); //dist2= dot_v3v3(d, d); wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2)); wn[i]= pow(dot_v3v3(samples[i]->n, n), 32.0f); w= wb[i]*wn[i]*wz[i]; totw += w; madd_v3_v3fl(ao, samples[i]->ao, w); madd_v3_v3fl(env, samples[i]->env, w); madd_v3_v3fl(indirect, samples[i]->indirect, w); } if(totw >= 0.9f) { totw= 1.0f/totw; mul_v3_fl(ao, totw); mul_v3_fl(env, totw); mul_v3_fl(indirect, totw); return 1; } return 0; } static void sample_occ_surface(ShadeInput *shi) { StrandRen *strand= shi->strand; StrandSurface *mesh= strand->buffer->surface; int *face, *index = RE_strandren_get_face(shi->obr, strand, 0); float w[4], *co1, *co2, *co3, *co4; if(mesh && mesh->face && mesh->co && mesh->ao && index) { face= mesh->face[*index]; co1= mesh->co[face[0]]; co2= mesh->co[face[1]]; co3= mesh->co[face[2]]; co4= (face[3])? mesh->co[face[3]]: NULL; interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co); zero_v3(shi->ao); zero_v3(shi->env); zero_v3(shi->indirect); madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]); madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]); madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]); madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]); madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]); madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]); madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]); madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]); madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]); if(face[3]) { madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]); madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]); madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]); } } else { shi->ao[0]= 1.0f; shi->ao[1]= 1.0f; shi->ao[2]= 1.0f; zero_v3(shi->env); zero_v3(shi->indirect); } } /* ------------------------- External Functions --------------------------- */ static void *exec_strandsurface_sample(void *data) { OcclusionThread *othread= (OcclusionThread*)data; Render *re= othread->re; StrandSurface *mesh= othread->mesh; float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4; int a, *face; for(a=othread->begin; aend; a++) { face= mesh->face[a]; co1= mesh->co[face[0]]; co2= mesh->co[face[1]]; co3= mesh->co[face[2]]; if(face[3]) { co4= mesh->co[face[3]]; interp_v3_v3v3(co, co1, co3, 0.5f); normal_quad_v3( n,co1, co2, co3, co4); } else { cent_tri_v3(co, co1, co2, co3); normal_tri_v3( n,co1, co2, co3); } negate_v3(n); sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect); copy_v3_v3(othread->faceao[a], ao); copy_v3_v3(othread->faceenv[a], env); copy_v3_v3(othread->faceindirect[a], indirect); } return 0; } void make_occ_tree(Render *re) { OcclusionThread othreads[BLENDER_MAX_THREADS]; OcclusionTree *tree; StrandSurface *mesh; ListBase threads; float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3]; int a, totface, totthread, *face, *count; /* ugly, needed for occ_face */ R= *re; re->i.infostr= "Occlusion preprocessing"; re->stats_draw(re->sdh, &re->i); re->occlusiontree= tree= occ_tree_build(re); if(tree) { if(re->wrld.ao_approx_passes > 0) occ_compute_passes(re, tree, re->wrld.ao_approx_passes); if(tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT)) occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces); for(mesh=re->strandsurface.first; mesh; mesh=mesh->next) { if(!mesh->face || !mesh->co || !mesh->ao) continue; count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount"); faceao= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceAO"); faceenv= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceEnv"); faceindirect= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceIndirect"); totthread= (mesh->totface > 10000)? re->r.threads: 1; totface= mesh->totface/totthread; for(a=0; atotface: (a+1)*totface; } if(totthread == 1) { exec_strandsurface_sample(&othreads[0]); } else { BLI_init_threads(&threads, exec_strandsurface_sample, totthread); for(a=0; atotface; a++) { face= mesh->face[a]; copy_v3_v3(ao, faceao[a]); copy_v3_v3(env, faceenv[a]); copy_v3_v3(indirect, faceindirect[a]); add_v3_v3(mesh->ao[face[0]], ao); add_v3_v3(mesh->env[face[0]], env); add_v3_v3(mesh->indirect[face[0]], indirect); count[face[0]]++; add_v3_v3(mesh->ao[face[1]], ao); add_v3_v3(mesh->env[face[1]], env); add_v3_v3(mesh->indirect[face[1]], indirect); count[face[1]]++; add_v3_v3(mesh->ao[face[2]], ao); add_v3_v3(mesh->env[face[2]], env); add_v3_v3(mesh->indirect[face[2]], indirect); count[face[2]]++; if(face[3]) { add_v3_v3(mesh->ao[face[3]], ao); add_v3_v3(mesh->env[face[3]], env); add_v3_v3(mesh->indirect[face[3]], indirect); count[face[3]]++; } } for(a=0; atotvert; a++) { if(count[a]) { mul_v3_fl(mesh->ao[a], 1.0f/count[a]); mul_v3_fl(mesh->env[a], 1.0f/count[a]); mul_v3_fl(mesh->indirect[a], 1.0f/count[a]); } } MEM_freeN(count); MEM_freeN(faceao); MEM_freeN(faceenv); MEM_freeN(faceindirect); } } } void free_occ(Render *re) { if(re->occlusiontree) { occ_free_tree(re->occlusiontree); re->occlusiontree = NULL; } } void sample_occ(Render *re, ShadeInput *shi) { OcclusionTree *tree= re->occlusiontree; OcclusionCache *cache; OcclusionCacheSample *sample; OccFace exclude; int onlyshadow; if(tree) { if(shi->strand) { sample_occ_surface(shi); } /* try to get result from the cache if possible */ else if(shi->depth!=0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) { /* no luck, let's sample the occlusion */ exclude.obi= shi->obi - re->objectinstance; exclude.facenr= shi->vlr->index; onlyshadow= (shi->mat->mode & MA_ONLYSHADOW); sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect); /* fill result into sample, each time */ if(tree->cache) { cache= &tree->cache[shi->thread]; if(cache->sample && cache->step) { sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)]; copy_v3_v3(sample->co, shi->co); copy_v3_v3(sample->n, shi->vno); copy_v3_v3(sample->ao, shi->ao); copy_v3_v3(sample->env, shi->env); copy_v3_v3(sample->indirect, shi->indirect); sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]); sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2])); sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2])); sample->dist2= dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco); sample->filled= 1; } } } } else { shi->ao[0]= 1.0f; shi->ao[1]= 1.0f; shi->ao[2]= 1.0f; shi->env[0]= 0.0f; shi->env[1]= 0.0f; shi->env[2]= 0.0f; shi->indirect[0]= 0.0f; shi->indirect[1]= 0.0f; shi->indirect[2]= 0.0f; } } void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp) { OcclusionTree *tree= re->occlusiontree; PixStr ps; OcclusionCache *cache; OcclusionCacheSample *sample; OccFace exclude; ShadeInput *shi; intptr_t *rd=NULL; int *ro=NULL, *rp=NULL, *rz=NULL, onlyshadow; int x, y, step = CACHE_STEP; if(!tree->cache) return; cache= &tree->cache[pa->thread]; cache->w= pa->rectx; cache->h= pa->recty; cache->x= pa->disprect.xmin; cache->y= pa->disprect.ymin; cache->step= step; cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample"); sample= cache->sample; if(re->osa) { rd= pa->rectdaps; } else { /* fake pixel struct for non-osa */ ps.next= NULL; ps.mask= 0xFFFF; ro= pa->recto; rp= pa->rectp; rz= pa->rectz; } /* compute a sample at every step pixels */ for(y=pa->disprect.ymin; ydisprect.ymax; y++) { for(x=pa->disprect.xmin; xdisprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) { if(!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1)) continue; if(!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1)) continue; if(re->osa) { if(!*rd) continue; shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y); } else { if(!*rp) continue; ps.obi= *ro; ps.facenr= *rp; ps.z= *rz; shade_samples_fill_with_ps(ssamp, &ps, x, y); } shi= ssamp->shi; if(shi->vlr) { onlyshadow= (shi->mat->mode & MA_ONLYSHADOW); exclude.obi= shi->obi - re->objectinstance; exclude.facenr= shi->vlr->index; sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect); copy_v3_v3(sample->co, shi->co); copy_v3_v3(sample->n, shi->vno); copy_v3_v3(sample->ao, shi->ao); copy_v3_v3(sample->env, shi->env); copy_v3_v3(sample->indirect, shi->indirect); sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]); sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2])); sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2])); sample->dist2= dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco); sample->x= shi->xs; sample->y= shi->ys; sample->filled= 1; } if(re->test_break(re->tbh)) break; } } } void free_occ_samples(Render *re, RenderPart *pa) { OcclusionTree *tree= re->occlusiontree; OcclusionCache *cache; if(tree->cache) { cache= &tree->cache[pa->thread]; if(cache->sample) MEM_freeN(cache->sample); cache->w= 0; cache->h= 0; cache->step= 0; } }