diff options
Diffstat (limited to 'source/blender/render/intern/source/occlusion.c')
-rw-r--r-- | source/blender/render/intern/source/occlusion.c | 1763 |
1 files changed, 1763 insertions, 0 deletions
diff --git a/source/blender/render/intern/source/occlusion.c b/source/blender/render/intern/source/occlusion.c new file mode 100644 index 00000000000..d2d2cf3fb77 --- /dev/null +++ b/source/blender/render/intern/source/occlusion.c @@ -0,0 +1,1763 @@ +/* + * $Id$ + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The Original Code is Copyright (C) 2008 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): Brecht Van Lommel. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "MEM_guardedalloc.h" + +#include "DNA_material_types.h" + +#include "BLI_arithb.h" +#include "BLI_blenlib.h" +#include "BLI_memarena.h" +#include "BLI_threads.h" + +#include "BKE_global.h" +#include "BKE_scene.h" +#include "BKE_utildefines.h" + +#include "RE_shader_ext.h" + +/* local includes */ +#include "occlusion.h" +#include "render_types.h" +#include "rendercore.h" +#include "renderdatabase.h" +#include "pixelshading.h" +#include "shading.h" +#include "zbuf.h" + +/* ------------------------- Declarations --------------------------- */ + +#define INVALID_INDEX ((int)(~0)) +#define INVPI 0.31830988618379069f +#define TOTCHILD 8 +#define CACHE_STEP 3 + +typedef struct OcclusionCacheSample { + float co[3], n[3], col[3], intensity, dist2; + int x, y, filled; +} OcclusionCacheSample; + +typedef struct OcclusionCache { + OcclusionCacheSample *sample; + int x, y, w, h, step; +} OcclusionCache; + +typedef struct OccFace { + int obi; + int facenr; +} OccFace; + +typedef struct OccNode { + float co[3], area; + float sh[9], dco; + float occlusion; + int childflag; + union { + //OccFace face; + int face; + struct OccNode *node; + } child[TOTCHILD]; +} OccNode; + +typedef struct OcclusionTree { + MemArena *arena; + + float (*co)[3]; /* temporary during build */ + + OccFace *face; /* instance and face indices */ + float *occlusion; /* occlusion for faces */ + + OccNode *root; + + OccNode **stack[BLENDER_MAX_THREADS]; + int maxdepth; + + int totface; + + float error; + float distfac; + + int dothreadedbuild; + int totbuildthread; + + OcclusionCache *cache; +} OcclusionTree; + +typedef struct OcclusionThread { + Render *re; + StrandSurface *mesh; + float (*facecol)[3]; + int begin, end; + int thread; +} OcclusionThread; + +typedef struct OcclusionBuildThread { + OcclusionTree *tree; + int begin, end, depth; + OccNode *node; +} OcclusionBuildThread; + +/* ------------------------- Shading --------------------------- */ + +extern Render R; // meh + +#if 0 +static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad) +{ + ShadeInput *shi= ssamp->shi; + ShadeResult *shr= ssamp->shr; + float l, u, v, *v1, *v2, *v3; + + /* init */ + if(vlr->v4) { + shi->u= u= 0.5f; + shi->v= v= 0.5f; + } + else { + shi->u= u= 1.0f/3.0f; + shi->v= v= 1.0f/3.0f; + } + + /* setup render coordinates */ + v1= vlr->v1->co; + v2= vlr->v2->co; + v3= vlr->v3->co; + + /* renderco */ + l= 1.0f-u-v; + + shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0]; + shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1]; + shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2]; + + shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); + + /* set up view vector */ + VECCOPY(shi->view, shi->co); + Normalize(shi->view); + + /* cache for shadow */ + shi->samplenr++; + + shi->xs= 0; // TODO + shi->ys= 0; + + shade_input_set_normals(shi); + + /* no normal flip */ + if(shi->flippednor) + shade_input_flip_normals(shi); + + /* not a pretty solution, but fixes common cases */ + if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) { + VecMulf(shi->vn, -1.0f); + VecMulf(shi->vno, -1.0f); + } + + /* init material vars */ + // note, keep this synced with render_types.h + memcpy(&shi->r, &shi->mat->r, 23*sizeof(float)); + shi->har= shi->mat->har; + + /* render */ + shade_input_set_shade_texco(shi); + shade_material_loop(shi, shr); /* todo: nodes */ + + VECCOPY(rad, shr->combined); +} + +static void occ_build_shade(Render *re, OcclusionTree *tree) +{ + ShadeSample ssamp; + ObjectInstanceRen *obi; + VlakRen *vlr; + int a; + + R= *re; + + /* setup shade sample with correct passes */ + memset(&ssamp, 0, sizeof(ShadeSample)); + ssamp.shi[0].lay= re->scene->lay; + ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA; + ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC); + ssamp.tot= 1; + + for(a=0; a<tree->totface; a++) { + obi= &R.objectinstance[tree->face[a].obi]; + vlr= RE_findOrAddVlak(obi->obr, tree->face[a].vlr); + + occ_shade(&ssamp, obi, vlr, tree->rad[a]); + } +} +#endif + +/* ------------------------- Spherical Harmonics --------------------------- */ + +/* Use 2nd order SH => 9 coefficients, stored in this order: + 0 = (0,0), + 1 = (1,-1), 2 = (1,0), 3 = (1,1), + 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */ + +static void sh_copy(float *shresult, float *sh) +{ + memcpy(shresult, sh, sizeof(float)*9); +} + +static void sh_mul(float *sh, float f) +{ + int i; + + for(i=0; i<9; i++) + sh[i] *= f; +} + +static void sh_add(float *shresult, float *sh1, float *sh2) +{ + int i; + + for(i=0; i<9; i++) + shresult[i]= sh1[i] + sh2[i]; +} + +static void sh_from_disc(float *n, float area, float *shresult) +{ + /* See formula (3) in: + "An Efficient Representation for Irradiance Environment Maps" */ + float sh[9], x, y, z; + + x= n[0]; + y= n[1]; + z= n[2]; + + sh[0]= 0.282095f; + + sh[1]= 0.488603f*y; + sh[2]= 0.488603f*z; + sh[3]= 0.488603f*x; + + sh[4]= 1.092548f*x*y; + sh[5]= 1.092548f*y*z; + sh[6]= 0.315392f*(3.0f*z*z - 1.0f); + sh[7]= 1.092548f*x*z; + sh[8]= 0.546274f*(x*x - y*y); + + sh_mul(sh, area); + sh_copy(shresult, sh); +} + +static float sh_eval(float *sh, float *v) +{ + /* See formula (13) in: + "An Efficient Representation for Irradiance Environment Maps" */ + static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f; + static const float c4 = 0.886227f, c5 = 0.247708f; + float x, y, z, sum; + + x= v[0]; + y= v[1]; + z= v[2]; + + sum= c1*sh[8]*(x*x - y*y); + sum += c3*sh[6]*z*z; + sum += c4*sh[0]; + sum += -c5*sh[6]; + sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z); + sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z); + + return sum; +} + +/* ------------------------------ Building --------------------------------- */ + +static void occ_face(const OccFace *face, float *co, float *normal, float *area) +{ + ObjectInstanceRen *obi; + VlakRen *vlr; + float v1[3], v2[3], v3[3], v4[3]; + + obi= &R.objectinstance[face->obi]; + vlr= RE_findOrAddVlak(obi->obr, face->facenr); + + if(co) { + if(vlr->v4) + VecLerpf(co, vlr->v1->co, vlr->v3->co, 0.5f); + else + CalcCent3f(co, vlr->v1->co, vlr->v2->co, vlr->v3->co); + + if(obi->flag & R_TRANSFORMED) + Mat4MulVecfl(obi->mat, co); + } + + if(normal) { + normal[0]= -vlr->n[0]; + normal[1]= -vlr->n[1]; + normal[2]= -vlr->n[2]; + + if(obi->flag & R_TRANSFORMED) + Mat3MulVecfl(obi->nmat, normal); + } + + if(area) { + VECCOPY(v1, vlr->v1->co); + VECCOPY(v2, vlr->v2->co); + VECCOPY(v3, vlr->v3->co); + if(vlr->v4) VECCOPY(v4, vlr->v4->co); + + if(obi->flag & R_TRANSFORMED) { + Mat4MulVecfl(obi->mat, v1); + Mat4MulVecfl(obi->mat, v2); + Mat4MulVecfl(obi->mat, v3); + if(vlr->v4) Mat4MulVecfl(obi->mat, v4); + } + + /* todo: correct area for instances */ + if(vlr->v4) + *area= AreaQ3Dfl(v1, v2, v3, v4); + else + *area= AreaT3Dfl(v1, v2, v3); + } +} + +static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node) +{ + OccNode *child; + float occ, area, totarea; + int a, b; + + occ= 0.0f; + totarea= 0.0f; + + for(b=0; b<TOTCHILD; b++) { + if(node->childflag & (1<<b)) { + a= node->child[b].face; + occ_face(&tree->face[a], 0, 0, &area); + occ += area*tree->occlusion[a]; + totarea += area; + } + else if(node->child[b].node) { + child= node->child[b].node; + occ_sum_occlusion(tree, child); + + occ += child->area*child->occlusion; + totarea += child->area; + } + } + + if(totarea != 0.0f) + occ /= totarea; + + node->occlusion= occ; +} + +static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max) +{ + float len, maxlen= -1.0f; + int a, axis = 0; + + INIT_MINMAX(min, max); + + for(a=begin; a<end; a++) + DO_MINMAX(tree->co[a], min, max) + + for(a=0; a<3; a++) { + len= max[a] - min[a]; + + if(len > maxlen) { + maxlen= len; + axis= a; + } + } + + return axis; +} + +static void occ_node_from_face(OccFace *face, OccNode *node) +{ + float n[3]; + + occ_face(face, node->co, n, &node->area); + node->dco= 0.0f; + sh_from_disc(n, node->area, node->sh); +} + +static void occ_build_dco(OcclusionTree *tree, OccNode *node, float *co, float *dco) +{ + OccNode *child; + float dist, d[3], nco[3]; + int b; + + for(b=0; b<TOTCHILD; b++) { + if(node->childflag & (1<<b)) { + occ_face(tree->face+node->child[b].face, nco, 0, 0); + } + else if(node->child[b].node) { + child= node->child[b].node; + occ_build_dco(tree, child, co, dco); + VECCOPY(nco, child->co); + } + + VECSUB(d, nco, co); + dist= INPR(d, d); + if(dist > *dco) + *dco= dist; + } +} + +static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split) +{ + float min[3], max[3], mid; + int axis, a, enda; + + /* split in middle of boundbox. this seems faster than median split + * on complex scenes, possibly since it avoids two distant faces to + * be in the same node better? */ + axis= occ_find_bbox_axis(tree, begin, end, min, max); + mid= 0.5f*(min[axis]+max[axis]); + + a= begin; + enda= end; + while(a<enda) { + if(tree->co[a][axis] > mid) { + enda--; + SWAP(OccFace, tree->face[a], tree->face[enda]); + SWAP(float, tree->co[a][0], tree->co[enda][0]); + SWAP(float, tree->co[a][1], tree->co[enda][1]); + SWAP(float, tree->co[a][2], tree->co[enda][2]); + } + else + a++; + } + + *split= enda; +} + +static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count) +{ + /* split faces into eight groups */ + int b, splitx, splity[2], splitz[4]; + + occ_build_split(tree, begin, end, &splitx); + + /* force split if none found, to deal with degenerate geometry */ + if(splitx == begin || splitx == end) + splitx= (begin+end)/2; + + occ_build_split(tree, begin, splitx, &splity[0]); + occ_build_split(tree, splitx, end, &splity[1]); + + occ_build_split(tree, begin, splity[0], &splitz[0]); + occ_build_split(tree, splity[0], splitx, &splitz[1]); + occ_build_split(tree, splitx, splity[1], &splitz[2]); + occ_build_split(tree, splity[1], end, &splitz[3]); + + offset[0]= begin; + offset[1]= splitz[0]; + offset[2]= splity[0]; + offset[3]= splitz[1]; + offset[4]= splitx; + offset[5]= splitz[2]; + offset[6]= splity[1]; + offset[7]= splitz[3]; + + for(b=0; b<7; b++) + count[b]= offset[b+1] - offset[b]; + count[7]= end - offset[7]; +} + +static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth); + +static void *exec_occ_build(void *data) +{ + OcclusionBuildThread *othread= (OcclusionBuildThread*)data; + + occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth); + + return 0; +} + +static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth) +{ + ListBase threads; + OcclusionBuildThread othreads[BLENDER_MAX_THREADS]; + OccNode *child, tmpnode; + OccFace *face; + int a, b, totthread=0, offset[TOTCHILD], count[TOTCHILD]; + + /* keep track of maximum depth for stack */ + if(depth > tree->maxdepth) + tree->maxdepth= depth; + + /* add a new node */ + node->occlusion= 1.0f; + + /* leaf node with only children */ + if(end - begin <= TOTCHILD) { + for(a=begin, b=0; a<end; a++, b++) { + face= &tree->face[a]; + node->child[b].face= a; + node->childflag |= (1<<b); + } + } + else { + /* order faces */ + occ_build_8_split(tree, begin, end, offset, count); + + if(depth == 1 && tree->dothreadedbuild) + BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread); + + for(b=0; b<TOTCHILD; b++) { + if(count[b] == 0) { + node->child[b].node= NULL; + } + else if(count[b] == 1) { + face= &tree->face[offset[b]]; + node->child[b].face= offset[b]; + node->childflag |= (1<<b); + } + else { + if(tree->dothreadedbuild) + BLI_lock_thread(LOCK_CUSTOM1); + + child= BLI_memarena_alloc(tree->arena, sizeof(OccNode)); + node->child[b].node= child; + + if(tree->dothreadedbuild) + BLI_unlock_thread(LOCK_CUSTOM1); + + if(depth == 1 && tree->dothreadedbuild) { + othreads[totthread].tree= tree; + othreads[totthread].node= child; + othreads[totthread].begin= offset[b]; + othreads[totthread].end= offset[b]+count[b]; + othreads[totthread].depth= depth+1; + BLI_insert_thread(&threads, &othreads[totthread]); + totthread++; + } + else + occ_build_recursive(tree, child, offset[b], offset[b]+count[b], depth+1); + } + } + + if(depth == 1 && tree->dothreadedbuild) + BLI_end_threads(&threads); + } + + /* combine area, position and sh */ + for(b=0; b<TOTCHILD; b++) { + if(node->childflag & (1<<b)) { + child= &tmpnode; + occ_node_from_face(tree->face+node->child[b].face, &tmpnode); + } + else { + child= node->child[b].node; + } + + if(child) { + node->area += child->area; + sh_add(node->sh, node->sh, child->sh); + VECADDFAC(node->co, node->co, child->co, child->area); + } + } + + if(node->area != 0.0f) + VecMulf(node->co, 1.0f/node->area); + + /* compute maximum distance from center */ + node->dco= 0.0f; + occ_build_dco(tree, node, node->co, &node->dco); +} + +static void occ_build_sh_normalize(OccNode *node) +{ + /* normalize spherical harmonics to not include area, so + * we can clamp the dot product and then mutliply by area */ + int b; + + if(node->area != 0.0f) + sh_mul(node->sh, 1.0f/node->area); + + for(b=0; b<TOTCHILD; b++) { + if(node->childflag & (1<<b)); + else if(node->child[b].node) + occ_build_sh_normalize(node->child[b].node); + } +} + +static OcclusionTree *occ_tree_build(Render *re) +{ + OcclusionTree *tree; + ObjectInstanceRen *obi; + ObjectRen *obr; + VlakRen *vlr= NULL; + int a, b, c, totface; + + /* count */ + totface= 0; + for(obi=re->instancetable.first; obi; obi=obi->next) { + obr= obi->obr; + for(a=0; a<obr->totvlak; a++) { + if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; + else vlr++; + + if(vlr->mat->mode & MA_TRACEBLE) + totface++; + } + } + + if(totface == 0) + return NULL; + + tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree"); + tree->totface= totface; + + /* parameters */ + tree->error= get_render_aosss_error(&re->r, re->wrld.ao_approx_error); + tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f; + + /* allocation */ + tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode)); + BLI_memarena_use_calloc(tree->arena); + + if(re->wrld.aomode & WO_AOCACHE) + tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache"); + + tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace"); + tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo"); + tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion"); + + /* make array of face pointers */ + for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) { + obr= obi->obr; + for(a=0; a<obr->totvlak; a++) { + if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; + else vlr++; + + if(vlr->mat->mode & MA_TRACEBLE) { + tree->face[b].obi= c; + tree->face[b].facenr= a; + tree->occlusion[b]= 1.0f; + occ_face(&tree->face[b], tree->co[b], NULL, NULL); + b++; + } + } + } + + /* threads */ + tree->totbuildthread= (re->r.threads > 1 && totface > 10000)? 8: 1; + tree->dothreadedbuild= (tree->totbuildthread > 1); + + /* recurse */ + tree->root= BLI_memarena_alloc(tree->arena, sizeof(OccNode)); + occ_build_recursive(tree, tree->root, 0, totface, 1); + +#if 0 + if(tree->doindirect) { + occ_build_shade(re, tree); + occ_sum_occlusion(tree, tree->root); + } +#endif + + MEM_freeN(tree->co); + tree->co= NULL; + + occ_build_sh_normalize(tree->root); + + for(a=0; a<BLENDER_MAX_THREADS; a++) + tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack"); + + return tree; +} + +static void occ_free_tree(OcclusionTree *tree) +{ + int a; + + if(tree) { + if(tree->arena) BLI_memarena_free(tree->arena); + for(a=0; a<BLENDER_MAX_THREADS; a++) + if(tree->stack[a]) + MEM_freeN(tree->stack[a]); + if(tree->occlusion) MEM_freeN(tree->occlusion); + if(tree->face) MEM_freeN(tree->face); + if(tree->cache) MEM_freeN(tree->cache); + MEM_freeN(tree); + } +} + +/* ------------------------- Traversal --------------------------- */ + +static float occ_solid_angle(OccNode *node, float *v, float d2, float invd2, float *receivenormal) +{ + float dotreceive, dotemit; + float ev[3]; + + ev[0]= -v[0]*invd2; + ev[1]= -v[1]*invd2; + ev[2]= -v[2]*invd2; + dotemit= sh_eval(node->sh, ev); + dotreceive= INPR(receivenormal, v)*invd2; + + CLAMP(dotemit, 0.0f, 1.0f); + CLAMP(dotreceive, 0.0f, 1.0f); + + return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI; +} + +static void VecAddDir(float *result, float *v1, float *v2, float fac) +{ + result[0]= v1[0] + fac*(v2[0] - v1[0]); + result[1]= v1[1] + fac*(v2[1] - v1[1]); + result[2]= v1[2] + fac*(v2[2] - v1[2]); +} + +static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3) +{ + static const float epsilon = 1e-6f; + float c, sd[3]; + + c= INPR(n, p); + + /* signed distances from the vertices to the plane. */ + sd[0]= INPR(n, v0) - c; + sd[1]= INPR(n, v1) - c; + sd[2]= INPR(n, v2) - c; + + if(fabs(sd[0]) < epsilon) sd[0] = 0.0f; + if(fabs(sd[1]) < epsilon) sd[1] = 0.0f; + if(fabs(sd[2]) < epsilon) sd[2] = 0.0f; + + if(sd[0] > 0) { + if(sd[1] > 0) { + if(sd[2] > 0) { + // +++ + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // ++- + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); + } + else { + // ++0 + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + } + else if(sd[1] < 0) { + if(sd[2] > 0) { + // +-+ + VECCOPY(q0, v0); + VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); + VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + VECCOPY(q3, v2); + } + else if(sd[2] < 0) { + // +-- + VECCOPY(q0, v0); + VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); + VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); + VECCOPY(q3, q2); + } + else { + // +-0 + VECCOPY(q0, v0); + VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + } + else { + if(sd[2] > 0) { + // +0+ + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // +0- + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); + VECCOPY(q3, q2); + } + else { + // +00 + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + } + } + else if(sd[0] < 0) { + if(sd[1] > 0) { + if(sd[2] > 0) { + // -++ + VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); + } + else if(sd[2] < 0) { + // -+- + VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); + VECCOPY(q1, v1); + VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + VECCOPY(q3, q2); + } + else { + // -+0 + VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + } + else if(sd[1] < 0) { + if(sd[2] > 0) { + // --+ + VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); + VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // --- + return 0; + } + else { + // --0 + return 0; + } + } + else { + if(sd[2] > 0) { + // -0+ + VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // -0- + return 0; + } + else { + // -00 + return 0; + } + } + } + else { + if(sd[1] > 0) { + if(sd[2] > 0) { + // 0++ + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // 0+- + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + VECCOPY(q3, q2); + } + else { + // 0+0 + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + } + else if(sd[1] < 0) { + if(sd[2] > 0) { + // 0-+ + VECCOPY(q0, v0); + VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // 0-- + return 0; + } + else { + // 0-0 + return 0; + } + } + else { + if(sd[2] > 0) { + // 00+ + VECCOPY(q0, v0); + VECCOPY(q1, v1); + VECCOPY(q2, v2); + VECCOPY(q3, q2); + } + else if(sd[2] < 0) { + // 00- + return 0; + } + else { + // 000 + return 0; + } + } + } + + return 1; +} + +/* altivec optimization, this works, but is unused */ + +#if 0 +#include <Accelerate/Accelerate.h> + +typedef union { + vFloat v; + float f[4]; +} vFloatResult; + +static vFloat vec_splat_float(float val) +{ + return (vFloat){val, val, val, val}; +} + +static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) +{ + vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle; + vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3}; + vFloatResult vresult; + float result; + + /* compute r* */ + vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]); + vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]); + vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]); + + /* normalize r* */ + rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f)); + vrx = vrx*rlen; + vry = vry*rlen; + vrz = vrz*rlen; + + /* rotate r* for cross and dot */ + vsrx= vec_perm(vrx, vrx, rotate); + vsry= vec_perm(vry, vry, rotate); + vsrz= vec_perm(vrz, vrz, rotate); + + /* cross product */ + gx = vsry*vrz - vsrz*vry; + gy = vsrz*vrx - vsrx*vrz; + gz = vsrx*vry - vsry*vrx; + + /* normalize */ + rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f)); + gx = gx*rlen; + gy = gy*rlen; + gz = gz*rlen; + + /* angle */ + vcos = vrx*vsrx + vry*vsry + vrz*vsrz; + vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f)); + vangle= vacosf(vcos); + + /* dot */ + vresult.v = (vec_splat_float(n[0])*gx + + vec_splat_float(n[1])*gy + + vec_splat_float(n[2])*gz)*vangle; + + result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI); + result= MAX2(result, 0.0f); + + return result; +} + +#endif + +/* SSE optimization, acos code doesn't work */ + +#if 0 + +#include <xmmintrin.h> + +static __m128 sse_approx_acos(__m128 x) +{ + /* needs a better approximation than taylor expansion of acos, since that + * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work + * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */ + + return _mm_set_ps1(1.0f); +} + +static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) +{ + float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; + float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; + float fresult[4] __attribute__((aligned(16))); + __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult; + + /* compute r */ + qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]); + qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]); + qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]); + + rx = qx - _mm_set_ps1(p[0]); + ry = qy - _mm_set_ps1(p[1]); + rz = qz - _mm_set_ps1(p[2]); + + /* normalize r */ + rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f)); + rx = rx*rlen; + ry = ry*rlen; + rz = rz*rlen; + + /* cross product */ + srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1)); + sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1)); + srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1)); + + gx = sry*rz - srz*ry; + gy = srz*rx - srx*rz; + gz = srx*ry - sry*rx; + + /* normalize g */ + glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f)); + gx = gx*glen; + gy = gy*glen; + gz = gz*glen; + + /* compute angle */ + rcos = rx*srx + ry*sry + rz*srz; + rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f)); + + angle = sse_approx_cos(rcos); + aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle; + + /* sum together */ + result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI); + result= MAX2(result, 0.0f); + + return result; +} + +#endif + +static float saacosf(float fac) +{ + if(fac<= -1.0f) return (float)M_PI; + else if(fac>=1.0f) return 0.0f; + else return acos(fac); /* acosf(fac) */ +} + +static void normalizef(float *n) +{ + float d; + + d= INPR(n, n); + + if(d > 1.0e-35F) { + d= 1.0f/sqrt(d); /* sqrtf(d) */ + + n[0] *= d; + n[1] *= d; + n[2] *= d; + } +} + +static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) +{ + float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; + float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; + + VECSUB(r0, q0, p); + VECSUB(r1, q1, p); + VECSUB(r2, q2, p); + VECSUB(r3, q3, p); + + normalizef(r0); + normalizef(r1); + normalizef(r2); + normalizef(r3); + + Crossf(g0, r1, r0); normalizef(g0); + Crossf(g1, r2, r1); normalizef(g1); + Crossf(g2, r3, r2); normalizef(g2); + Crossf(g3, r0, r3); normalizef(g3); + + a1= saacosf(INPR(r0, r1)); + a2= saacosf(INPR(r1, r2)); + a3= saacosf(INPR(r2, r3)); + a4= saacosf(INPR(r3, r0)); + + dot1= INPR(n, g0); + dot2= INPR(n, g1); + dot3= INPR(n, g2); + dot4= INPR(n, g3); + + result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI; + result= MAX2(result, 0.0f); + + return result; +} + +static float occ_form_factor(OccFace *face, float *p, float *n) +{ + ObjectInstanceRen *obi; + VlakRen *vlr; + float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f; + + obi= &R.objectinstance[face->obi]; + vlr= RE_findOrAddVlak(obi->obr, face->facenr); + + VECCOPY(v1, vlr->v1->co); + VECCOPY(v2, vlr->v2->co); + VECCOPY(v3, vlr->v3->co); + + if(obi->flag & R_TRANSFORMED) { + Mat4MulVecfl(obi->mat, v1); + Mat4MulVecfl(obi->mat, v2); + Mat4MulVecfl(obi->mat, v3); + } + + if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3)) + contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3); + + if(vlr->v4) { + VECCOPY(v4, vlr->v4->co); + if(obi->flag & R_TRANSFORMED) + Mat4MulVecfl(obi->mat, v4); + + if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3)) + contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3); + } + + return contrib; +} + +static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float *bentn) +{ + OccNode *node, **stack; + OccFace *face; + float resultocc, v[3], p[3], n[3], co[3], invd2; + float distfac, fac, error, d2, weight, emitarea; + int b, totstack; + + /* init variables */ + VECCOPY(p, pp); + VECCOPY(n, pn); + VECADDFAC(p, p, n, 1e-4f); + + if(bentn) + VECCOPY(bentn, n); + + error= tree->error; + distfac= tree->distfac; + + resultocc= 0.0f; + + /* init stack */ + stack= tree->stack[thread]; + stack[0]= tree->root; + totstack= 1; + + while(totstack) { + /* pop point off the stack */ + node= stack[--totstack]; + + VECSUB(v, node->co, p); + d2= INPR(v, v) + 1e-16f; + emitarea= MAX2(node->area, node->dco); + + if(d2*error > emitarea) { + if(distfac != 0.0f) { + fac= 1.0f/(1.0f + distfac*d2); + if(fac < 0.01f) + continue; + } + else + fac= 1.0f; + + /* accumulate occlusion from spherical harmonics */ + invd2 = 1.0f/sqrt(d2); + weight= occ_solid_angle(node, v, d2, invd2, n); + weight *= node->occlusion; + + if(bentn) { + bentn[0] -= weight*invd2*v[0]; + bentn[1] -= weight*invd2*v[1]; + bentn[2] -= weight*invd2*v[2]; + } + + resultocc += weight*fac; + } + else { + /* traverse into children */ + for(b=0; b<TOTCHILD; b++) { + if(node->childflag & (1<<b)) { + face= tree->face+node->child[b].face; + + /* accumulate occlusion with face form factor */ + if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) { + if(bentn || distfac != 0.0f) { + occ_face(face, co, NULL, NULL); + VECSUB(v, co, p); + d2= INPR(v, v) + 1e-16f; + + fac= (distfac == 0.0f)? 1.0f: 1.0f/(1.0f + distfac*d2); + if(fac < 0.01f) + continue; + } + else + fac= 1.0f; + + weight= occ_form_factor(face, p, n); + weight *= tree->occlusion[node->child[b].face]; + + if(bentn) { + invd2= 1.0f/sqrt(d2); + bentn[0] -= weight*invd2*v[0]; + bentn[1] -= weight*invd2*v[1]; + bentn[2] -= weight*invd2*v[2]; + } + + resultocc += weight*fac; + } + } + else if(node->child[b].node) { + /* push child on the stack */ + stack[totstack++]= node->child[b].node; + } + } + } + } + + if(occ) *occ= resultocc; + if(bentn) Normalize(bentn); +} + +static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass) +{ + float *occ, co[3], n[3]; + int pass, i; + + occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc"); + + for(pass=0; pass<totpass; pass++) { + for(i=0; i<tree->totface; i++) { + occ_face(&tree->face[i], co, n, NULL); + VecMulf(n, -1.0f); + VECADDFAC(co, co, n, 1e-8f); + + occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL); + if(re->test_break()) + break; + } + + if(re->test_break()) + break; + + for(i=0; i<tree->totface; i++) { + tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f); + if(tree->occlusion[i] < 0.0f) + tree->occlusion[i]= 0.0f; + } + + occ_sum_occlusion(tree, tree->root); + } + + MEM_freeN(occ); +} + +static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *skycol) +{ + float nn[3], bn[3], fac, occ, occlusion, correction; + int aocolor; + + aocolor= re->wrld.aocolor; + if(onlyshadow) + aocolor= WO_AOPLAIN; + + VECCOPY(nn, n); + VecMulf(nn, -1.0f); + + occ_lookup(tree, thread, exclude, co, nn, &occ, (aocolor)? bn: NULL); + + correction= re->wrld.ao_approx_correction; + + occlusion= (1.0f-correction)*(1.0f-occ); + CLAMP(occlusion, 0.0f, 1.0f); + if(correction != 0.0f) + occlusion += correction*exp(-occ); + + if(aocolor) { + /* sky shading using bent normal */ + if(ELEM(aocolor, WO_AOSKYCOL, WO_AOSKYTEX)) { + fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]); + skycol[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr; + skycol[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng; + skycol[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb; + } +#if 0 + else { /* WO_AOSKYTEX */ + float dxyview[3]; + bn[0]= -bn[0]; + bn[1]= -bn[1]; + bn[2]= -bn[2]; + dxyview[0]= 1.0f; + dxyview[1]= 1.0f; + dxyview[2]= 0.0f; + shadeSkyView(skycol, co, bn, dxyview); + } +#endif + + VecMulf(skycol, occlusion); + } + else { + skycol[0]= occlusion; + skycol[1]= occlusion; + skycol[2]= occlusion; + } +} + +/* ---------------------------- Caching ------------------------------- */ + +static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y) +{ + x -= cache->x; + y -= cache->y; + + x /= cache->step; + y /= cache->step; + x *= cache->step; + y *= cache->step; + + if(x < 0 || x >= cache->w || y < 0 || y >= cache->h) + return NULL; + else + return &cache->sample[y*cache->w + x]; +} + +static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *col) +{ + OcclusionCache *cache; + OcclusionCacheSample *samples[4], *sample; + float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo; + float d[3], dist2; + int i, x1, y1, x2, y2; + + if(!tree->cache) + return 0; + + /* first try to find a sample in the same pixel */ + cache= &tree->cache[thread]; + + if(cache->sample && cache->step) { + sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)]; + if(sample->filled) { + VECSUB(d, sample->co, co); + dist2= INPR(d, d); + if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) { + VECCOPY(col, sample->col); + return 1; + } + } + } + else + return 0; + + /* try to interpolate between 4 neighbouring pixels */ + samples[0]= find_occ_sample(cache, x, y); + samples[1]= find_occ_sample(cache, x+cache->step, y); + samples[2]= find_occ_sample(cache, x, y+cache->step); + samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step); + + for(i=0; i<4; i++) + if(!samples[i] || !samples[i]->filled) + return 0; + + /* require intensities not being too different */ + mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); + maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); + + if(maxo - mino > 0.05f) + return 0; + + /* compute weighted interpolation between samples */ + col[0]= col[1]= col[2]= 0.0f; + totw= 0.0f; + + x1= samples[0]->x; + y1= samples[0]->y; + x2= samples[3]->x; + y2= samples[3]->y; + + tx= (float)(x2 - x)/(float)(x2 - x1); + ty= (float)(y2 - y)/(float)(y2 - y1); + + wb[3]= (1.0f-tx)*(1.0f-ty); + wb[2]= (tx)*(1.0f-ty); + wb[1]= (1.0f-tx)*(ty); + wb[0]= tx*ty; + + for(i=0; i<4; i++) { + VECSUB(d, samples[i]->co, co); + dist2= INPR(d, d); + + wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2)); + wn[i]= pow(INPR(samples[i]->n, n), 32.0f); + + w= wb[i]*wn[i]*wz[i]; + + totw += w; + col[0] += w*samples[i]->col[0]; + col[1] += w*samples[i]->col[1]; + col[2] += w*samples[i]->col[2]; + } + + if(totw >= 0.9f) { + totw= 1.0f/totw; + col[0] *= totw; + col[1] *= totw; + col[2] *= totw; + return 1; + } + + return 0; +} + +static void sample_occ_surface(ShadeInput *shi) +{ + StrandRen *strand= shi->strand; + StrandSurface *mesh= strand->buffer->surface; + int *face, *index = RE_strandren_get_face(shi->obr, strand, 0); + float w[4], *co1, *co2, *co3, *co4; + + if(mesh && mesh->face && mesh->co && mesh->col && index) { + face= mesh->face[*index]; + + co1= mesh->co[face[0]]; + co2= mesh->co[face[1]]; + co3= mesh->co[face[2]]; + co4= (face[3])? mesh->co[face[3]]: NULL; + + InterpWeightsQ3Dfl(co1, co2, co3, co4, strand->vert->co, w); + + shi->ao[0]= shi->ao[1]= shi->ao[2]= 0.0f; + VECADDFAC(shi->ao, shi->ao, mesh->col[face[0]], w[0]); + VECADDFAC(shi->ao, shi->ao, mesh->col[face[1]], w[1]); + VECADDFAC(shi->ao, shi->ao, mesh->col[face[2]], w[2]); + if(face[3]) + VECADDFAC(shi->ao, shi->ao, mesh->col[face[3]], w[3]); + } + else { + shi->ao[0]= 1.0f; + shi->ao[1]= 1.0f; + shi->ao[2]= 1.0f; + } +} + +/* ------------------------- External Functions --------------------------- */ + +static void *exec_strandsurface_sample(void *data) +{ + OcclusionThread *othread= (OcclusionThread*)data; + Render *re= othread->re; + StrandSurface *mesh= othread->mesh; + float col[3], co[3], n[3], *co1, *co2, *co3, *co4; + int a, *face; + + for(a=othread->begin; a<othread->end; a++) { + face= mesh->face[a]; + co1= mesh->co[face[0]]; + co2= mesh->co[face[1]]; + co3= mesh->co[face[2]]; + + if(face[3]) { + co4= mesh->co[face[3]]; + + VecLerpf(co, co1, co3, 0.5f); + CalcNormFloat4(co1, co2, co3, co4, n); + } + else { + CalcCent3f(co, co1, co2, co3); + CalcNormFloat(co1, co2, co3, n); + } + VecMulf(n, -1.0f); + + sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, col); + VECCOPY(othread->facecol[a], col); + } + + return 0; +} + +void make_occ_tree(Render *re) +{ + OcclusionThread othreads[BLENDER_MAX_THREADS]; + StrandSurface *mesh; + ListBase threads; + float col[3], (*facecol)[3]; + int a, totface, totthread, *face, *count; + + /* ugly, needed for occ_face */ + R= *re; + + re->i.infostr= "Occlusion preprocessing"; + re->stats_draw(&re->i); + + re->occlusiontree= occ_tree_build(re); + + if(re->occlusiontree) { + if(re->wrld.ao_approx_passes) + occ_compute_passes(re, re->occlusiontree, re->wrld.ao_approx_passes); + + for(mesh=re->strandsurface.first; mesh; mesh=mesh->next) { + if(!mesh->face || !mesh->co || !mesh->col) + continue; + + count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount"); + facecol= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceCol"); + + totthread= (mesh->totface > 10000)? re->r.threads: 1; + totface= mesh->totface/totthread; + for(a=0; a<totthread; a++) { + othreads[a].re= re; + othreads[a].facecol= facecol; + othreads[a].thread= a; + othreads[a].mesh= mesh; + othreads[a].begin= a*totface; + othreads[a].end= (a == totthread-1)? mesh->totface: (a+1)*totface; + } + + if(totthread == 1) { + exec_strandsurface_sample(&othreads[0]); + } + else { + BLI_init_threads(&threads, exec_strandsurface_sample, totthread); + + for(a=0; a<totthread; a++) + BLI_insert_thread(&threads, &othreads[a]); + + BLI_end_threads(&threads); + } + + for(a=0; a<mesh->totface; a++) { + face= mesh->face[a]; + + VECCOPY(col, facecol[a]); + VECADD(mesh->col[face[0]], mesh->col[face[0]], col); + count[face[0]]++; + VECADD(mesh->col[face[1]], mesh->col[face[1]], col); + count[face[1]]++; + VECADD(mesh->col[face[2]], mesh->col[face[2]], col); + count[face[2]]++; + + if(face[3]) { + VECADD(mesh->col[face[3]], mesh->col[face[3]], col); + count[face[3]]++; + } + } + + for(a=0; a<mesh->totvert; a++) + if(count[a]) + VecMulf(mesh->col[a], 1.0f/count[a]); + + MEM_freeN(count); + MEM_freeN(facecol); + } + } +} + +void free_occ(Render *re) +{ + if(re->occlusiontree) { + occ_free_tree(re->occlusiontree); + re->occlusiontree = NULL; + } +} + +void sample_occ(Render *re, ShadeInput *shi) +{ + OcclusionTree *tree= re->occlusiontree; + OcclusionCache *cache; + OcclusionCacheSample *sample; + OccFace exclude; + int onlyshadow; + + if(tree) { + if(shi->strand) { + sample_occ_surface(shi); + } + /* try to get result from the cache if possible */ + else if(!sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao)) { + /* no luck, let's sample the occlusion */ + exclude.obi= shi->obi - re->objectinstance; + exclude.facenr= shi->vlr->index; + onlyshadow= (shi->mat->mode & MA_ONLYSHADOW); + sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao); + + /* fill result into sample, each time */ + if(tree->cache) { + cache= &tree->cache[shi->thread]; + + if(cache->sample && cache->step) { + sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)]; + VECCOPY(sample->co, shi->co); + VECCOPY(sample->n, shi->vno); + VECCOPY(sample->col, shi->ao); + sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]); + sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco); + sample->filled= 1; + } + } + } + } + else { + shi->ao[0]= 1.0f; + shi->ao[1]= 1.0f; + shi->ao[2]= 1.0f; + } +} + +void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp) +{ + OcclusionTree *tree= re->occlusiontree; + PixStr ps; + OcclusionCache *cache; + OcclusionCacheSample *sample; + OccFace exclude; + ShadeInput *shi; + intptr_t *rd=NULL; + int *ro=NULL, *rp=NULL, *rz=NULL, onlyshadow; + int x, y, step = CACHE_STEP; + + if(!tree->cache) + return; + + cache= &tree->cache[pa->thread]; + cache->w= pa->rectx; + cache->h= pa->recty; + cache->x= pa->disprect.xmin; + cache->y= pa->disprect.ymin; + cache->step= step; + cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample"); + sample= cache->sample; + + if(re->osa) { + rd= pa->rectdaps; + } + else { + /* fake pixel struct for non-osa */ + ps.next= NULL; + ps.mask= 0xFFFF; + + ro= pa->recto; + rp= pa->rectp; + rz= pa->rectz; + } + + /* compute a sample at every step pixels */ + for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) { + for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) { + if(!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1)) + continue; + if(!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1)) + continue; + + if(re->osa) { + if(!*rd) continue; + + shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y); + } + else { + if(!*rp) continue; + + ps.obi= *ro; + ps.facenr= *rp; + ps.z= *rz; + shade_samples_fill_with_ps(ssamp, &ps, x, y); + } + + shi= ssamp->shi; + if(shi->vlr) { + onlyshadow= (shi->mat->mode & MA_ONLYSHADOW); + exclude.obi= shi->obi - re->objectinstance; + exclude.facenr= shi->vlr->index; + sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao); + + VECCOPY(sample->co, shi->co); + VECCOPY(sample->n, shi->vno); + VECCOPY(sample->col, shi->ao); + sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]); + sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco); + sample->x= shi->xs; + sample->y= shi->ys; + sample->filled= 1; + } + + if(re->test_break()) + break; + } + } +} + +void free_occ_samples(Render *re, RenderPart *pa) +{ + OcclusionTree *tree= re->occlusiontree; + OcclusionCache *cache; + + if(tree->cache) { + cache= &tree->cache[pa->thread]; + + if(cache->sample) + MEM_freeN(cache->sample); + + cache->w= 0; + cache->h= 0; + cache->step= 0; + } +} + |