Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@pandora.be>2008-01-17 22:27:16 +0300
committerBrecht Van Lommel <brechtvanlommel@pandora.be>2008-01-17 22:27:16 +0300
commit8cdfe865ecd79383399296a29b57b10d30ea9173 (patch)
tree2c27e5438e7427ac32219ac6b9eacb5cc1ca3815
parent9af3b8a07e5ec51cc7838f8cbcd130262e2e095e (diff)
Approximate Ambient Occlusion
============================= A new approximate ambient occlusion method has been added, next to the existing one based on raytracing. This method is specifically targetted at use in animations, since it is inherently noise free, and so will not flicker across frames. http://www.blender.org/development/current-projects/changes-since-244/approximate-ambient-occlusion/ http://peach.blender.org/index.php/approximate-ambient-occlusion/ Further improvements are still needed, but it can be tested already. There are still a number of known issues: - Bias errors on backfaces. - For performance, instanced object do not occlude currently. - Sky textures don't work well, the derivatives for texture evaluation are not correct. - Multiple passes do not work entirely correct (they are not accurate to begin with, but could be better).
-rw-r--r--source/blender/blenkernel/intern/world.c8
-rw-r--r--source/blender/blenloader/intern/readfile.c6
-rw-r--r--source/blender/makesdna/DNA_world_types.h10
-rw-r--r--source/blender/render/extern/include/RE_shader_ext.h4
-rw-r--r--source/blender/render/intern/include/occlusion.h51
-rw-r--r--source/blender/render/intern/include/render_types.h5
-rw-r--r--source/blender/render/intern/include/renderdatabase.h2
-rw-r--r--source/blender/render/intern/include/shading.h1
-rw-r--r--source/blender/render/intern/source/convertblender.c44
-rw-r--r--source/blender/render/intern/source/occlusion.c1694
-rw-r--r--source/blender/render/intern/source/pipeline.c3
-rw-r--r--source/blender/render/intern/source/rendercore.c19
-rw-r--r--source/blender/render/intern/source/renderdatabase.c18
-rw-r--r--source/blender/render/intern/source/shadeinput.c8
-rw-r--r--source/blender/render/intern/source/shadeoutput.c45
-rw-r--r--source/blender/render/intern/source/strand.c1
-rw-r--r--source/blender/render/intern/source/texture.c2
-rw-r--r--source/blender/src/buttons_shading.c74
18 files changed, 1922 insertions, 73 deletions
diff --git a/source/blender/blenkernel/intern/world.c b/source/blender/blenkernel/intern/world.c
index 625ca57dbf3..8d3234475a4 100644
--- a/source/blender/blenkernel/intern/world.c
+++ b/source/blender/blenkernel/intern/world.c
@@ -98,12 +98,12 @@ World *add_world(char *name)
wrld->exp= 0.0f;
wrld->exposure=wrld->range= 1.0f;
- wrld->aodist= 5.0;
+ wrld->aodist= 5.0f;
wrld->aosamp= 5;
- wrld->aoenergy= 1.0;
- wrld->aobias= 0.05;
+ wrld->aoenergy= 1.0f;
+ wrld->aobias= 0.05f;
wrld->ao_samp_method = WO_AOSAMP_HAMMERSLEY;
-
+ wrld->ao_approx_error= 0.25f;
wrld->physicsEngine= WOPHY_BULLET;//WOPHY_SUMO; Bullet by default
wrld->preview = NULL;
diff --git a/source/blender/blenloader/intern/readfile.c b/source/blender/blenloader/intern/readfile.c
index 6a2e3aa481a..5712f8f03fc 100644
--- a/source/blender/blenloader/intern/readfile.c
+++ b/source/blender/blenloader/intern/readfile.c
@@ -6698,6 +6698,7 @@ static void do_versions(FileData *fd, Library *lib, Main *main)
Lamp *la;
Material *ma;
ParticleSettings *part;
+ World *wrld;
Mesh *me;
/* unless the file was created 2.44.3 but not 2.45, update the constraints */
@@ -6878,6 +6879,11 @@ static void do_versions(FileData *fd, Library *lib, Main *main)
}
}
+ for(wrld=main->world.first; wrld; wrld= wrld->id.next) {
+ if(wrld->ao_approx_error == 0.0f)
+ wrld->ao_approx_error= 0.25f;
+ }
+
if (main->versionfile < 245 || main->subversionfile < 12)
{
/* initialize skeleton generation toolsettings */
diff --git a/source/blender/makesdna/DNA_world_types.h b/source/blender/makesdna/DNA_world_types.h
index 9e5ec4cdb9a..e7a8d839227 100644
--- a/source/blender/makesdna/DNA_world_types.h
+++ b/source/blender/makesdna/DNA_world_types.h
@@ -104,9 +104,8 @@ typedef struct World {
float aodist, aodistfac, aoenergy, aobias;
short aomode, aosamp, aomix, aocolor;
float ao_adapt_thresh, ao_adapt_speed_fac;
- float pad2[2];
- short ao_samp_method;
- short pad1[3];
+ float ao_approx_error, ao_approx_correction;
+ short ao_samp_method, ao_gather_method, ao_approx_passes, pad1;
float *aosphere, *aotables;
@@ -151,12 +150,17 @@ typedef struct World {
/* aomode (use distances & random sampling modes) */
#define WO_AODIST 1
#define WO_AORNDSMP 2
+#define WO_AOCACHE 4
/* aocolor */
#define WO_AOPLAIN 0
#define WO_AOSKYCOL 1
#define WO_AOSKYTEX 2
+/* ao_gather_method */
+#define WO_AOGATHER_RAYTRACE 0
+#define WO_AOGATHER_APPROX 1
+
/* texco (also in DNA_material_types.h) */
#define TEXCO_ANGMAP 64
#define TEXCO_H_SPHEREMAP 256
diff --git a/source/blender/render/extern/include/RE_shader_ext.h b/source/blender/render/extern/include/RE_shader_ext.h
index 8971126e643..5d5db0471d8 100644
--- a/source/blender/render/extern/include/RE_shader_ext.h
+++ b/source/blender/render/extern/include/RE_shader_ext.h
@@ -64,6 +64,7 @@ struct ShadeInputCopy {
struct Material *mat;
struct VlakRen *vlr;
+ struct StrandRen *strand;
struct ObjectInstanceRen *obi;
struct ObjectRen *obr;
int facenr;
@@ -96,6 +97,7 @@ typedef struct ShadeInput
struct Material *mat;
struct VlakRen *vlr;
+ struct StrandRen *strand;
struct ObjectInstanceRen *obi;
struct ObjectRen *obr;
int facenr;
@@ -132,7 +134,7 @@ typedef struct ShadeInput
/* texture coordinates */
float lo[3], gl[3], ref[3], orn[3], winco[3], sticky[3], vcol[4], rad[3];
float refcol[4], displace[3];
- float strand, tang[3], stress, winspeed[4];
+ float strandco, tang[3], stress, winspeed[4];
float duplilo[3], dupliuv[3];
ShadeInputUV uv[8]; /* 8 = MAX_MTFACE */
diff --git a/source/blender/render/intern/include/occlusion.h b/source/blender/render/intern/include/occlusion.h
new file mode 100644
index 00000000000..3f10d50ab2e
--- /dev/null
+++ b/source/blender/render/intern/include/occlusion.h
@@ -0,0 +1,51 @@
+/*
+ * $Id$
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The Original Code is Copyright (C) 2008 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Brecht Van Lommel.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef OCCLUSION_H
+#define OCCLUSION_H
+
+struct Render;
+struct ShadeInput;
+struct ShadeResult;
+struct RenderPart;
+struct ShadeSample;
+struct DerivedMesh;
+struct ObjectRen;
+
+void make_occ_tree(struct Render *re);
+void free_occ(struct Render *re);
+void sample_occ(struct Render *re, struct ShadeInput *shi);
+
+void cache_occ_samples(struct Render *re, struct RenderPart *pa, struct ShadeSample *ssamp);
+void free_occ_samples(struct Render *re, struct RenderPart *pa);
+
+void *cache_occ_mesh(struct Render *re, struct ObjectRen *obr, struct DerivedMesh *dm, float mat[][4]);
+
+#endif
+
diff --git a/source/blender/render/intern/include/render_types.h b/source/blender/render/intern/include/render_types.h
index e3a92912191..6124870d63e 100644
--- a/source/blender/render/intern/include/render_types.h
+++ b/source/blender/render/intern/include/render_types.h
@@ -158,6 +158,10 @@ struct Render
/* octree tables and variables for raytrace */
void *raytree;
+
+ /* occlusion tree */
+ void *occlusiontree;
+ ListBase occlusionmesh;
/* use this instead of R.r.cfra */
float cfra;
@@ -341,6 +345,7 @@ typedef struct StrandBuffer {
struct ObjectRen *obr;
struct Material *ma;
+ void *occlusionmesh;
unsigned int lay;
int overrideuv;
int flag, maxdepth;
diff --git a/source/blender/render/intern/include/renderdatabase.h b/source/blender/render/intern/include/renderdatabase.h
index a3a3b4e5e4a..a74579acc4a 100644
--- a/source/blender/render/intern/include/renderdatabase.h
+++ b/source/blender/render/intern/include/renderdatabase.h
@@ -70,6 +70,7 @@ typedef struct StrandTableNode {
float *winspeed;
float *surfnor;
float *simplify;
+ int *face;
struct MCol *mcol;
float *uv;
int totuv, totmcol;
@@ -114,6 +115,7 @@ float *RE_strandren_get_surfnor(struct ObjectRen *obr, struct StrandRen *strand,
float *RE_strandren_get_uv(struct ObjectRen *obr, struct StrandRen *strand, int n, char **name, int verify);
struct MCol *RE_strandren_get_mcol(struct ObjectRen *obr, struct StrandRen *strand, int n, char **name, int verify);
float *RE_strandren_get_simplify(struct ObjectRen *obr, struct StrandRen *strand, int verify);
+int *RE_strandren_get_face(struct ObjectRen *obr, struct StrandRen *strand, int verify);
float *RE_strandren_get_winspeed(struct ObjectInstanceRen *obi, struct StrandRen *strand, int verify);
struct VertRen *RE_vertren_copy(struct ObjectRen *obr, struct VertRen *ver);
diff --git a/source/blender/render/intern/include/shading.h b/source/blender/render/intern/include/shading.h
index 6ca63a2976d..e3ffe5ef28f 100644
--- a/source/blender/render/intern/include/shading.h
+++ b/source/blender/render/intern/include/shading.h
@@ -69,6 +69,7 @@ void shade_input_initialize(struct ShadeInput *shi, struct RenderPart *pa, struc
void shade_sample_initialize(struct ShadeSample *ssamp, struct RenderPart *pa, struct RenderLayer *rl);
void shade_samples_do_AO(struct ShadeSample *ssamp);
+void shade_samples_fill_with_ps(struct ShadeSample *ssamp, struct PixStr *ps, int x, int y);
int shade_samples(struct ShadeSample *ssamp, struct PixStr *ps, int x, int y);
void vlr_set_uv_indices(struct VlakRen *vlr, int *i1, int *i2, int *i3);
diff --git a/source/blender/render/intern/source/convertblender.c b/source/blender/render/intern/source/convertblender.c
index 25b5dac2a55..8117a85d015 100644
--- a/source/blender/render/intern/source/convertblender.c
+++ b/source/blender/render/intern/source/convertblender.c
@@ -100,6 +100,7 @@
#include "envmap.h"
#include "multires.h"
+#include "occlusion.h"
#include "render_types.h"
#include "rendercore.h"
#include "renderdatabase.h"
@@ -1497,7 +1498,7 @@ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem
float *orco=0,*surfnor=0,*uvco=0, strandlen=0.0f, curlen=0.0f;
float hasize, pa_size, pa_time, r_tilt, cfra=bsystem_time(ob,(float)CFRA,0.0);
float adapt_angle=0.0, adapt_pix=0.0, random, simplify[2];
- int i, a, k, max_k=0, totpart, totuv=0, override_uv=-1, dosimplify = 0;
+ int i, a, k, max_k=0, totpart, totuv=0, override_uv=-1, dosimplify = 0, doapproxao = 0;
int path_possible=0, keys_possible=0, baked_keys=0, totchild=psys->totchild;
int seed, path_nbr=0, path=0, orco1=0, adapt=0, uv[3]={0,0,0}, num;
char **uv_name=0;
@@ -1656,6 +1657,10 @@ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem
strandbuf->flag |= R_STRAND_B_UNITS;
svert= strandbuf->vert;
+
+ if((re->wrld.mode & WO_AMB_OCC) && (re->wrld.ao_gather_method == WO_AOGATHER_APPROX))
+ if(ma->amb != 0.0f)
+ doapproxao= 1;
}
}
}
@@ -1704,14 +1709,14 @@ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem
else
psys_particle_on_emitter(ob, psmd,part->from,pa->num,pa->num_dmcache,pa->fuv,pa->foffset,co,nor,0,0,orco,0);
- if(uvco && ELEM(part->from,PART_FROM_FACE,PART_FROM_VOLUME)){
- layer=psmd->dm->faceData.layers + CustomData_get_layer_index(&psmd->dm->faceData,CD_MFACE);
+ num= pa->num_dmcache;
- num= pa->num_dmcache;
+ if(num == DMCACHE_NOTFOUND)
+ if(pa->num < psmd->dm->getNumFaces(psmd->dm))
+ num= pa->num;
- if(num == DMCACHE_NOTFOUND)
- if(pa->num < psmd->dm->getNumFaces(psmd->dm))
- num= pa->num;
+ if(uvco && ELEM(part->from,PART_FROM_FACE,PART_FROM_VOLUME)){
+ layer=psmd->dm->faceData.layers + CustomData_get_layer_index(&psmd->dm->faceData,CD_MFACE);
for(i=0; i<totuv; i++){
if(num != DMCACHE_NOTFOUND) {
@@ -1761,6 +1766,8 @@ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem
r_tilt=2.0f*cpa->rand[2];
+ num= cpa->num;
+
/* get orco */
psys_particle_on_emitter(ob, psmd,
(part->childtype == PART_CHILD_FACES)? PART_FROM_FACE: PART_FROM_PARTICLE,
@@ -1831,6 +1838,11 @@ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem
VECCOPY(snor, surfnor);
}
+ if(doapproxao && num >= 0) {
+ int *facenum= RE_strandren_get_face(obr, strand, 1);
+ *facenum= num;
+ }
+
if(uvco){
for(i=0; i<totuv; i++){
if(i != override_uv) {
@@ -1930,6 +1942,9 @@ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem
break;
}
+ if(doapproxao)
+ strandbuf->occlusionmesh= cache_occ_mesh(re, obr, psmd->dm, mat);
+
/* 4. clean up */
if(ma) do_mat_ipo(ma);
@@ -3636,7 +3651,7 @@ void init_render_world(Render *re)
if(re->osa)
while(re->wrld.aosamp*re->wrld.aosamp < re->osa)
re->wrld.aosamp++;
- if(!(re->r.mode & R_RAYTRACE))
+ if(!(re->r.mode & R_RAYTRACE) && (re->wrld.ao_gather_method == WO_AOGATHER_RAYTRACE))
re->wrld.mode &= ~WO_AMB_OCC;
}
else {
@@ -4069,6 +4084,7 @@ void RE_Database_Free(Render *re)
if(re->r.mode & R_RAYTRACE) freeraytree(re);
free_sss(re);
+ free_occ(re);
re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0;
re->i.convertdone= 0;
@@ -4289,7 +4305,7 @@ void RE_Database_FromScene(Render *re, Scene *scene, int use_camera_view)
}
init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */
- if(re->wrld.mode & WO_AMB_OCC) {
+ if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC)) {
if (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY)
init_render_hammersley(re);
else if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
@@ -4354,10 +4370,16 @@ void RE_Database_FromScene(Render *re, Scene *scene, int use_camera_view)
if(!re->test_break())
project_renderdata(re, projectverto, re->r.mode & R_PANORAMA, 0, 1);
+
+ /* Occlusion */
+ if((re->wrld.mode & WO_AMB_OCC) && !re->test_break())
+ if(re->wrld.ao_gather_method == WO_AOGATHER_APPROX)
+ if(re->r.renderer==R_INTERN)
+ make_occ_tree(re);
/* SSS */
if((re->r.mode & R_SSS) && !re->test_break())
- if (re->r.renderer==R_INTERN)
+ if(re->r.renderer==R_INTERN)
make_sss_tree(re);
}
@@ -4837,7 +4859,7 @@ void RE_Database_Baking(Render *re, Scene *scene, int type, Object *actob)
}
init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */
- if(re->wrld.mode & WO_AMB_OCC) {
+ if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC)) {
if (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY)
init_render_hammersley(re);
else if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
diff --git a/source/blender/render/intern/source/occlusion.c b/source/blender/render/intern/source/occlusion.c
new file mode 100644
index 00000000000..19394b4235a
--- /dev/null
+++ b/source/blender/render/intern/source/occlusion.c
@@ -0,0 +1,1694 @@
+/*
+ * $Id$
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The Original Code is Copyright (C) 2008 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Brecht Van Lommel.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_material_types.h"
+#include "DNA_meshdata_types.h"
+
+#include "BLI_arithb.h"
+#include "BLI_blenlib.h"
+#include "BLI_memarena.h"
+#include "BLI_threads.h"
+
+#include "BKE_DerivedMesh.h"
+#include "BKE_global.h"
+#include "BKE_utildefines.h"
+
+#include "RE_shader_ext.h"
+
+/* local includes */
+#include "occlusion.h"
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "pixelshading.h"
+#include "shading.h"
+#include "zbuf.h"
+
+/* ------------------------- Declarations --------------------------- */
+
+#define INVALID_INDEX ((int)(~0))
+#define INVPI 0.31830988618379069f
+#define TOTCHILD 8
+#define CACHE_STEP 3
+
+typedef struct OcclusionCacheSample {
+ float co[3], n[3], col[3], intensity, dist2;
+ int x, y, filled;
+} OcclusionCacheSample;
+
+typedef struct OcclusionCache {
+ OcclusionCacheSample *sample;
+ int x, y, w, h, step;
+} OcclusionCache;
+
+typedef struct OcclusionCacheMesh {
+ struct OcclusionCacheMesh *next, *prev;
+ ObjectRen *obr;
+ int (*face)[4];
+ float (*co)[3];
+ float (*col)[3];
+ int totvert, totface;
+} OcclusionCacheMesh;
+
+typedef struct OccFace {
+ int obi;
+ int facenr;
+} OccFace;
+
+typedef struct OccNode {
+ float co[3], area;
+ float sh[9], dco;
+ float occlusion;
+ int childflag;
+ union {
+ //OccFace face;
+ int face;
+ struct OccNode *node;
+ } child[TOTCHILD];
+} OccNode;
+
+typedef struct OcclusionTree {
+ MemArena *arena;
+
+ float (*co)[3]; /* temporary during build */
+
+ OccFace *face; /* instance and face indices */
+ float *occlusion; /* occlusion for faces */
+
+ OccNode *root;
+
+ OccNode **stack[BLENDER_MAX_THREADS];
+ int maxdepth;
+
+ int totface;
+
+ float error;
+ float distfac;
+
+ OcclusionCache *cache;
+} OcclusionTree;
+
+/* ------------------------- Shading --------------------------- */
+
+extern Render R; // meh
+
+#if 0
+static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
+{
+ ShadeInput *shi= ssamp->shi;
+ ShadeResult *shr= ssamp->shr;
+ float l, u, v, *v1, *v2, *v3;
+
+ /* init */
+ if(vlr->v4) {
+ shi->u= u= 0.5f;
+ shi->v= v= 0.5f;
+ }
+ else {
+ shi->u= u= 1.0f/3.0f;
+ shi->v= v= 1.0f/3.0f;
+ }
+
+ /* setup render coordinates */
+ v1= vlr->v1->co;
+ v2= vlr->v2->co;
+ v3= vlr->v3->co;
+
+ /* renderco */
+ l= 1.0f-u-v;
+
+ shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
+ shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
+ shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
+
+ shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
+
+ /* set up view vector */
+ VECCOPY(shi->view, shi->co);
+ Normalize(shi->view);
+
+ /* cache for shadow */
+ shi->samplenr++;
+
+ shi->xs= 0; // TODO
+ shi->ys= 0;
+
+ shade_input_set_normals(shi);
+
+ /* no normal flip */
+ if(shi->flippednor)
+ shade_input_flip_normals(shi);
+
+ /* not a pretty solution, but fixes common cases */
+ if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
+ VecMulf(shi->vn, -1.0f);
+ VecMulf(shi->vno, -1.0f);
+ }
+
+ /* init material vars */
+ // note, keep this synced with render_types.h
+ memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
+ shi->har= shi->mat->har;
+
+ /* render */
+ shade_input_set_shade_texco(shi);
+ shade_material_loop(shi, shr); /* todo: nodes */
+
+ VECCOPY(rad, shr->combined);
+}
+
+static void occ_build_shade(Render *re, OcclusionTree *tree)
+{
+ ShadeSample ssamp;
+ ObjectInstanceRen *obi;
+ VlakRen *vlr;
+ int a;
+
+ R= *re;
+
+ /* setup shade sample with correct passes */
+ memset(&ssamp, 0, sizeof(ShadeSample));
+ ssamp.shi[0].lay= re->scene->lay;
+ ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA;
+ ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
+ ssamp.tot= 1;
+
+ for(a=0; a<tree->totface; a++) {
+ obi= &R.objectinstance[tree->face[a].obi];
+ vlr= RE_findOrAddVlak(obi->obr, tree->face[a].vlr);
+
+ occ_shade(&ssamp, obi, vlr, tree->rad[a]);
+ }
+}
+#endif
+
+/* ------------------------- Spherical Harmonics --------------------------- */
+
+/* Use 2nd order SH => 9 coefficients, stored in this order:
+ 0 = (0,0),
+ 1 = (1,-1), 2 = (1,0), 3 = (1,1),
+ 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
+
+static void sh_copy(float *shresult, float *sh)
+{
+ memcpy(shresult, sh, sizeof(float)*9);
+}
+
+static void sh_mul(float *sh, float f)
+{
+ int i;
+
+ for(i=0; i<9; i++)
+ sh[i] *= f;
+}
+
+static void sh_add(float *shresult, float *sh1, float *sh2)
+{
+ int i;
+
+ for(i=0; i<9; i++)
+ shresult[i]= sh1[i] + sh2[i];
+}
+
+static void sh_from_disc(float *n, float area, float *shresult)
+{
+ /* See formula (3) in:
+ "An Efficient Representation for Irradiance Environment Maps" */
+ float sh[9], x, y, z;
+
+ x= n[0];
+ y= n[1];
+ z= n[2];
+
+ sh[0]= 0.282095f;
+
+ sh[1]= 0.488603f*y;
+ sh[2]= 0.488603f*z;
+ sh[3]= 0.488603f*x;
+
+ sh[4]= 1.092548f*x*y;
+ sh[5]= 1.092548f*y*z;
+ sh[6]= 0.315392f*(3.0f*z*z - 1.0f);
+ sh[7]= 1.092548f*x*z;
+ sh[8]= 0.546274f*(x*x - y*y);
+
+ sh_mul(sh, area);
+ sh_copy(shresult, sh);
+}
+
+static float sh_eval(float *sh, float *v)
+{
+ /* See formula (13) in:
+ "An Efficient Representation for Irradiance Environment Maps" */
+ static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
+ static const float c4 = 0.886227f, c5 = 0.247708f;
+ float x, y, z, sum;
+
+ x= v[0];
+ y= v[1];
+ z= v[2];
+
+ sum= c1*sh[8]*(x*x - y*y);
+ sum += c3*sh[6]*z*z;
+ sum += c4*sh[0];
+ sum += -c5*sh[6];
+ sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z);
+ sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z);
+
+ return sum;
+}
+
+/* ------------------------------ Building --------------------------------- */
+
+static void occ_face(const OccFace *face, float *co, float *normal, float *area)
+{
+ ObjectInstanceRen *obi;
+ VlakRen *vlr;
+
+ obi= &R.objectinstance[face->obi];
+ vlr= RE_findOrAddVlak(obi->obr, face->facenr);
+
+ if(co) {
+ if(vlr->v4)
+ VecLerpf(co, vlr->v1->co, vlr->v3->co, 0.5f);
+ else
+ CalcCent3f(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
+
+ if(obi->flag & R_TRANSFORMED)
+ Mat4MulVecfl(obi->mat, co);
+ }
+
+ if(normal) {
+ normal[0]= -vlr->n[0];
+ normal[1]= -vlr->n[1];
+ normal[2]= -vlr->n[2];
+
+ if(obi->flag & R_TRANSFORMED) {
+ float xn, yn, zn, (*imat)[3]= obi->imat;
+
+ xn= normal[0];
+ yn= normal[1];
+ zn= normal[2];
+
+ normal[0]= imat[0][0]*xn + imat[0][1]*yn + imat[0][2]*zn;
+ normal[1]= imat[1][0]*xn + imat[1][1]*yn + imat[1][2]*zn;
+ normal[2]= imat[2][0]*xn + imat[2][1]*yn + imat[2][2]*zn;
+ }
+ }
+
+ if(area) {
+ /* todo: correct area for instances */
+ if(vlr->v4)
+ *area= AreaQ3Dfl(vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4->co);
+ else
+ *area= AreaT3Dfl(vlr->v1->co, vlr->v2->co, vlr->v3->co);
+ }
+}
+
+static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
+{
+ OccNode *child;
+ float occ, area, totarea;
+ int a, b;
+
+ occ= 0.0f;
+ totarea= 0.0f;
+
+ for(b=0; b<TOTCHILD; b++) {
+ if(node->childflag & (1<<b)) {
+ a= node->child[b].face;
+ occ_face(&tree->face[a], 0, 0, &area);
+ occ += area*tree->occlusion[a];
+ totarea += area;
+ }
+ else if(node->child[b].node) {
+ child= node->child[b].node;
+ occ_sum_occlusion(tree, child);
+
+ occ += child->area*child->occlusion;
+ totarea += child->area;
+ }
+ }
+
+ if(totarea != 0.0f)
+ occ /= totarea;
+
+ node->occlusion= occ;
+}
+
+static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
+{
+ float len, maxlen= -1.0f;
+ int a, axis = 0;
+
+ INIT_MINMAX(min, max);
+
+ for(a=begin; a<end; a++)
+ DO_MINMAX(tree->co[a], min, max)
+
+ for(a=0; a<3; a++) {
+ len= max[a] - min[a];
+
+ if(len > maxlen) {
+ maxlen= len;
+ axis= a;
+ }
+ }
+
+ return axis;
+}
+
+void occ_node_from_face(OccFace *face, OccNode *node)
+{
+ float n[3];
+
+ occ_face(face, node->co, n, &node->area);
+ node->dco= 0.0f;
+ sh_from_disc(n, node->area, node->sh);
+}
+
+static void occ_build_dco(OcclusionTree *tree, OccNode *node, float *co, float *dco)
+{
+ OccNode *child;
+ float dist, d[3], nco[3];
+ int b;
+
+ for(b=0; b<TOTCHILD; b++) {
+ if(node->childflag & (1<<b)) {
+ occ_face(tree->face+node->child[b].face, nco, 0, 0);
+ }
+ else if(node->child[b].node) {
+ child= node->child[b].node;
+ occ_build_dco(tree, child, co, dco);
+ VECCOPY(nco, child->co);
+ }
+
+ VECSUB(d, nco, co);
+ dist= INPR(d, d);
+ if(dist > *dco)
+ *dco= dist;
+ }
+}
+
+static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
+{
+ float min[3], max[3], mid;
+ int axis, a, enda;
+
+ /* split in middle of boundbox. this seems faster than median split
+ * on complex scenes, possibly since it avoids two distant faces to
+ * be in the same node better? */
+ axis= occ_find_bbox_axis(tree, begin, end, min, max);
+ mid= 0.5f*(min[axis]+max[axis]);
+
+ a= begin;
+ enda= end;
+ while(a<enda) {
+ if(tree->co[a][axis] > mid) {
+ enda--;
+ SWAP(OccFace, tree->face[a], tree->face[enda]);
+ SWAP(float, tree->co[a][0], tree->co[enda][0]);
+ SWAP(float, tree->co[a][1], tree->co[enda][1]);
+ SWAP(float, tree->co[a][2], tree->co[enda][2]);
+ }
+ else
+ a++;
+ }
+
+ *split= enda;
+}
+
+static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
+{
+ /* split faces into eight groups */
+ int b, splitx, splity[2], splitz[4];
+
+ occ_build_split(tree, begin, end, &splitx);
+
+ occ_build_split(tree, begin, splitx, &splity[0]);
+ occ_build_split(tree, splitx, end, &splity[1]);
+
+ occ_build_split(tree, begin, splity[0], &splitz[0]);
+ occ_build_split(tree, splity[0], splitx, &splitz[1]);
+ occ_build_split(tree, splitx, splity[1], &splitz[2]);
+ occ_build_split(tree, splity[1], end, &splitz[3]);
+
+ offset[0]= begin;
+ offset[1]= splitz[0];
+ offset[2]= splity[0];
+ offset[3]= splitz[1];
+ offset[4]= splitx;
+ offset[5]= splitz[2];
+ offset[6]= splity[1];
+ offset[7]= splitz[3];
+
+ for(b=0; b<7; b++)
+ count[b]= offset[b+1] - offset[b];
+ count[7]= end - offset[7];
+}
+
+static OccNode *occ_build_recursive(OcclusionTree *tree, int begin, int end, int depth)
+{
+ OccNode *node, *child, tmpnode;
+ OccFace *face;
+ int a, b, offset[TOTCHILD], count[TOTCHILD];
+
+ /* keep track of maximum depth for stack */
+ if(depth > tree->maxdepth)
+ tree->maxdepth= depth;
+
+ /* add a new node */
+ node= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
+ node->occlusion= 1.0f;
+
+ /* leaf node with only children */
+ if(end - begin <= TOTCHILD) {
+ for(a=begin, b=0; a<end; a++, b++) {
+ face= &tree->face[a];
+ node->child[b].face= a;
+ node->childflag |= (1<<b);
+ }
+ }
+ else {
+ /* order faces */
+ occ_build_8_split(tree, begin, end, offset, count);
+
+ for(b=0; b<TOTCHILD; b++) {
+ if(count[b] == 0) {
+ node->child[b].node= NULL;
+ }
+ else if(count[b] == 1) {
+ face= &tree->face[offset[b]];
+ node->child[b].face= offset[b];
+ node->childflag |= (1<<b);
+ }
+ else {
+ child= occ_build_recursive(tree, offset[b], offset[b]+count[b], depth+1);
+ node->child[b].node= child;
+ }
+ }
+ }
+
+ /* combine area, position and sh */
+ for(b=0; b<TOTCHILD; b++) {
+ if(node->childflag & (1<<b)) {
+ child= &tmpnode;
+ occ_node_from_face(tree->face+node->child[b].face, &tmpnode);
+ }
+ else {
+ child= node->child[b].node;
+ }
+
+ if(child) {
+ node->area += child->area;
+ sh_add(node->sh, node->sh, child->sh);
+ VECADDFAC(node->co, node->co, child->co, child->area);
+ }
+ }
+
+ if(node->area != 0.0f)
+ VecMulf(node->co, 1.0f/node->area);
+
+ /* compute maximum distance from center */
+ node->dco= 0.0f;
+ occ_build_dco(tree, node, node->co, &node->dco);
+
+ return node;
+}
+
+static void occ_build_sh_normalize(OccNode *node)
+{
+ /* normalize spherical harmonics to not include area, so
+ * we can clamp the dot product and then mutliply by area */
+ int b;
+
+ if(node->area != 0.0f)
+ sh_mul(node->sh, 1.0f/node->area);
+
+ for(b=0; b<TOTCHILD; b++) {
+ if(node->childflag & (1<<b));
+ else if(node->child[b].node)
+ occ_build_sh_normalize(node->child[b].node);
+ }
+}
+
+static OcclusionTree *occ_tree_build(Render *re)
+{
+ OcclusionTree *tree;
+ ObjectInstanceRen *obi;
+ ObjectRen *obr;
+ VlakRen *vlr= NULL;
+ int a, b, c, totface;
+
+ /* count */
+ totface= 0;
+ for(obi=re->instancetable.first; obi; obi=obi->next) {
+ if(obi->flag & R_TRANSFORMED)
+ continue;
+
+ obr= obi->obr;
+ for(a=0; a<obr->totvlak; a++) {
+ if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
+ else vlr++;
+
+ if(vlr->mat->mode & MA_TRACEBLE)
+ totface++;
+ }
+ }
+
+ if(totface == 0)
+ return NULL;
+
+ tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
+ tree->totface= totface;
+
+ /* parameters */
+ tree->error= re->wrld.ao_approx_error;
+ tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f;
+
+ /* allocation */
+ tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode));
+ BLI_memarena_use_calloc(tree->arena);
+
+ if(re->wrld.aomode & WO_AOCACHE)
+ tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache");
+
+ tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace");
+ tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo");
+ tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion");
+
+ /* make array of face pointers */
+ for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) {
+ if(obi->flag & R_TRANSFORMED)
+ continue; /* temporary to avoid slow renders with loads of duplis */
+
+ obr= obi->obr;
+ for(a=0; a<obr->totvlak; a++) {
+ if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
+ else vlr++;
+
+ if(vlr->mat->mode & MA_TRACEBLE) {
+ tree->face[b].obi= c;
+ tree->face[b].facenr= a;
+ tree->occlusion[b]= 1.0f;
+ occ_face(&tree->face[b], tree->co[b], NULL, NULL);
+ b++;
+ }
+ }
+ }
+
+ /* recurse */
+ tree->root= occ_build_recursive(tree, 0, totface, 1);
+
+#if 0
+ if(tree->doindirect) {
+ occ_build_shade(re, tree);
+ occ_sum_occlusion(tree, tree->root);
+ }
+#endif
+
+ MEM_freeN(tree->co);
+ tree->co= NULL;
+
+ occ_build_sh_normalize(tree->root);
+
+ for(a=0; a<BLENDER_MAX_THREADS; a++)
+ tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack");
+
+ return tree;
+}
+
+static void occ_free_tree(OcclusionTree *tree)
+{
+ int a;
+
+ if(tree) {
+ if(tree->arena) BLI_memarena_free(tree->arena);
+ for(a=0; a<BLENDER_MAX_THREADS; a++)
+ if(tree->stack[a])
+ MEM_freeN(tree->stack[a]);
+ if(tree->occlusion) MEM_freeN(tree->occlusion);
+ if(tree->face) MEM_freeN(tree->face);
+ if(tree->cache) MEM_freeN(tree->cache);
+ MEM_freeN(tree);
+ }
+}
+
+/* ------------------------- Traversal --------------------------- */
+
+static float occ_solid_angle(OccNode *node, float *v, float d2, float *receivenormal)
+{
+ float dotreceive, dotemit, invd2 = 1.0f/sqrtf(d2);
+ float ev[3];
+
+ ev[0]= -v[0]*invd2;
+ ev[1]= -v[1]*invd2;
+ ev[2]= -v[2]*invd2;
+ dotemit= sh_eval(node->sh, ev);
+ dotreceive= INPR(receivenormal, v)*invd2;
+
+ CLAMP(dotemit, 0.0f, 1.0f);
+ CLAMP(dotreceive, 0.0f, 1.0f);
+
+ return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI;
+}
+
+static void VecAddDir(float *result, float *v1, float *v2, float fac)
+{
+ result[0]= v1[0] + fac*(v2[0] - v1[0]);
+ result[1]= v1[1] + fac*(v2[1] - v1[1]);
+ result[2]= v1[2] + fac*(v2[2] - v1[2]);
+}
+
+static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3)
+{
+ static const float epsilon = 1e-6f;
+ float c, sd[3];
+
+ c= INPR(n, p);
+
+ /* signed distances from the vertices to the plane. */
+ sd[0]= INPR(n, v0) - c;
+ sd[1]= INPR(n, v1) - c;
+ sd[2]= INPR(n, v2) - c;
+
+ if(fabs(sd[0]) < epsilon) sd[0] = 0.0f;
+ if(fabs(sd[1]) < epsilon) sd[1] = 0.0f;
+ if(fabs(sd[2]) < epsilon) sd[2] = 0.0f;
+
+ if(sd[0] > 0) {
+ if(sd[1] > 0) {
+ if(sd[2] > 0) {
+ // +++
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // ++-
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
+ VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
+ }
+ else {
+ // ++0
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ }
+ else if(sd[1] < 0) {
+ if(sd[2] > 0) {
+ // +-+
+ VECCOPY(q0, v0);
+ VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
+ VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
+ VECCOPY(q3, v2);
+ }
+ else if(sd[2] < 0) {
+ // +--
+ VECCOPY(q0, v0);
+ VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
+ VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
+ VECCOPY(q3, q2);
+ }
+ else {
+ // +-0
+ VECCOPY(q0, v0);
+ VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ }
+ else {
+ if(sd[2] > 0) {
+ // +0+
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // +0-
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
+ VECCOPY(q3, q2);
+ }
+ else {
+ // +00
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ }
+ }
+ else if(sd[0] < 0) {
+ if(sd[1] > 0) {
+ if(sd[2] > 0) {
+ // -++
+ VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
+ }
+ else if(sd[2] < 0) {
+ // -+-
+ VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
+ VECCOPY(q1, v1);
+ VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
+ VECCOPY(q3, q2);
+ }
+ else {
+ // -+0
+ VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ }
+ else if(sd[1] < 0) {
+ if(sd[2] > 0) {
+ // --+
+ VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
+ VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // ---
+ return 0;
+ }
+ else {
+ // --0
+ return 0;
+ }
+ }
+ else {
+ if(sd[2] > 0) {
+ // -0+
+ VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // -0-
+ return 0;
+ }
+ else {
+ // -00
+ return 0;
+ }
+ }
+ }
+ else {
+ if(sd[1] > 0) {
+ if(sd[2] > 0) {
+ // 0++
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // 0+-
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
+ VECCOPY(q3, q2);
+ }
+ else {
+ // 0+0
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ }
+ else if(sd[1] < 0) {
+ if(sd[2] > 0) {
+ // 0-+
+ VECCOPY(q0, v0);
+ VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // 0--
+ return 0;
+ }
+ else {
+ // 0-0
+ return 0;
+ }
+ }
+ else {
+ if(sd[2] > 0) {
+ // 00+
+ VECCOPY(q0, v0);
+ VECCOPY(q1, v1);
+ VECCOPY(q2, v2);
+ VECCOPY(q3, q2);
+ }
+ else if(sd[2] < 0) {
+ // 00-
+ return 0;
+ }
+ else {
+ // 000
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+/* altivec optimization, this works, but is unused */
+
+#if 0
+#include <Accelerate/Accelerate.h>
+
+typedef union {
+ vFloat v;
+ float f[4];
+} vFloatResult;
+
+static vFloat vec_splat_float(float val)
+{
+ return (vFloat){val, val, val, val};
+}
+
+static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
+{
+ vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
+ vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3};
+ vFloatResult vresult;
+ float result;
+
+ /* compute r* */
+ vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]);
+ vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]);
+ vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]);
+
+ /* normalize r* */
+ rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f));
+ vrx = vrx*rlen;
+ vry = vry*rlen;
+ vrz = vrz*rlen;
+
+ /* rotate r* for cross and dot */
+ vsrx= vec_perm(vrx, vrx, rotate);
+ vsry= vec_perm(vry, vry, rotate);
+ vsrz= vec_perm(vrz, vrz, rotate);
+
+ /* cross product */
+ gx = vsry*vrz - vsrz*vry;
+ gy = vsrz*vrx - vsrx*vrz;
+ gz = vsrx*vry - vsry*vrx;
+
+ /* normalize */
+ rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f));
+ gx = gx*rlen;
+ gy = gy*rlen;
+ gz = gz*rlen;
+
+ /* angle */
+ vcos = vrx*vsrx + vry*vsry + vrz*vsrz;
+ vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
+ vangle= vacosf(vcos);
+
+ /* dot */
+ vresult.v = (vec_splat_float(n[0])*gx +
+ vec_splat_float(n[1])*gy +
+ vec_splat_float(n[2])*gz)*vangle;
+
+ result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI);
+ result= MAX2(result, 0.0f);
+
+ return result;
+}
+
+#endif
+
+/* SSE optimization, acos code doesn't work */
+
+#if 0
+
+#include <xmmintrin.h>
+
+static __m128 sse_approx_acos(__m128 x)
+{
+ /* needs a better approximation than taylor expansion of acos, since that
+ * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
+ * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
+
+ return _mm_set_ps1(1.0f);
+}
+
+static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
+{
+ float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
+ float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
+ float fresult[4] __attribute__((aligned(16)));
+ __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
+
+ /* compute r */
+ qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
+ qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
+ qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
+
+ rx = qx - _mm_set_ps1(p[0]);
+ ry = qy - _mm_set_ps1(p[1]);
+ rz = qz - _mm_set_ps1(p[2]);
+
+ /* normalize r */
+ rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f));
+ rx = rx*rlen;
+ ry = ry*rlen;
+ rz = rz*rlen;
+
+ /* cross product */
+ srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1));
+ sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1));
+ srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1));
+
+ gx = sry*rz - srz*ry;
+ gy = srz*rx - srx*rz;
+ gz = srx*ry - sry*rx;
+
+ /* normalize g */
+ glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f));
+ gx = gx*glen;
+ gy = gy*glen;
+ gz = gz*glen;
+
+ /* compute angle */
+ rcos = rx*srx + ry*sry + rz*srz;
+ rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
+
+ angle = sse_approx_cos(rcos);
+ aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle;
+
+ /* sum together */
+ result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI);
+ result= MAX2(result, 0.0f);
+
+ return result;
+}
+
+#endif
+
+static float saacosf(float fac)
+{
+ if(fac<= -1.0f) return (float)M_PI;
+ else if(fac>=1.0f) return 0.0f;
+ else return acos(fac); /* acosf(fac) */
+}
+
+static void normalizef(float *n)
+{
+ float d;
+
+ d= INPR(n, n);
+
+ if(d > 1.0e-35F) {
+ d= 1.0f/sqrt(d); /* sqrtf(d) */
+
+ n[0] *= d;
+ n[1] *= d;
+ n[2] *= d;
+ }
+}
+
+static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
+{
+ float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
+ float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
+
+ VECSUB(r0, q0, p);
+ VECSUB(r1, q1, p);
+ VECSUB(r2, q2, p);
+ VECSUB(r3, q3, p);
+
+ normalizef(r0);
+ normalizef(r1);
+ normalizef(r2);
+ normalizef(r3);
+
+ Crossf(g0, r1, r0); normalizef(g0);
+ Crossf(g1, r2, r1); normalizef(g1);
+ Crossf(g2, r3, r2); normalizef(g2);
+ Crossf(g3, r0, r3); normalizef(g3);
+
+ a1= saacosf(INPR(r0, r1));
+ a2= saacosf(INPR(r1, r2));
+ a3= saacosf(INPR(r2, r3));
+ a4= saacosf(INPR(r3, r0));
+
+ dot1= INPR(n, g0);
+ dot2= INPR(n, g1);
+ dot3= INPR(n, g2);
+ dot4= INPR(n, g3);
+
+ result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI;
+ result= MAX2(result, 0.0f);
+
+ return result;
+}
+
+float occ_form_factor(OccFace *face, float *p, float *n)
+{
+ ObjectInstanceRen *obi;
+ VlakRen *vlr;
+ float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f;
+
+ obi= &R.objectinstance[face->obi];
+ vlr= RE_findOrAddVlak(obi->obr, face->facenr);
+
+ VECCOPY(v1, vlr->v1->co);
+ VECCOPY(v2, vlr->v2->co);
+ VECCOPY(v3, vlr->v3->co);
+
+ if(obi->flag & R_TRANSFORMED) {
+ Mat4MulVecfl(obi->mat, v1);
+ Mat4MulVecfl(obi->mat, v2);
+ Mat4MulVecfl(obi->mat, v3);
+ }
+
+ if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
+ contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
+
+ if(vlr->v4) {
+ VECCOPY(v4, vlr->v4->co);
+ if(obi->flag & R_TRANSFORMED)
+ Mat4MulVecfl(obi->mat, v4);
+
+ if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
+ contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
+ }
+
+ return contrib;
+}
+
+static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float *bentn)
+{
+ OccNode *node, **stack;
+ OccFace *face;
+ float resultocc, v[3], p[3], n[3], co[3];
+ float distfac, error, d2, weight, emitarea;
+ int b, totstack;
+
+ /* init variables */
+ VECCOPY(p, pp);
+ VECCOPY(n, pn);
+ VECADDFAC(p, p, n, 1e-4f);
+
+ if(bentn)
+ VECCOPY(bentn, n);
+
+ error= tree->error;
+ distfac= tree->distfac;
+
+ resultocc= 0.0f;
+
+ /* init stack */
+ stack= tree->stack[thread];
+ stack[0]= tree->root;
+ totstack= 1;
+
+ while(totstack) {
+ /* pop point off the stack */
+ node= stack[--totstack];
+
+ VECSUB(v, node->co, p);
+ d2= INPR(v, v) + 1e-16f;
+ emitarea= MAX2(node->area, node->dco);
+
+ if(d2*error > emitarea) {
+ /* accumulate occlusion from spherical harmonics */
+ weight= occ_solid_angle(node, v, d2, n);
+ weight *= node->occlusion;
+
+ if(bentn) {
+ Normalize(v);
+ bentn[0] -= weight*v[0];
+ bentn[1] -= weight*v[1];
+ bentn[2] -= weight*v[2];
+ }
+
+ if(distfac != 0.0f)
+ weight /= (1.0 + distfac*d2);
+
+ resultocc += weight;
+ }
+ else {
+ /* traverse into children */
+ for(b=0; b<TOTCHILD; b++) {
+ if(node->childflag & (1<<b)) {
+ face= tree->face+node->child[b].face;
+
+ /* accumulate occlusion with face form factor */
+ if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
+ weight= occ_form_factor(face, p, n);
+ weight *= tree->occlusion[node->child[b].face];
+
+ if(bentn || distfac != 0.0f) {
+ occ_face(face, co, NULL, NULL);
+ VECSUB(v, co, p);
+
+ if(bentn) {
+ Normalize(v);
+ bentn[0] -= weight*v[0];
+ bentn[1] -= weight*v[1];
+ bentn[2] -= weight*v[2];
+ }
+
+ if(distfac != 0.0f) {
+ d2= INPR(v, v) + 1e-16f;
+ weight /= (1.0 + distfac*d2);
+ }
+ }
+
+ resultocc += weight;
+
+ }
+ }
+ else if(node->child[b].node) {
+ /* push child on the stack */
+ stack[totstack++]= node->child[b].node;
+ }
+ }
+ }
+ }
+
+ if(occ) *occ= resultocc;
+}
+
+static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
+{
+ float *occ, co[3], n[3];
+ int pass, i;
+
+ occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc");
+
+ for(pass=0; pass<totpass; pass++) {
+ for(i=0; i<tree->totface; i++) {
+ occ_face(&tree->face[i], co, n, NULL);
+ VecMulf(n, -1.0f);
+ VECADDFAC(co, co, n, 1e-8f);
+
+ occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL);
+ if(re->test_break())
+ break;
+ }
+
+ if(re->test_break())
+ break;
+
+ for(i=0; i<tree->totface; i++) {
+ tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
+ if(tree->occlusion[i] < 0.0f)
+ tree->occlusion[i]= 0.0f;
+ }
+
+ occ_sum_occlusion(tree, tree->root);
+ }
+
+ MEM_freeN(occ);
+}
+
+static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *skycol)
+{
+ float nn[3], bn[3], dxyview[3], fac, occ, occlusion, correction;
+ int aocolor;
+
+ aocolor= re->wrld.aocolor;
+ if(onlyshadow)
+ aocolor= WO_AOPLAIN;
+
+ VECCOPY(nn, n);
+ VecMulf(nn, -1.0f);
+
+ occ_lookup(tree, thread, exclude, co, nn, &occ, (aocolor)? bn: NULL);
+
+ correction= re->wrld.ao_approx_correction;
+
+ occlusion= (1.0f-correction)*(1.0f-occ);
+ CLAMP(occlusion, 0.0f, 1.0f);
+ if(correction != 0.0f)
+ occlusion += correction*exp(-occ);
+
+ if(aocolor) {
+ /* sky shading using bent normal */
+ if(aocolor==WO_AOSKYCOL) {
+ fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]);
+ skycol[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr;
+ skycol[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng;
+ skycol[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb;
+ }
+ else { /* WO_AOSKYTEX */
+ dxyview[0]= 1.0f;
+ dxyview[1]= 1.0f;
+ dxyview[2]= 0.0f;
+ shadeSkyView(skycol, co, bn, dxyview);
+ }
+
+ VecMulf(skycol, occlusion);
+ }
+ else {
+ skycol[0]= occlusion;
+ skycol[1]= occlusion;
+ skycol[2]= occlusion;
+ }
+}
+
+/* ---------------------------- Caching ------------------------------- */
+
+static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
+{
+ x -= cache->x;
+ y -= cache->y;
+
+ x /= cache->step;
+ y /= cache->step;
+ x *= cache->step;
+ y *= cache->step;
+
+ if(x < 0 || x >= cache->w || y < 0 || y >= cache->h)
+ return NULL;
+ else
+ return &cache->sample[y*cache->w + x];
+}
+
+static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *col)
+{
+ OcclusionCache *cache;
+ OcclusionCacheSample *samples[4], *sample;
+ float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
+ float d[3], dist2;
+ int i, x1, y1, x2, y2;
+
+ if(!tree->cache)
+ return 0;
+
+ /* first try to find a sample in the same pixel */
+ cache= &tree->cache[thread];
+
+ if(cache->sample && cache->step) {
+ sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)];
+ if(sample->filled) {
+ VECSUB(d, sample->co, co);
+ dist2= INPR(d, d);
+ if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) {
+ VECCOPY(col, sample->col);
+ return 1;
+ }
+ }
+ }
+ else
+ return 0;
+
+ /* try to interpolate between 4 neighbouring pixels */
+ samples[0]= find_occ_sample(cache, x, y);
+ samples[1]= find_occ_sample(cache, x+cache->step, y);
+ samples[2]= find_occ_sample(cache, x, y+cache->step);
+ samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step);
+
+ for(i=0; i<4; i++)
+ if(!samples[i] || !samples[i]->filled)
+ return 0;
+
+ /* require intensities not being too different */
+ mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
+ maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
+
+ if(maxo - mino > 0.05f)
+ return 0;
+
+ /* compute weighted interpolation between samples */
+ col[0]= col[1]= col[2]= 0.0f;
+ totw= 0.0f;
+
+ x1= samples[0]->x;
+ y1= samples[0]->y;
+ x2= samples[3]->x;
+ y2= samples[3]->y;
+
+ tx= (float)(x2 - x)/(float)(x2 - x1);
+ ty= (float)(y2 - y)/(float)(y2 - y1);
+
+ wb[3]= (1.0f-tx)*(1.0f-ty);
+ wb[2]= (tx)*(1.0f-ty);
+ wb[1]= (1.0f-tx)*(ty);
+ wb[0]= tx*ty;
+
+ for(i=0; i<4; i++) {
+ VECSUB(d, samples[i]->co, co);
+ dist2= INPR(d, d);
+
+ wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
+ wn[i]= pow(INPR(samples[i]->n, n), 32.0f);
+
+ w= wb[i]*wn[i]*wz[i];
+
+ totw += w;
+ col[0] += w*samples[i]->col[0];
+ col[1] += w*samples[i]->col[1];
+ col[2] += w*samples[i]->col[2];
+ }
+
+ if(totw >= 0.9f) {
+ totw= 1.0f/totw;
+ col[0] *= totw;
+ col[1] *= totw;
+ col[2] *= totw;
+ return 1;
+ }
+
+ return 0;
+}
+
+static void sample_occ_cache_mesh(ShadeInput *shi)
+{
+ StrandRen *strand= shi->strand;
+ OcclusionCacheMesh *mesh= strand->buffer->occlusionmesh;
+ int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
+ float w[4], *co1, *co2, *co3, *co4;
+
+ if(mesh && index) {
+ face= mesh->face[*index];
+
+ co1= mesh->co[face[0]];
+ co2= mesh->co[face[1]];
+ co3= mesh->co[face[2]];
+ co4= (face[3])? mesh->co[face[3]]: NULL;
+
+ InterpWeightsQ3Dfl(co1, co2, co3, co4, strand->vert->co, w);
+
+ shi->ao[0]= shi->ao[1]= shi->ao[2]= 0.0f;
+ VECADDFAC(shi->ao, shi->ao, mesh->col[face[0]], w[0]);
+ VECADDFAC(shi->ao, shi->ao, mesh->col[face[1]], w[1]);
+ VECADDFAC(shi->ao, shi->ao, mesh->col[face[2]], w[2]);
+ if(face[3])
+ VECADDFAC(shi->ao, shi->ao, mesh->col[face[3]], w[3]);
+ }
+ else {
+ shi->ao[0]= 1.0f;
+ shi->ao[1]= 1.0f;
+ shi->ao[2]= 1.0f;
+ }
+}
+
+void *cache_occ_mesh(Render *re, ObjectRen *obr, DerivedMesh *dm, float mat[][4])
+{
+ OcclusionCacheMesh *mesh;
+ MFace *mface;
+ MVert *mvert;
+ int a, totvert, totface;
+
+ totvert= dm->getNumVerts(dm);
+ totface= dm->getNumFaces(dm);
+
+ mesh= re->occlusionmesh.last;
+ if(mesh && mesh->obr->ob == obr->ob && mesh->obr->par == obr->par
+ && mesh->totvert==totvert && mesh->totface==totface)
+ return mesh;
+
+ mesh= MEM_callocN(sizeof(OcclusionCacheMesh), "OcclusionCacheMesh");
+ mesh->obr= obr;
+ mesh->totvert= totvert;
+ mesh->totface= totface;
+ mesh->co= MEM_callocN(sizeof(float)*3*mesh->totvert, "OcclusionMeshCo");
+ mesh->face= MEM_callocN(sizeof(int)*4*mesh->totface, "OcclusionMeshFaces");
+ mesh->col= MEM_callocN(sizeof(float)*3*mesh->totvert, "OcclusionMeshCol");
+
+ mvert= dm->getVertArray(dm);
+ for(a=0; a<mesh->totvert; a++, mvert++) {
+ VECCOPY(mesh->co[a], mvert->co);
+ Mat4MulVecfl(mat, mesh->co[a]);
+ }
+
+ mface= dm->getFaceArray(dm);
+ for(a=0; a<mesh->totface; a++, mface++) {
+ mesh->face[a][0]= mface->v1;
+ mesh->face[a][1]= mface->v2;
+ mesh->face[a][2]= mface->v3;
+ mesh->face[a][3]= mface->v4;
+ }
+
+ BLI_addtail(&re->occlusionmesh, mesh);
+
+ return mesh;
+}
+
+/* ------------------------- External Functions --------------------------- */
+
+void make_occ_tree(Render *re)
+{
+ OcclusionCacheMesh *mesh;
+ float col[3], co[3], n[3], *co1, *co2, *co3, *co4;
+ int a, *face, *count;
+
+ /* ugly, needed for occ_face */
+ R= *re;
+
+ re->i.infostr= "Occlusion preprocessing";
+ re->stats_draw(&re->i);
+
+ re->occlusiontree= occ_tree_build(re);
+
+ if(re->occlusiontree) {
+ if(re->wrld.ao_approx_passes)
+ occ_compute_passes(re, re->occlusiontree, re->wrld.ao_approx_passes);
+
+ for(mesh=re->occlusionmesh.first; mesh; mesh=mesh->next) {
+ count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount");
+
+ for(a=0; a<mesh->totface; a++) {
+ face= mesh->face[a];
+ co1= mesh->co[face[0]];
+ co2= mesh->co[face[1]];
+ co3= mesh->co[face[2]];
+
+ if(face[3]) {
+ co4= mesh->co[face[3]];
+
+ VecLerpf(co, co1, co3, 0.5f);
+ CalcNormFloat4(co1, co2, co3, co4, n);
+ }
+ else {
+ CalcCent3f(co, co1, co2, co3);
+ CalcNormFloat(co1, co2, co3, n);
+ }
+ VecMulf(n, -1.0f);
+
+ sample_occ_tree(re, re->occlusiontree, NULL, co, n, 0, 0, col);
+
+ VECADD(mesh->col[face[0]], mesh->col[face[0]], col);
+ count[face[0]]++;
+ VECADD(mesh->col[face[1]], mesh->col[face[1]], col);
+ count[face[1]]++;
+ VECADD(mesh->col[face[2]], mesh->col[face[2]], col);
+ count[face[2]]++;
+
+ if(face[3]) {
+ VECADD(mesh->col[face[3]], mesh->col[face[3]], col);
+ count[face[3]]++;
+ }
+ }
+
+ for(a=0; a<mesh->totvert; a++)
+ if(count[a])
+ VecMulf(mesh->col[a], 1.0f/count[a]);
+
+ MEM_freeN(count);
+ }
+ }
+}
+
+void free_occ(Render *re)
+{
+ OcclusionCacheMesh *mesh;
+
+ if(re->occlusiontree) {
+ occ_free_tree(re->occlusiontree);
+ re->occlusiontree = NULL;
+ }
+
+ for(mesh=re->occlusionmesh.first; mesh; mesh=mesh->next) {
+ if(mesh->co) MEM_freeN(mesh->co);
+ if(mesh->col) MEM_freeN(mesh->col);
+ if(mesh->face) MEM_freeN(mesh->face);
+ }
+
+ BLI_freelistN(&re->occlusionmesh);
+}
+
+void sample_occ(Render *re, ShadeInput *shi)
+{
+ OcclusionTree *tree= re->occlusiontree;
+ OcclusionCache *cache;
+ OcclusionCacheSample *sample;
+ OccFace exclude;
+ int onlyshadow;
+
+ if(tree) {
+ if(shi->strand) {
+ sample_occ_cache_mesh(shi);
+ }
+ /* try to get result from the cache if possible */
+ else if(!sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao)) {
+ /* no luck, let's sample the occlusion */
+ exclude.obi= shi->obi - re->objectinstance;
+ exclude.facenr= shi->vlr->index;
+ onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
+ sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao);
+
+ if(G.rt & 32)
+ shi->ao[2] *= 2.0f;
+
+ /* fill result into sample, each time */
+ if(tree->cache) {
+ cache= &tree->cache[shi->thread];
+
+ if(cache->sample && cache->step) {
+ sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)];
+ VECCOPY(sample->co, shi->co);
+ VECCOPY(sample->n, shi->vno);
+ VECCOPY(sample->col, shi->ao);
+ sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]);
+ sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
+ sample->filled= 1;
+ }
+ }
+ }
+ }
+ else {
+ shi->ao[0]= 1.0f;
+ shi->ao[1]= 1.0f;
+ shi->ao[2]= 1.0f;
+ }
+}
+
+void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
+{
+ OcclusionTree *tree= re->occlusiontree;
+ PixStr ps;
+ OcclusionCache *cache;
+ OcclusionCacheSample *sample;
+ OccFace exclude;
+ ShadeInput *shi;
+ int *ro, *rp, *rz, onlyshadow;
+ int x, y, step = CACHE_STEP;
+
+ if(!tree->cache)
+ return;
+
+ cache= &tree->cache[pa->thread];
+ cache->w= pa->rectx;
+ cache->h= pa->recty;
+ cache->x= pa->disprect.xmin;
+ cache->y= pa->disprect.ymin;
+ cache->step= step;
+ cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample");
+ sample= cache->sample;
+
+ ps.next= NULL;
+ ps.mask= 0xFFFF;
+
+ ro= pa->recto;
+ rp= pa->rectp;
+ rz= pa->rectz;
+
+ /* compute a sample at every step pixels */
+ for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
+ for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rp++, rz++) {
+ if((((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1) && (((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1)) {
+ if(*rp) {
+ ps.obi= *ro;
+ ps.facenr= *rp;
+ ps.z= *rz;
+
+ shade_samples_fill_with_ps(ssamp, &ps, x, y);
+ shi= ssamp->shi;
+
+ onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
+ exclude.obi= shi->obi - re->objectinstance;
+ exclude.facenr= shi->vlr->index;
+ sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao);
+
+ VECCOPY(sample->co, shi->co);
+ VECCOPY(sample->n, shi->vno);
+ VECCOPY(sample->col, shi->ao);
+ sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]);
+ sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
+ sample->x= shi->xs;
+ sample->y= shi->ys;
+ sample->filled= 1;
+ }
+
+ if(re->test_break())
+ break;
+ }
+
+ sample++;
+ }
+ }
+}
+
+void free_occ_samples(Render *re, RenderPart *pa)
+{
+ OcclusionTree *tree= re->occlusiontree;
+ OcclusionCache *cache;
+
+ if(tree->cache) {
+ cache= &tree->cache[pa->thread];
+
+ if(cache->sample)
+ MEM_freeN(cache->sample);
+
+ cache->w= 0;
+ cache->h= 0;
+ cache->step= 0;
+ }
+}
+
diff --git a/source/blender/render/intern/source/pipeline.c b/source/blender/render/intern/source/pipeline.c
index 0f9d4470164..2c03b0ae6c7 100644
--- a/source/blender/render/intern/source/pipeline.c
+++ b/source/blender/render/intern/source/pipeline.c
@@ -538,6 +538,9 @@ static RenderResult *new_render_result(Render *re, rcti *partrct, int crop, int
if(srl->passflag & SCE_PASS_REFRACT)
render_layer_add_pass(rr, rl, 3, SCE_PASS_REFRACT);
}
+ else if(re->wrld.mode & WO_AMB_OCC)
+ if(re->wrld.ao_gather_method == WO_AOGATHER_APPROX)
+ render_layer_add_pass(rr, rl, 3, SCE_PASS_AO);
if(re->r.mode & R_RADIO)
if(srl->passflag & SCE_PASS_RADIO)
render_layer_add_pass(rr, rl, 3, SCE_PASS_RADIO);
diff --git a/source/blender/render/intern/source/rendercore.c b/source/blender/render/intern/source/rendercore.c
index e2f331afafd..d0ea69dc833 100644
--- a/source/blender/render/intern/source/rendercore.c
+++ b/source/blender/render/intern/source/rendercore.c
@@ -60,6 +60,7 @@
#include "renderpipeline.h"
#include "render_types.h"
#include "renderdatabase.h"
+#include "occlusion.h"
#include "pixelblending.h"
#include "pixelshading.h"
#include "shadbuf.h"
@@ -534,7 +535,11 @@ static void shadeDA_tile(RenderPart *pa, RenderLayer *rl)
/* general shader info, passes */
shade_sample_initialize(&ssamp, pa, rl);
addpassflag= rl->passflag & ~(SCE_PASS_Z|SCE_PASS_COMBINED);
-
+
+ /* occlusion caching */
+ if(R.occlusiontree)
+ cache_occ_samples(&R, pa, &ssamp);
+
/* filtered render, for now we assume only 1 filter size */
if(pa->crop) {
crop= 1;
@@ -582,6 +587,9 @@ static void shadeDA_tile(RenderPart *pa, RenderLayer *rl)
if(R.r.mode & R_SHADOW)
ISB_free(pa);
+
+ if(R.occlusiontree)
+ free_occ_samples(&R, pa);
}
/* ************* pixel struct ******** */
@@ -1072,6 +1080,9 @@ void zbufshade_tile(RenderPart *pa)
/* irregular shadowb buffer creation */
if(R.r.mode & R_SHADOW)
ISB_create(pa, NULL);
+
+ if(R.occlusiontree)
+ cache_occ_samples(&R, pa, &ssamp);
for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) {
for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rz++, rp++, fcol+=4, offs++) {
@@ -1085,9 +1096,6 @@ void zbufshade_tile(RenderPart *pa)
if(shade_samples(&ssamp, &ps, x, y)) {
QUATCOPY(fcol, ssamp.shr[0].combined);
- if(!(fcol[0] == fcol[0]))
- printvecf("fudgecol", fcol);
-
/* passes */
if(addpassflag)
add_passes(rl, offs, ssamp.shi, ssamp.shr);
@@ -1098,6 +1106,9 @@ void zbufshade_tile(RenderPart *pa)
if(R.test_break()) break;
}
+ if(R.occlusiontree)
+ free_occ_samples(&R, pa);
+
if(R.r.mode & R_SHADOW)
ISB_free(pa);
}
diff --git a/source/blender/render/intern/source/renderdatabase.c b/source/blender/render/intern/source/renderdatabase.c
index 58a89809b83..8aaf29b65c4 100644
--- a/source/blender/render/intern/source/renderdatabase.c
+++ b/source/blender/render/intern/source/renderdatabase.c
@@ -105,6 +105,7 @@
#define RE_UV_ELEMS 2
#define RE_SURFNOR_ELEMS 3
#define RE_SIMPLIFY_ELEMS 2
+#define RE_FACE_ELEMS 1
float *RE_vertren_get_sticky(ObjectRen *obr, VertRen *ver, int verify)
{
@@ -607,6 +608,21 @@ float *RE_strandren_get_simplify(struct ObjectRen *obr, struct StrandRen *strand
return simplify + (strand->index & 255)*RE_SIMPLIFY_ELEMS;
}
+int *RE_strandren_get_face(ObjectRen *obr, StrandRen *strand, int verify)
+{
+ int *face;
+ int nr= strand->index>>8;
+
+ face= obr->strandnodes[nr].face;
+ if(face==NULL) {
+ if(verify)
+ face= obr->strandnodes[nr].face= MEM_callocN(256*RE_FACE_ELEMS*sizeof(int), "face table");
+ else
+ return NULL;
+ }
+ return face + (strand->index & 255)*RE_FACE_ELEMS;
+}
+
/* winspeed is exception, it is stored per instance */
float *RE_strandren_get_winspeed(ObjectInstanceRen *obi, StrandRen *strand, int verify)
{
@@ -757,6 +773,8 @@ void free_renderdata_strandnodes(StrandTableNode *strandnodes)
MEM_freeN(strandnodes[a].surfnor);
if(strandnodes[a].simplify)
MEM_freeN(strandnodes[a].simplify);
+ if(strandnodes[a].face)
+ MEM_freeN(strandnodes[a].face);
}
MEM_freeN(strandnodes);
diff --git a/source/blender/render/intern/source/shadeinput.c b/source/blender/render/intern/source/shadeinput.c
index d862729317b..586d54aa04b 100644
--- a/source/blender/render/intern/source/shadeinput.c
+++ b/source/blender/render/intern/source/shadeinput.c
@@ -422,7 +422,7 @@ void shade_input_set_strand_texco(ShadeInput *shi, StrandRen *strand, StrandVert
}
if(texco & TEXCO_STRAND) {
- shi->strand= spoint->strandco;
+ shi->strandco= spoint->strandco;
if(shi->osatex) {
shi->dxstrand= spoint->dtstrandco;
@@ -935,7 +935,7 @@ void shade_input_set_shade_texco(ShadeInput *shi)
}
if(texco & TEXCO_STRAND) {
- shi->strand= (l*v3->accum - u*v1->accum - v*v2->accum);
+ shi->strandco= (l*v3->accum - u*v1->accum - v*v2->accum);
if(shi->osatex) {
dl= shi->dx_u+shi->dx_v;
shi->dxstrand= dl*v3->accum-shi->dx_u*v1->accum-shi->dx_v*v2->accum;
@@ -1236,7 +1236,7 @@ void shade_samples_do_AO(ShadeSample *ssamp)
if(!(R.r.mode & R_SHADOW))
return;
- if(!(R.r.mode & R_RAYTRACE))
+ if(!(R.r.mode & R_RAYTRACE) && !(R.wrld.ao_gather_method == WO_AOGATHER_APPROX))
return;
if(R.wrld.mode & WO_AMB_OCC)
@@ -1248,7 +1248,7 @@ void shade_samples_do_AO(ShadeSample *ssamp)
}
-static void shade_samples_fill_with_ps(ShadeSample *ssamp, PixStr *ps, int x, int y)
+void shade_samples_fill_with_ps(ShadeSample *ssamp, PixStr *ps, int x, int y)
{
ShadeInput *shi;
float xs, ys;
diff --git a/source/blender/render/intern/source/shadeoutput.c b/source/blender/render/intern/source/shadeoutput.c
index ca81d456487..0ba7c09cdd8 100644
--- a/source/blender/render/intern/source/shadeoutput.c
+++ b/source/blender/render/intern/source/shadeoutput.c
@@ -43,6 +43,7 @@
#include "DNA_material_types.h"
/* local include */
+#include "occlusion.h"
#include "renderpipeline.h"
#include "render_types.h"
#include "pixelblending.h"
@@ -1006,8 +1007,9 @@ static void do_specular_ramp(ShadeInput *shi, float is, float t, float *spec)
/* pure AO, check for raytrace and world should have been done */
void ambient_occlusion(ShadeInput *shi)
{
-
- if((R.r.mode & R_RAYTRACE) && shi->mat->amb!=0.0f)
+ if((R.wrld.ao_gather_method == WO_AOGATHER_APPROX) && shi->mat->amb!=0.0f)
+ sample_occ(&R, shi);
+ else if((R.r.mode & R_RAYTRACE) && shi->mat->amb!=0.0f)
ray_ao(shi, shi->ao);
else
shi->ao[0]= shi->ao[1]= shi->ao[2]= 1.0f;
@@ -1017,25 +1019,28 @@ void ambient_occlusion(ShadeInput *shi)
/* wrld mode was checked for */
void ambient_occlusion_to_diffuse(ShadeInput *shi, float *diff)
{
-
- if((R.r.mode & R_RAYTRACE) && shi->mat->amb!=0.0f) {
- float f= R.wrld.aoenergy*shi->mat->amb;
-
- if (R.wrld.aomix==WO_AOADDSUB) {
- diff[0] = 2.0f*shi->ao[0]-1.0f;
- diff[1] = 2.0f*shi->ao[1]-1.0f;
- diff[2] = 2.0f*shi->ao[2]-1.0f;
- }
- else if (R.wrld.aomix==WO_AOSUB) {
- diff[0] = shi->ao[0]-1.0f;
- diff[1] = shi->ao[1]-1.0f;
- diff[2] = shi->ao[2]-1.0f;
- }
- else {
- VECCOPY(diff, shi->ao);
+ if((R.r.mode & R_RAYTRACE) || R.wrld.ao_gather_method == WO_AOGATHER_APPROX) {
+ if(shi->mat->amb!=0.0f) {
+ float f= R.wrld.aoenergy*shi->mat->amb;
+
+ if (R.wrld.aomix==WO_AOADDSUB) {
+ diff[0] = 2.0f*shi->ao[0]-1.0f;
+ diff[1] = 2.0f*shi->ao[1]-1.0f;
+ diff[2] = 2.0f*shi->ao[2]-1.0f;
+ }
+ else if (R.wrld.aomix==WO_AOSUB) {
+ diff[0] = shi->ao[0]-1.0f;
+ diff[1] = shi->ao[1]-1.0f;
+ diff[2] = shi->ao[2]-1.0f;
+ }
+ else {
+ VECCOPY(diff, shi->ao);
+ }
+
+ VECMUL(diff, f);
}
-
- VECMUL(diff, f);
+ else
+ diff[0]= diff[1]= diff[2]= 0.0f;
}
else
diff[0]= diff[1]= diff[2]= 0.0f;
diff --git a/source/blender/render/intern/source/strand.c b/source/blender/render/intern/source/strand.c
index 692c86a68cf..ccb3b3ee0f4 100644
--- a/source/blender/render/intern/source/strand.c
+++ b/source/blender/render/intern/source/strand.c
@@ -685,6 +685,7 @@ static void strand_shade_point(Render *re, ShadeSample *ssamp, StrandSegment *ss
vlr.flag |= R_TANGENT;
shi->vlr= &vlr;
+ shi->strand= sseg->strand;
shi->obi= sseg->obi;
shi->obr= sseg->obi->obr;
diff --git a/source/blender/render/intern/source/texture.c b/source/blender/render/intern/source/texture.c
index a4182637f02..be248ddda3f 100644
--- a/source/blender/render/intern/source/texture.c
+++ b/source/blender/render/intern/source/texture.c
@@ -1521,7 +1521,7 @@ void do_material_tex(ShadeInput *shi)
}
else if(mtex->texco==TEXCO_STRAND) {
co= tempvec; dx= dxt; dy= dyt;
- co[0]= shi->strand;
+ co[0]= shi->strandco;
co[1]= co[2]= 0.0f;
dx[0]= shi->dxstrand;
dx[1]= dx[2]= 0.0f;
diff --git a/source/blender/src/buttons_shading.c b/source/blender/src/buttons_shading.c
index c7ba1430748..d21d4f3c8de 100644
--- a/source/blender/src/buttons_shading.c
+++ b/source/blender/src/buttons_shading.c
@@ -2152,18 +2152,27 @@ static void world_panel_amb_occ(World *wrld)
uiBlockSetCol(block, TH_AUTO);
if(!(wrld->mode & WO_AMB_OCC)) return;
-
- yco -= YSPACE;
- uiDefButS(block, NUM, B_REDR, "Samples:",
- X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->aosamp, 1.0, 32.0, 100, 0, "Sets the number of samples used for AO (actual number: squared)");
-
yco -= YSPACE;
-
- uiDefButF(block, NUM, B_REDR, "Max Dist:",
- X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->aodist, 0.001, 5000.0, 100, 0, "Sets length of AO rays, defines how far away other faces give occlusion effect");
- yco -= YSPACE;
+ if(wrld->ao_gather_method == WO_AOGATHER_RAYTRACE) {
+ uiDefButS(block, NUM, B_REDR, "Samples:",
+ X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->aosamp, 1.0, 32.0, 100, 0, "Sets the number of samples used for AO (actual number: squared)");
+
+ yco -= YSPACE;
+
+ uiDefButF(block, NUM, B_REDR, "Max Dist:",
+ X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->aodist, 0.001, 5000.0, 100, 0, "Sets length of AO rays, defines how far away other faces give occlusion effect");
+ }
+ else {
+ uiDefButS(block, NUM, B_REDR, "Passes:",
+ X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->ao_approx_passes, 0.0, 10.0, 0, 0, "Sets the number of preprocessing passes to reduce overocclusion");
+
+ yco -= YSPACE;
+
+ uiDefButF(block, NUM, B_REDR, "Correction:",
+ X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->ao_approx_correction, 0.0, 1.0, 0, 0, "Ad-hoc correction for over-occlusion due to the approximation.");
+ }
uiBlockBeginAlign(block);
uiDefButBitS(block, TOG, WO_AODIST, B_AO_FALLOFF, "Use Falloff",
@@ -2176,24 +2185,39 @@ static void world_panel_amb_occ(World *wrld)
/* column 2 */
yco = PANEL_YMAX - BUTH - YSPACE;
- uiDefButS(block, MENU, B_REDR, "Constant QMC %x2|Adaptive QMC %x1|Constant Jittered %x0",
- X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_samp_method, 0, 0, 0, 0, "Method for generating shadow samples: Constant QMC: best quality, Adaptive QMC: fast in high contrast areas");
-
+ uiDefButS(block, MENU, B_REDR, "Gather Method%t|Raytrace %x0|Approximate %x1",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_gather_method, 0, 0, 0, 0, "Method for occlusion gathering: Raytrace: slow when noise free results are required, but accurate, Approximate: faster and without noise, but inaccurate");
+
yco -= YSPACE;
-
- if (wrld->ao_samp_method == WO_AOSAMP_HALTON) {
+
+ if(wrld->ao_gather_method == WO_AOGATHER_RAYTRACE) {
+ uiDefButS(block, MENU, B_REDR, "Constant QMC %x2|Adaptive QMC %x1|Constant Jittered %x0",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_samp_method, 0, 0, 0, 0, "Method for generating shadow samples: Constant QMC: best quality, Adaptive QMC: fast in high contrast areas");
+
+ yco -= YSPACE;
+
+ if (wrld->ao_samp_method == WO_AOSAMP_HALTON) {
+ uiBlockBeginAlign(block);
+ uiDefButF(block, NUM, B_REDR, "Threshold:",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_adapt_thresh, 0.0, 1.0, 100, 0, "Samples below this threshold will be considered fully shadowed/unshadowed and skipped");
+ uiDefButF(block, NUMSLI, B_REDR, "Adapt Vec:",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_adapt_speed_fac, 0.0, 1.0, 100, 0, "Use the speed vector pass to reduce AO samples in fast moving pixels. The higher the value, the more aggressive the sample reduction. Requires Vec pass enabled.");
+ uiBlockEndAlign(block);
+ } else if (wrld->ao_samp_method == WO_AOSAMP_CONSTANT) {
+ uiDefButF(block, NUMSLI, B_REDR, "Bias:",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->aobias, 0.0, 0.5, 10, 0, "Sets bias to prevent smoothed faces to show banding (in radians)");
+ }
+ }
+ else {
uiBlockBeginAlign(block);
- uiDefButF(block, NUM, B_REDR, "Threshold:",
- X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_adapt_thresh, 0.0, 1.0, 100, 0, "Samples below this threshold will be considered fully shadowed/unshadowed and skipped");
- uiDefButF(block, NUMSLI, B_REDR, "Adapt Vec:",
- X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_adapt_speed_fac, 0.0, 1.0, 100, 0, "Use the speed vector pass to reduce AO samples in fast moving pixels. The higher the value, the more aggressive the sample reduction. Requires Vec pass enabled.");
+ uiDefButF(block, NUM, B_REDR, "Error:",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->ao_approx_error, 0.0001, 10.0, 0, 0, "Error tolerance (low values are slower and higher quality)");
+
+ uiDefButBitS(block, TOG, WO_AOCACHE, B_REDR, "Pixel Cache",
+ X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->aomode, 0, 0, 0, 0, "Cache AO results in pixels and interpolate over neighbouring pixels for speedup.");
uiBlockEndAlign(block);
- } else if (wrld->ao_samp_method == WO_AOSAMP_CONSTANT) {
- uiDefButF(block, NUMSLI, B_REDR, "Bias:",
- X2CLM2, yco-=BUTH, BUTW2, BUTH, &wrld->aobias, 0.0, 0.5, 10, 0, "Sets bias to prevent smoothed faces to show banding (in radians)");
}
-
yco = PANEL_YMAX - (5*BUTH+4*YSPACE);
/* result mix modes */
@@ -2204,6 +2228,7 @@ static void world_panel_amb_occ(World *wrld)
X3CLM2, yco, BUTW3, BUTH, &wrld->aomix, 1.0, (float)WO_AOSUB, 0, 0, "subtracts light/shadows (needs at least one normal light to make anything visible)");
uiDefButS(block, ROW, B_REDR, "Both",
X3CLM3, yco, BUTW3, BUTH, &wrld->aomix, 1.0, (float)WO_AOADDSUB, 0, 0, "both lightens & darkens");
+ uiBlockEndAlign(block);
yco -= YSPACE;
@@ -2216,12 +2241,11 @@ static void world_panel_amb_occ(World *wrld)
uiDefButS(block, ROW, B_REDR, "Sky Texture",
X3CLM3, yco, BUTW3, BUTH, &wrld->aocolor, 2.0, (float)WO_AOSKYTEX, 0, 0, "Does full Sky texture render for diffuse energy");
uiBlockEndAlign(block);
-
+
yco -= YSPACE;
-
+
uiDefButF(block, NUMSLI, B_REDR, "Energy:",
X2CLM1, yco-=BUTH, BUTW2, BUTH, &wrld->aoenergy, 0.01, 3.0, 100, 0, "Sets global energy scale for AO");
-
}
static void world_panel_world(World *wrld)