diff options
Diffstat (limited to 'source/blender/render')
57 files changed, 33599 insertions, 431 deletions
diff --git a/source/blender/render/CMakeLists.txt b/source/blender/render/CMakeLists.txt index 0f0060c7578..359369228f8 100644 --- a/source/blender/render/CMakeLists.txt +++ b/source/blender/render/CMakeLists.txt @@ -24,7 +24,7 @@ # ***** END GPL LICENSE BLOCK ***** -set(INC +set(INC extern/include intern/include ../blenkernel diff --git a/source/blender/render/extern/include/RE_pipeline.h b/source/blender/render/extern/include/RE_pipeline.h index 1b0707bafc0..660e81eb022 100644 --- a/source/blender/render/extern/include/RE_pipeline.h +++ b/source/blender/render/extern/include/RE_pipeline.h @@ -103,11 +103,11 @@ typedef struct RenderPass { /* after render, the Combined pass is in combined, for renderlayers read from files it is a real pass */ typedef struct RenderLayer { struct RenderLayer *next, *prev; - + /* copy of RenderData */ char name[RE_MAXNAME]; int layflag, passflag, pass_xor; - + /* MULTIVIEW_TODO: acolrect and scolrect are not supported by multiview at the moment. * If they are really required they should be in RenderView instead */ @@ -121,16 +121,16 @@ typedef struct RenderLayer { void *exrhandle; ListBase passes; - + } RenderLayer; typedef struct RenderResult { struct RenderResult *next, *prev; - + /* target image size */ int rectx, recty; short crop, sample_nr; - + /* the following rect32, rectf and rectz buffers are for temporary storage only, for RenderResult structs * created in #RE_AcquireResultImage - which do not have RenderView */ @@ -140,25 +140,25 @@ typedef struct RenderResult { float *rectf; /* if this exists, a copy of one of layers, or result of composited layers */ float *rectz; - + /* coordinates within final image (after cropping) */ rcti tilerect; /* offset to apply to get a border render in full image */ int xof, yof; - + /* the main buffers */ ListBase layers; - + /* multiView maps to a StringVector in OpenEXR */ ListBase views; /* RenderView */ /* allowing live updates: */ volatile rcti renrect; volatile RenderLayer *renlay; - + 
/* optional saved endresult on disk */ int do_exr_tile; - + /* for render results in Image, verify validity for sequences */ int framenr; diff --git a/source/blender/render/intern/include/envmap.h b/source/blender/render/intern/include/envmap.h new file mode 100644 index 00000000000..c66427ae788 --- /dev/null +++ b/source/blender/render/intern/include/envmap.h @@ -0,0 +1,54 @@ +/* + * envmap_ext.h + * + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): none yet. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/envmap.h + * \ingroup render + */ + + +#ifndef __ENVMAP_H__ +#define __ENVMAP_H__ + +/** + * Make environment maps for all objects in the scene that have an + * environment map as texture. 
+ * (initrender.c) + */ + +struct Render; +struct TexResult; +struct ImagePool; + +void make_envmaps(struct Render *re); +int envmaptex(struct Tex *tex, const float texvec[3], float dxt[3], float dyt[3], int osatex, struct TexResult *texres, struct ImagePool *pool, const bool skip_image_load); +void env_rotate_scene(struct Render *re, float mat[4][4], int do_rotate); + +#endif /* __ENVMAP_H__ */ + diff --git a/source/blender/render/intern/include/initrender.h b/source/blender/render/intern/include/initrender.h index e7ff3c7097c..b8732e7cc5c 100644 --- a/source/blender/render/intern/include/initrender.h +++ b/source/blender/render/intern/include/initrender.h @@ -31,7 +31,7 @@ #ifndef __INITRENDER_H__ -#define __INITRENDER_H__ +#define __INITRENDER_H__ /* Functions */ diff --git a/source/blender/render/intern/include/pixelblending.h b/source/blender/render/intern/include/pixelblending.h new file mode 100644 index 00000000000..022510c7132 --- /dev/null +++ b/source/blender/render/intern/include/pixelblending.h @@ -0,0 +1,65 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. 
+ * + * Contributor(s): 2004-2006 Blender Foundation, full recode + * + * ***** END GPL/BL DUAL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/pixelblending.h + * \ingroup render + */ + + +#ifndef __PIXELBLENDING_H__ +#define __PIXELBLENDING_H__ + + +/** + * add 1 pixel to into filtered three lines + * (float vecs to float vec) + */ +void add_filt_fmask(unsigned int mask, const float col[4], float *rowbuf, int row_w); +void add_filt_fmask_pixsize(unsigned int mask, float *in, float *rowbuf, int row_w, int pixsize); +void add_filt_fmask_coord(float filt[3][3], const float col[4], float *rowbuf, int row_stride, int x, int y, rcti *mask); +void mask_array(unsigned int mask, float filt[3][3]); + +/** + * Alpha-over blending for floats. + */ +void addAlphaOverFloat(float dest[4], const float source[4]); + +/** + * Alpha-under blending for floats. + */ +void addAlphaUnderFloat(float dest[4], const float source[4]); + + +/** + * Same for floats + */ +void addalphaAddfacFloat(float dest[4], const float source[4], char addfac); + +/** + * dest = dest + source + */ +void addalphaAddFloat(float dest[4], const float source[4]); + +#endif /* __PIXELBLENDING_H__ */ diff --git a/source/blender/render/intern/include/pixelshading.h b/source/blender/render/intern/include/pixelshading.h new file mode 100644 index 00000000000..0e630eda475 --- /dev/null +++ b/source/blender/render/intern/include/pixelshading.h @@ -0,0 +1,62 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * Contributor(s): 2004-2006, Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/pixelshading.h + * \ingroup render + * + * These functions determine what actual color a pixel will have. + */ + +#ifndef __PIXELSHADING_H__ +#define __PIXELSHADING_H__ + + +/** + * Render the pixel at (x,y) for object ap. Apply the jitter mask. + * Output is given in float collector[4]. The type vector: + * t[0] - min. distance + * t[1] - face/halo index + * t[2] - jitter mask + * t[3] - type ZB_POLY or ZB_HALO + * t[4] - max. distance + * mask is pixel coverage in bits + * \return pointer to the object + */ +int shadeHaloFloat(HaloRen *har, + float *col, int zz, + float dist, float xn, + float yn, short flarec); + +/** + * Render the sky at pixel (x, y). 
+ */ +void shadeSkyPixel(float collector[4], float fx, float fy, short thread); +void shadeSkyView(float col_r[3], const float rco[3], const float view[3], const float dxyview[2], short thread); +void shadeAtmPixel(struct SunSky *sunsky, float *collector, float fx, float fy, float distance); +void shadeSunView(float col_r[3], const float view[3]); +/* ------------------------------------------------------------------------- */ + +#endif + diff --git a/source/blender/render/intern/include/pointdensity.h b/source/blender/render/intern/include/pointdensity.h new file mode 100644 index 00000000000..eadf714c1ba --- /dev/null +++ b/source/blender/render/intern/include/pointdensity.h @@ -0,0 +1,51 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * The Original Code is: all of this file. 
+ * + * Contributor(s): Matt Ebb + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/pointdensity.h + * \ingroup render + */ + + +#ifndef __POINTDENSITY_H__ +#define __POINTDENSITY_H__ + +/** + * Make point density kd-trees for all point density textures in the scene + */ + +struct PointDensity; +struct Render; +struct TexResult; + +void free_pointdensity(struct PointDensity *pd); +void cache_pointdensity(struct Render *re, struct PointDensity *pd); +void make_pointdensities(struct Render *re); +void free_pointdensities(struct Render *re); +int pointdensitytex(struct Tex *tex, const float texvec[3], struct TexResult *texres); + +#endif /* __POINTDENSITY_H__ */ + diff --git a/source/blender/render/intern/include/raycounter.h b/source/blender/render/intern/include/raycounter.h new file mode 100644 index 00000000000..e16c6e13c7e --- /dev/null +++ b/source/blender/render/intern/include/raycounter.h @@ -0,0 +1,74 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/raycounter.h + * \ingroup render + */ + + +#ifndef __RAYCOUNTER_H__ +#define __RAYCOUNTER_H__ + +//#define RE_RAYCOUNTER /* enable counters per ray, useful for measuring raytrace structures performance */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef RE_RAYCOUNTER + +/* ray counter functions */ + +typedef struct RayCounter { + struct { + unsigned long long test, hit; + } faces, bb, simd_bb, raycast, raytrace_hint, rayshadow_last_hit; +} RayCounter; + +#define RE_RC_INIT(isec, shi) (isec).raycounter = &((shi).shading.raycounter) +void RE_RC_INFO(RayCounter *rc); +void RE_RC_MERGE(RayCounter *rc, RayCounter *tmp); +#define RE_RC_COUNT(var) (var)++ + +extern RayCounter re_rc_counter[]; + +#else + +/* ray counter stubs */ + +#define RE_RC_INIT(isec,shi) +#define RE_RC_INFO(rc) +#define RE_RC_MERGE(dest,src) +#define RE_RC_COUNT(var) + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/source/blender/render/intern/include/rayintersection.h b/source/blender/render/intern/include/rayintersection.h new file mode 100644 index 00000000000..a303301ad3b --- /dev/null +++ b/source/blender/render/intern/include/rayintersection.h @@ -0,0 +1,136 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2007 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + * RE_raytrace.h: ray tracing api, can be used independently from the renderer. + */ + +/** \file blender/render/intern/include/rayintersection.h + * \ingroup render + */ + + +#ifndef __RAYINTERSECTION_H__ +#define __RAYINTERSECTION_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "BLI_math_geom.h" + +struct RayObject; + +/* Ray Hints */ + +#define RE_RAY_LCTS_MAX_SIZE 256 +#define RT_USE_LAST_HIT /* last shadow hit is reused before raycasting on whole tree */ +//#define RT_USE_HINT /* last hit object is reused before raycasting on whole tree */ + +typedef struct LCTSHint { + int size; + struct RayObject *stack[RE_RAY_LCTS_MAX_SIZE]; +} LCTSHint; + +typedef struct RayHint { + union { LCTSHint lcts; } data; +} RayHint; + +/* Ray Intersection */ + +typedef struct Isect { + /* ray start, direction (normalized vector), and max distance. on hit, + * the distance is modified to be the distance to the hit point. 
*/ + float start[3]; + float dir[3]; + float dist; + + /* for envmap and incremental view update renders */ + float origstart[3]; + float origdir[3]; + + /* precomputed values to accelerate bounding box intersection */ + int bv_index[6]; + float idot_axis[3]; + + /* intersection options */ + int mode; /* RE_RAY_SHADOW, RE_RAY_MIRROR, RE_RAY_SHADOW_TRA */ + int lay; /* -1 default, set for layer lamps */ + int skip; /* skip flags */ + int check; /* check flags */ + void *userdata; /* used by bake check */ + + /* hit information */ + float u, v; + int isect; /* which half of quad */ + + struct { + void *ob; + void *face; + } hit, orig; + + /* last hit optimization */ + struct RayObject *last_hit; + + /* hints */ +#ifdef RT_USE_HINT + RayTraceHint *hint, *hit_hint; +#endif + RayHint *hint; + + /* ray counter */ +#ifdef RE_RAYCOUNTER + RayCounter *raycounter; +#endif + + /* Precalculated coefficients for watertight intersection check. */ + struct IsectRayPrecalc isect_precalc; +} Isect; + +/* ray types */ +#define RE_RAY_SHADOW 0 +#define RE_RAY_MIRROR 1 +#define RE_RAY_SHADOW_TRA 2 + +/* skip options */ +#define RE_SKIP_CULLFACE (1 << 0) +/* if using this flag then *face should be a pointer to a VlakRen */ +#define RE_SKIP_VLR_NEIGHBOUR (1 << 1) + +/* check options */ +#define RE_CHECK_VLR_NONE 0 +#define RE_CHECK_VLR_RENDER 1 +#define RE_CHECK_VLR_NON_SOLID_MATERIAL 2 +#define RE_CHECK_VLR_BAKE 3 + +/* arbitrary, but can't use e.g. 
FLT_MAX because of precision issues */ +#define RE_RAYTRACE_MAXDIST 1e15f +#define RE_RAYTRACE_EPSILON 0.0f + +#ifdef __cplusplus +} +#endif + +#endif /* __RAYINTERSECTION_H__ */ + diff --git a/source/blender/render/intern/include/render_types.h b/source/blender/render/intern/include/render_types.h index 8308b5e76e4..fd24f4eb053 100644 --- a/source/blender/render/intern/include/render_types.h +++ b/source/blender/render/intern/include/render_types.h @@ -52,10 +52,10 @@ struct Main; /* this is handed over to threaded hiding/passes/shading engine */ typedef struct RenderPart { struct RenderPart *next, *prev; - + RenderResult *result; /* result of part rendering */ ListBase fullresult; /* optional full sample buffers */ - + rcti disprect; /* part coordinates within total picture */ int rectx, recty; /* the size */ int nr; /* nr is partnr */ @@ -74,10 +74,10 @@ struct Render { struct Render *next, *prev; char name[RE_MAXNAME]; int slot; - + /* state settings */ short flag, ok, result_ok; - + /* result of rendering */ RenderResult *result; /* if render with single-layer option, other rendered layers are stored here */ @@ -88,29 +88,29 @@ struct Render { * write lock, all external code must use a read lock. internal code is assumed * to not conflict with writes, so no lock used for that */ ThreadRWMutex resultmutex; - + /* window size, display rect, viewplane */ int winx, winy; /* buffer width and height with percentage applied * without border & crop. convert to long before multiplying together to avoid overflow. */ rcti disprect; /* part within winx winy */ rctf viewplane; /* mapped on winx winy */ - + /* final picture width and height (within disprect) */ int rectx, recty; - - /* real maximum size of parts after correction for minimum + + /* real maximum size of parts after correction for minimum * partx*xparts can be larger than rectx, in that case last part is smaller */ int partx, party; - + /* Camera transform, only used by Freestyle. 
*/ float viewmat[4][4], viewinv[4][4]; float viewmat_orig[4][4]; /* for incremental render */ float winmat[4][4]; - + /* clippping */ float clipsta; float clipend; - + /* main, scene, and its full copy of renderdata and world */ struct Main *main; Scene *scene; @@ -119,13 +119,13 @@ struct Render { int active_view_layer; struct Object *camera_override; unsigned int lay, layer_override; - + ThreadRWMutex partsmutex; ListBase parts; - + /* render engine */ struct RenderEngine *engine; - + #ifdef WITH_FREESTYLE struct Main *freestyle_bmain; ListBase freestyle_renders; @@ -140,17 +140,17 @@ struct Render { void *duh; void (*current_scene_update)(void *handle, struct Scene *scene); void *suh; - + void (*stats_draw)(void *handle, RenderStats *ri); void *sdh; void (*progress)(void *handle, float i); void *prh; - + void (*draw_lock)(void *handle, int i); void *dlh; int (*test_break)(void *handle); void *tbh; - + RenderStats i; struct ReportList *reports; diff --git a/source/blender/render/intern/include/rendercore.h b/source/blender/render/intern/include/rendercore.h new file mode 100644 index 00000000000..aa3efca9e5b --- /dev/null +++ b/source/blender/render/intern/include/rendercore.h @@ -0,0 +1,105 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): none yet. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __RENDERCORE_H__ +#define __RENDERCORE_H__ + +/** \file blender/render/intern/include/rendercore.h + * \ingroup render + */ + +#include "render_types.h" + +#include "RE_engine.h" + +#include "DNA_node_types.h" + +#include "NOD_composite.h" + +struct ShadeInput; +struct ShadeResult; +struct World; +struct RenderPart; +struct RenderLayer; +struct RayObject; + +/* ------------------------------------------------------------------------- */ + +typedef struct PixStr { + struct PixStr *next; + int obi, facenr, z, maskz; + unsigned short mask; + short shadfac; +} PixStr; + +typedef struct PixStrMain { + struct PixStrMain *next, *prev; + struct PixStr *ps; + int counter; +} PixStrMain; + +/* ------------------------------------------------------------------------- */ + + +void calc_view_vector(float view[3], float x, float y); +float mistfactor(float zcor, const float co[3]); /* dist and height, return alpha */ + +void renderspothalo(struct ShadeInput *shi, float col[4], float alpha); +void add_halo_flare(Render *re); + +void calc_renderco_zbuf(float co[3], const float view[3], int z); +void calc_renderco_ortho(float co[3], float x, float y, int z); + +int count_mask(unsigned short mask); + +void zbufshade_tile(struct RenderPart *pa); +void zbufshadeDA_tile(struct RenderPart *pa); + +void zbufshade_sss_tile(struct RenderPart *pa); + +int get_sample_layers(struct RenderPart *pa, struct RenderLayer *rl, struct RenderLayer **rlpp); + +void render_internal_update_passes(struct RenderEngine *engine, struct Scene *scene, struct SceneRenderLayer *srl); + + +/* -------- ray.c ------- */ + +struct RayObject *RE_rayobject_create(int type, int size, int octree_resolution); + +extern void freeraytree(Render *re); +extern void makeraytree(Render *re); 
+struct RayObject* makeraytree_object(Render *re, ObjectInstanceRen *obi); + +extern void ray_shadow(ShadeInput *shi, LampRen *lar, float shadfac[4]); +extern void ray_trace(ShadeInput *shi, ShadeResult *); +extern void ray_ao(ShadeInput *shi, float ao[3], float env[3]); +extern void init_jitter_plane(LampRen *lar); +extern void init_ao_sphere(Render *re, struct World *wrld); +extern void init_render_qmcsampler(Render *re); +extern void free_render_qmcsampler(Render *re); + +#endif /* __RENDERCORE_H__ */ diff --git a/source/blender/render/intern/include/shading.h b/source/blender/render/intern/include/shading.h new file mode 100644 index 00000000000..e306c3c075c --- /dev/null +++ b/source/blender/render/intern/include/shading.h @@ -0,0 +1,105 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2006 Blender Foundation + * All rights reserved. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/shading.h + * \ingroup render + */ + + +struct ShadeInput; +struct ShadeResult; +struct RenderPart; +struct RenderLayer; +struct PixStr; +struct LampRen; +struct VlakRen; +struct StrandPoint; +struct ObjectInstanceRen; +struct Isect; + +/* shadeinput.c */ + +#define RE_MAX_OSA 16 + +/* needed to calculate shadow and AO for an entire pixel */ +typedef struct ShadeSample { + int tot; /* amount of shi in use, can be 1 for not FULL_OSA */ + + RenderLayer *rlpp[RE_MAX_OSA]; /* fast lookup from sample to renderlayer (fullsample buf) */ + + /* could be malloced once */ + ShadeInput shi[RE_MAX_OSA]; + ShadeResult shr[RE_MAX_OSA]; +} ShadeSample; + + + /* also the node shader callback */ +void shade_material_loop(struct ShadeInput *shi, struct ShadeResult *shr); + +void shade_input_set_triangle_i(struct ShadeInput *shi, struct ObjectInstanceRen *obi, struct VlakRen *vlr, short i1, short i2, short i3); +void shade_input_set_triangle(struct ShadeInput *shi, int obi, int facenr, int normal_flip); +void shade_input_copy_triangle(struct ShadeInput *shi, struct ShadeInput *from); +void shade_input_calc_viewco(struct ShadeInput *shi, float x, float y, float z, float view[3], float dxyview[2], float co[3], float dxco[3], float dyco[3]); +void shade_input_set_viewco(struct ShadeInput *shi, float x, float y, float sx, float sy, float z); +void shade_input_set_uv(struct ShadeInput *shi); +void shade_input_set_normals(struct ShadeInput *shi); +void shade_input_set_vertex_normals(struct ShadeInput *shi); +void shade_input_flip_normals(struct ShadeInput *shi); +void shade_input_set_shade_texco(struct ShadeInput *shi); +void shade_input_set_strand(struct ShadeInput *shi, struct StrandRen *strand, struct StrandPoint *spoint); +void shade_input_set_strand_texco(struct ShadeInput *shi, struct StrandRen *strand, struct StrandVert *svert, struct StrandPoint *spoint); +void shade_input_do_shade(struct 
ShadeInput *shi, struct ShadeResult *shr); + +void shade_input_init_material(struct ShadeInput *shi); +void shade_input_initialize(struct ShadeInput *shi, struct RenderPart *pa, struct RenderLayer *rl, int sample); + +void shade_sample_initialize(struct ShadeSample *ssamp, struct RenderPart *pa, struct RenderLayer *rl); +void shade_samples_do_AO(struct ShadeSample *ssamp); +void shade_samples_fill_with_ps(struct ShadeSample *ssamp, struct PixStr *ps, int x, int y); +int shade_samples(struct ShadeSample *ssamp, struct PixStr *ps, int x, int y); + +void vlr_set_uv_indices(struct VlakRen *vlr, int *i1, int *i2, int *i3); + +void calc_R_ref(struct ShadeInput *shi); + +void barycentric_differentials_from_position( + const float co[3], const float v1[3], const float v2[3], const float v3[3], + const float dxco[3], const float dyco[3], const float facenor[3], const bool differentials, + float *u, float *v, float *dx_u, float *dx_v, float *dy_u, float *dy_v); + +/* shadeoutput. */ +void shade_lamp_loop(struct ShadeInput *shi, struct ShadeResult *shr); + +void shade_color(struct ShadeInput *shi, ShadeResult *shr); + +void ambient_occlusion(struct ShadeInput *shi); +void environment_lighting_apply(struct ShadeInput *shi, struct ShadeResult *shr); + +ListBase *get_lights(struct ShadeInput *shi); +float lamp_get_visibility(struct LampRen *lar, const float co[3], float lv[3], float *dist); +void lamp_get_shadow(struct LampRen *lar, ShadeInput *shi, float inp, float shadfac[4], int do_real); + +float fresnel_fac(const float view[3], const float vn[3], float fresnel, float fac); + +/* rayshade.c */ +extern void shade_ray(struct Isect *is, struct ShadeInput *shi, struct ShadeResult *shr); diff --git a/source/blender/render/intern/include/strand.h b/source/blender/render/intern/include/strand.h new file mode 100644 index 00000000000..f4e22c78b42 --- /dev/null +++ b/source/blender/render/intern/include/strand.h @@ -0,0 +1,99 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * 
This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Contributor(s): Brecht Van Lommel. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/strand.h + * \ingroup render + */ + + +#ifndef __STRAND_H__ +#define __STRAND_H__ + +struct StrandVert; +struct StrandRen; +struct StrandBuffer; +struct ShadeSample; +struct StrandPart; +struct Render; +struct ZSpan; +struct ObjectInstanceRen; +struct StrandSurface; +struct DerivedMesh; +struct ObjectRen; + +typedef struct StrandPoint { + /* position within segment */ + float t; + + /* camera space */ + float co[3]; + float nor[3]; + float tan[3]; + float strandco; + float width; + + /* derivatives */ + float dtco[3], dsco[3]; + float dtstrandco; + + /* outer points */ + float co1[3], co2[3]; + float hoco1[4], hoco2[4]; + float zco1[3], zco2[3]; + int clip1, clip2; + + /* screen space */ + float hoco[4]; + float x, y; + + /* simplification */ + float alpha; +} StrandPoint; + +typedef struct StrandSegment { + struct StrandVert *v[4]; + struct StrandRen *strand; + struct StrandBuffer *buffer; + struct ObjectInstanceRen *obi; + float sqadaptcos; + + StrandPoint point1, point2; + int shaded; +} StrandSegment; + +struct StrandShadeCache; +typedef struct StrandShadeCache StrandShadeCache; + +void strand_eval_point(StrandSegment 
*sseg, StrandPoint *spoint); +void render_strand_segment(struct Render *re, float winmat[4][4], struct StrandPart *spart, struct ZSpan *zspan, int totzspan, StrandSegment *sseg); +void strand_minmax(struct StrandRen *strand, float min[3], float max[3], const float width); + +struct StrandSurface *cache_strand_surface(struct Render *re, struct ObjectRen *obr, struct DerivedMesh *dm, float mat[4][4], int timeoffset); +void free_strand_surface(struct Render *re); + +struct StrandShadeCache *strand_shade_cache_create(void); +void strand_shade_cache_free(struct StrandShadeCache *cache); +void strand_shade_segment(struct Render *re, struct StrandShadeCache *cache, struct StrandSegment *sseg, struct ShadeSample *ssamp, float t, float s, int addpassflag); +void strand_shade_unref(struct StrandShadeCache *cache, struct ObjectInstanceRen *obi, struct StrandVert *svert); + +#endif + diff --git a/source/blender/render/intern/include/sunsky.h b/source/blender/render/intern/include/sunsky.h new file mode 100644 index 00000000000..c608f9fc48c --- /dev/null +++ b/source/blender/render/intern/include/sunsky.h @@ -0,0 +1,81 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * Contributor(s): zaghaghi + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/sunsky.h + * \ingroup render + */ + +#ifndef __SUNSKY_H__ +#define __SUNSKY_H__ + +// #define SPECTRUM_MAX_COMPONENTS 100 + +typedef struct SunSky { + short effect_type, skyblendtype, sky_colorspace; + float turbidity; + float theta, phi; + + float toSun[3]; + + /*float sunSpectralRaddata[SPECTRUM_MAX_COMPONENTS];*/ + float sunSolidAngle; + + float zenith_Y, zenith_x, zenith_y; + + float perez_Y[5], perez_x[5], perez_y[5]; + + /* suggested by glome in patch [#8063] */ + float horizon_brightness; + float spread; + float sun_brightness; + float sun_size; + float backscattered_light; + float skyblendfac; + float sky_exposure; + + float atm_HGg; + + float atm_SunIntensity; + float atm_InscatteringMultiplier; + float atm_ExtinctionMultiplier; + float atm_BetaRayMultiplier; + float atm_BetaMieMultiplier; + float atm_DistanceMultiplier; + + float atm_BetaRay[3]; + float atm_BetaDashRay[3]; + float atm_BetaMie[3]; + float atm_BetaDashMie[3]; + float atm_BetaRM[3]; +} SunSky; + +void InitSunSky(struct SunSky *sunsky, float turb, const float toSun[3], float horizon_brightness, + float spread, float sun_brightness, float sun_size, float back_scatter, + float skyblendfac, short skyblendtype, float sky_exposure, float sky_colorspace); + +void GetSkyXYZRadiance(struct SunSky *sunsky, float theta, float phi, float color_out[3]); +void GetSkyXYZRadiancef(struct SunSky *sunsky, const float varg[3], float color_out[3]); +void InitAtmosphere(struct SunSky *sunSky, float sun_intens, float mief, float rayf, float inscattf, float extincf, float disf); +void AtmospherePixleShader(struct SunSky *sunSky, float view[3], float s, float rgb[3]); +void ClipColor(float c[3]); + +#endif /*__SUNSKY_H__*/ diff --git a/source/blender/render/intern/include/texture_ocean.h b/source/blender/render/intern/include/texture_ocean.h new file mode 100644 index 00000000000..6d7bc6fe7b0 
--- /dev/null +++ b/source/blender/render/intern/include/texture_ocean.h @@ -0,0 +1,35 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * Contributors: Matt Ebb + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __TEXTURE_OCEAN_H__ +#define __TEXTURE_OCEAN_H__ + +/** \file blender/render/intern/include/texture_ocean.h + * \ingroup render + */ + +int ocean_texture(struct Tex *tex, const float texvec[2], struct TexResult *texres); + +#endif /* __TEXTURE_OCEAN_H__ */ diff --git a/source/blender/render/intern/include/voxeldata.h b/source/blender/render/intern/include/voxeldata.h new file mode 100644 index 00000000000..041ca78a799 --- /dev/null +++ b/source/blender/render/intern/include/voxeldata.h @@ -0,0 +1,47 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): Raul Fernandez Hernandez (Farsthary), Matt Ebb. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/include/voxeldata.h + * \ingroup render + */ + +#ifndef __VOXELDATA_H__ +#define __VOXELDATA_H__ + +struct Render; +struct Tex; +struct TexResult; + +typedef struct VoxelDataHeader { + int resolX, resolY, resolZ; + int frames; +} VoxelDataHeader; + +/* Tex is referenced through its struct tag (forward-declared above) so these prototypes do not depend on the includer providing the Tex typedef. */ +void cache_voxeldata(struct Tex *tex, int scene_frame); +void make_voxeldata(struct Render *re); +int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texres); + +#endif /* __VOXELDATA_H__ */ diff --git a/source/blender/render/intern/include/zbuf.h b/source/blender/render/intern/include/zbuf.h index 3dfcbc355c4..0654a4f8df6 100644 --- a/source/blender/render/intern/include/zbuf.h +++ b/source/blender/render/intern/include/zbuf.h @@ -36,7 +36,7 @@ /* span fill in method, is also used to localize data for zbuffering */ typedef struct ZSpan { int rectx, recty; /* range for clipping */ - + int miny1, maxy1, miny2, maxy2; /* actual filled in range */ const float *minp1, *maxp1, *minp2, *maxp2; /* vertex pointers detect min/max range in */ float *span1, *span2; diff --git a/source/blender/render/intern/raytrace/bvh.h b/source/blender/render/intern/raytrace/bvh.h new file mode 100644 index 00000000000..0f9a506762b --- /dev/null +++ 
b/source/blender/render/intern/raytrace/bvh.h @@ -0,0 +1,407 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/bvh.h + * \ingroup render + */ + + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" + +#include "raycounter.h" +#include "rayintersection.h" +#include "rayobject.h" +#include "rayobject_hint.h" +#include "rayobject_rtbuild.h" + +#include <assert.h> + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + +#ifndef __BVH_H__ +#define __BVH_H__ + +#ifdef __SSE__ +inline int test_bb_group4(__m128 *bb_group, const Isect *isec) +{ + const __m128 tmin0 = _mm_setzero_ps(); + const __m128 tmax0 = _mm_set_ps1(isec->dist); + + float start[3], idot_axis[3]; + copy_v3_v3(start, isec->start); + copy_v3_v3(idot_axis, isec->idot_axis); + + const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) ); + const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) ); + const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) ); + const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) ); + const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) ); + const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) ); + + return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); +} +#endif + +/* + * Tests whether the ray hits the bounding volume of the given node within the [0, isec->dist] range; returns 1 on a hit, 0 otherwise + * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe + * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9] + */ +static inline int rayobject_bb_intersect_test(const Isect *isec, const 
float *_bb) +{ + const float *bb = _bb; + + float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; + float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; + float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; + float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; + float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; + float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; + + RE_RC_COUNT(isec->raycounter->bb.test); + + if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; + if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0; + if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; + RE_RC_COUNT(isec->raycounter->bb.hit); + + return 1; +} + +/* bvh tree generics */ +template<class Tree> static void bvh_add(Tree *obj, RayObject *ob) +{ + rtbuild_add(obj->builder, ob); +} + +template<class Node> +inline bool is_leaf(Node *node) +{ + return !RE_rayobject_isAligned(node); +} + +template<class Tree> static void bvh_done(Tree *obj); + +template<class Tree> +static void bvh_free(Tree *obj) +{ + if (obj->builder) + rtbuild_free(obj->builder); + + if (obj->node_arena) + BLI_memarena_free(obj->node_arena); + + MEM_freeN(obj); +} + +template<class Tree> +static void bvh_bb(Tree *obj, float *min, float *max) +{ + if (obj->root) + bvh_node_merge_bb(obj->root, min, max); +} + + +template<class Tree> +static float bvh_cost(Tree *obj) +{ + assert(obj->cost >= 0.0f); + return obj->cost; +} + + + +/* bvh tree nodes generics */ +template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec) +{ + return rayobject_bb_intersect_test(isec, (const float *)node->bb); +} + + +template<class Node> +static inline void bvh_node_merge_bb(Node *node, float min[3], float max[3]) +{ + if (is_leaf(node)) { + RE_rayobject_merge_bb((RayObject *)node, min, max); + } + else { + DO_MIN(node->bb, min); + 
DO_MAX(node->bb + 3, max); + } +} + + + +/* + * recursively traverse a BVH looking for a rayhit using a local stack + */ +template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos); + +template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT, bool SHADOW> +static int bvh_node_stack_raycast(Node *root, Isect *isec) +{ + Node *stack[MAX_STACK_SIZE]; + int hit = 0, stack_pos = 0; + + if (!TEST_ROOT && !is_leaf(root)) + bvh_node_push_childs(root, isec, stack, stack_pos); + else + stack[stack_pos++] = root; + + while (stack_pos) { + Node *node = stack[--stack_pos]; + if (!is_leaf(node)) { + if (bvh_node_hit_test(node, isec)) { + bvh_node_push_childs(node, isec, stack, stack_pos); + assert(stack_pos <= MAX_STACK_SIZE); + } + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node, isec); + if (SHADOW && hit) return hit; + } + } + return hit; +} + + +#ifdef __SSE__ +/* + * Generic SIMD bvh recursion + * this was created to be able to use any simd (with the cost of some memmoves) + * it can take advantage of any SIMD width and doesn't need any special tree care + */ +template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT> +static int bvh_node_stack_raycast_simd(Node *root, Isect *isec) +{ + Node *stack[MAX_STACK_SIZE]; + + int hit = 0, stack_pos = 0; + + if (!TEST_ROOT) { + if (!is_leaf(root)) { + if (!is_leaf(root->child)) + bvh_node_push_childs(root, isec, stack, stack_pos); + else + return RE_rayobject_intersect( (RayObject *)root->child, isec); + } + else + return RE_rayobject_intersect( (RayObject *)root, isec); + } + else { + if (!is_leaf(root)) + stack[stack_pos++] = root; + else + return RE_rayobject_intersect( (RayObject *)root, isec); + } + + while (true) { + //Use SIMD 4 + if (stack_pos >= 4) { + __m128 t_bb[6]; + Node *t_node[4]; + + stack_pos -= 4; + + /* prepare the 4BB for SIMD */ + t_node[0] = stack[stack_pos + 0]->child; + t_node[1] = stack[stack_pos + 1]->child; + t_node[2] = 
stack[stack_pos + 2]->child; + t_node[3] = stack[stack_pos + 3]->child; + + const float *bb0 = stack[stack_pos + 0]->bb; + const float *bb1 = stack[stack_pos + 1]->bb; + const float *bb2 = stack[stack_pos + 2]->bb; + const float *bb3 = stack[stack_pos + 3]->bb; + + const __m128 x0y0x1y1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1, 0, 1, 0) ); + const __m128 x2y2x3y3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1, 0, 1, 0) ); + t_bb[0] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2, 0, 2, 0) ); + t_bb[1] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3, 1, 3, 1) ); + + const __m128 z0X0z1X1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3, 2, 3, 2) ); + const __m128 z2X2z3X3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3, 2, 3, 2) ); + t_bb[2] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2, 0, 2, 0) ); + t_bb[3] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3, 1, 3, 1) ); + + const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps(_mm_load_ps(bb0 + 4), _mm_load_ps(bb1 + 4), _MM_SHUFFLE(1, 0, 1, 0) ); + const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps(_mm_load_ps(bb2 + 4), _mm_load_ps(bb3 + 4), _MM_SHUFFLE(1, 0, 1, 0) ); + t_bb[4] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2, 0, 2, 0) ); + t_bb[5] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3, 1, 3, 1) ); +#if 0 + for (int i = 0; i < 4; i++) + { + Node *t = stack[stack_pos + i]; + assert(!is_leaf(t)); + + float *bb = ((float *)t_bb) + i; + bb[4 * 0] = t->bb[0]; + bb[4 * 1] = t->bb[1]; + bb[4 * 2] = t->bb[2]; + bb[4 * 3] = t->bb[3]; + bb[4 * 4] = t->bb[4]; + bb[4 * 5] = t->bb[5]; + t_node[i] = t->child; + } +#endif + RE_RC_COUNT(isec->raycounter->simd_bb.test); + int res = test_bb_group4(t_bb, isec); + + for (int i = 0; i < 4; i++) + if (res & (1 << i)) { + RE_RC_COUNT(isec->raycounter->simd_bb.hit); + if (!is_leaf(t_node[i])) { + for (Node *t = t_node[i]; t; t = t->sibling) { + assert(stack_pos < MAX_STACK_SIZE); + stack[stack_pos++] = t; + 
} + } + else { + hit |= RE_rayobject_intersect( (RayObject *)t_node[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + else if (stack_pos > 0) { + Node *node = stack[--stack_pos]; + assert(!is_leaf(node)); + + if (bvh_node_hit_test(node, isec)) { + if (!is_leaf(node->child)) { + bvh_node_push_childs(node, isec, stack, stack_pos); + assert(stack_pos <= MAX_STACK_SIZE); + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node->child, isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + else break; + } + return hit; +} +#endif + +/* + * recursively traverse a BVH looking for a rayhit using system stack + */ +#if 0 +template<class Node> +static int bvh_node_raycast(Node *node, Isect *isec) +{ + int hit = 0; + if (bvh_test_node(node, isec)) + { + if (isec->idot_axis[node->split_axis] > 0.0f) + { + int i; + for (i = 0; i < BVH_NCHILDS; i++) + if (!is_leaf(node->child[i])) + { + if (node->child[i] == 0) break; + + hit |= bvh_node_raycast(node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + else { + int i; + for (i = BVH_NCHILDS - 1; i >= 0; i--) + if (!is_leaf(node->child[i])) + { + if (node->child[i]) + { + hit |= dfs_raycast(node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + else { + hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec); + if (hit && isec->mode == RE_RAY_SHADOW) return hit; + } + } + } + return hit; +} +#endif + +template<class Node, class HintObject> +static void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject) +{ + assert(hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE); + + if (is_leaf(node)) { + hint->stack[hint->size++] = (RayObject *)node; + } + else { + int childs = count_childs(node); + if (hint->size + reserve_space + childs 
<= RE_RAY_LCTS_MAX_SIZE) { + int result = hint_test_bb(hintObject, node->bb, node->bb + 3); + if (result == HINT_RECURSE) { + /* We are 100% sure the ray will be pass inside this node */ + bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject); + } + else if (result == HINT_ACCEPT) { + hint->stack[hint->size++] = (RayObject *)node; + } + } + else { + hint->stack[hint->size++] = (RayObject *)node; + } + } +} + + +template<class Tree> +static RayObjectAPI *bvh_get_api(int maxstacksize); + + +template<class Tree, int DFS_STACK_SIZE> +static inline RayObject *bvh_create_tree(int size) +{ + Tree *obj = (Tree *)MEM_callocN(sizeof(Tree), "BVHTree"); + assert(RE_rayobject_isAligned(obj)); /* RayObject API assumes real data to be 4-byte aligned */ + + obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE); + obj->root = NULL; + + obj->node_arena = NULL; + obj->builder = rtbuild_create(size); + + return RE_rayobject_unalignRayAPI((RayObject *) obj); +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject.cpp b/source/blender/render/intern/raytrace/rayobject.cpp new file mode 100644 index 00000000000..fee877b311d --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject.cpp @@ -0,0 +1,534 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject.cpp + * \ingroup render + */ + + +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "DNA_material_types.h" + +#include "rayintersection.h" +#include "rayobject.h" +#include "raycounter.h" +#include "render_types.h" +#include "renderdatabase.h" + +/* RayFace + * + * note we force always inline here, because compiler refuses to otherwise + * because function is too long. Since this is code that is called billions + * of times we really do want to inline. */ + +MALWAYS_INLINE RayObject *rayface_from_coords(RayFace *rayface, void *ob, void *face, + float *v1, float *v2, float *v3, float *v4) +{ + rayface->ob = ob; + rayface->face = face; + + copy_v3_v3(rayface->v1, v1); + copy_v3_v3(rayface->v2, v2); + copy_v3_v3(rayface->v3, v3); + + if (v4) { + copy_v3_v3(rayface->v4, v4); + rayface->quad = 1; + } + else { + rayface->quad = 0; + } + + return RE_rayobject_unalignRayFace(rayface); +} + +MALWAYS_INLINE void rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr) +{ + rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? 
vlr->v4->co : NULL); + + if (obi->transform_primitives) { + mul_m4_v3(obi->mat, rayface->v1); + mul_m4_v3(obi->mat, rayface->v2); + mul_m4_v3(obi->mat, rayface->v3); + + if (RE_rayface_isQuad(rayface)) + mul_m4_v3(obi->mat, rayface->v4); + } +} + +RayObject *RE_rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr) +{ + return rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? vlr->v4->co : NULL); +} + +RayObject *RE_rayface_from_coords(RayFace *rayface, void *ob, void *face, float *v1, float *v2, float *v3, float *v4) +{ + return rayface_from_coords(rayface, ob, face, v1, v2, v3, v4); +} + +/* VlakPrimitive */ + +RayObject *RE_vlakprimitive_from_vlak(VlakPrimitive *face, struct ObjectInstanceRen *obi, struct VlakRen *vlr) +{ + face->ob = obi; + face->face = vlr; + + return RE_rayobject_unalignVlakPrimitive(face); +} + +/* Checks for ignoring faces or materials */ + +MALWAYS_INLINE int vlr_check_intersect(Isect *is, ObjectInstanceRen *obi, VlakRen *vlr) +{ + /* for baking selected to active non-traceable materials might still + * be in the raytree */ + if (!(vlr->flag & R_TRACEBLE)) + return 0; + + /* I know... 
cpu cycle waste, might do smarter once */ + if (is->mode == RE_RAY_MIRROR) + return !(vlr->mat->mode & MA_ONLYCAST); + else + return (vlr->mat->mode2 & MA_CASTSHADOW) && (is->lay & obi->lay); +} + +MALWAYS_INLINE int vlr_check_intersect_solid(Isect *UNUSED(is), ObjectInstanceRen *UNUSED(obi), VlakRen *vlr) +{ + /* solid material types only */ + if (vlr->mat->material_type == MA_TYPE_SURFACE) + return 1; + else + return 0; +} + +MALWAYS_INLINE int vlr_check_bake(Isect *is, ObjectInstanceRen *obi, VlakRen *UNUSED(vlr)) +{ + return (obi->obr->ob != is->userdata) && (obi->obr->ob->flag & SELECT); +} + +/* Ray Triangle/Quad Intersection */ + +static bool isect_ray_tri_watertight_no_sign_check_v3( + const float ray_origin[3], const struct IsectRayPrecalc *isect_precalc, + const float v0[3], const float v1[3], const float v2[3], + float *r_lambda, float r_uv[2]) +{ + const int kx = isect_precalc->kx; + const int ky = isect_precalc->ky; + const int kz = isect_precalc->kz; + const float sx = isect_precalc->sx; + const float sy = isect_precalc->sy; + const float sz = isect_precalc->sz; + + /* Calculate vertices relative to ray origin. */ + const float a[3] = {v0[0] - ray_origin[0], v0[1] - ray_origin[1], v0[2] - ray_origin[2]}; + const float b[3] = {v1[0] - ray_origin[0], v1[1] - ray_origin[1], v1[2] - ray_origin[2]}; + const float c[3] = {v2[0] - ray_origin[0], v2[1] - ray_origin[1], v2[2] - ray_origin[2]}; + + const float a_kx = a[kx], a_ky = a[ky], a_kz = a[kz]; + const float b_kx = b[kx], b_ky = b[ky], b_kz = b[kz]; + const float c_kx = c[kx], c_ky = c[ky], c_kz = c[kz]; + + /* Perform shear and scale of vertices. */ + const float ax = a_kx - sx * a_kz; + const float ay = a_ky - sy * a_kz; + const float bx = b_kx - sx * b_kz; + const float by = b_ky - sy * b_kz; + const float cx = c_kx - sx * c_kz; + const float cy = c_ky - sy * c_kz; + + /* Calculate scaled barycentric coordinates. 
*/ + const float u = cx * by - cy * bx; + const float v = ax * cy - ay * cx; + const float w = bx * ay - by * ax; + float det; + + if ((u < 0.0f || v < 0.0f || w < 0.0f) && + (u > 0.0f || v > 0.0f || w > 0.0f)) + { + return false; + } + + /* Calculate determinant. */ + det = u + v + w; + if (UNLIKELY(det == 0.0f)) { + return false; + } + else { + /* Calculate scaled z-coordinates of vertices and use them to calculate + * the hit distance. + */ + const float t = (u * a_kz + v * b_kz + w * c_kz) * sz; + /* Normalize u, v and t. */ + const float inv_det = 1.0f / det; + if (r_uv) { + r_uv[0] = u * inv_det; + r_uv[1] = v * inv_det; + } + *r_lambda = t * inv_det; + return true; + } +} + +MALWAYS_INLINE int isec_tri_quad(const float start[3], + const struct IsectRayPrecalc *isect_precalc, + const RayFace *face, + float r_uv[2], float *r_lambda) +{ + float uv[2], l; + + if (isect_ray_tri_watertight_v3(start, isect_precalc, face->v1, face->v2, face->v3, &l, uv)) { + /* check if intersection is within ray length */ + if (l > -RE_RAYTRACE_EPSILON && l < *r_lambda) { + r_uv[0] = -uv[0]; + r_uv[1] = -uv[1]; + *r_lambda = l; + return 1; + } + } + + /* intersect second triangle in quad */ + if (RE_rayface_isQuad(face)) { + if (isect_ray_tri_watertight_v3(start, isect_precalc, face->v1, face->v3, face->v4, &l, uv)) { + /* check if intersection is within ray length */ + if (l > -RE_RAYTRACE_EPSILON && l < *r_lambda) { + r_uv[0] = -uv[0]; + r_uv[1] = -uv[1]; + *r_lambda = l; + return 2; + } + } + } + + return 0; +} + +/* Simpler yes/no Ray Triangle/Quad Intersection */ + +MALWAYS_INLINE int isec_tri_quad_neighbour(const float start[3], + const float dir[3], + const RayFace *face) +{ + float r[3]; + struct IsectRayPrecalc isect_precalc; + float uv[2], l; + + negate_v3_v3(r, dir); /* note, different than above function */ + + isect_ray_tri_watertight_v3_precalc(&isect_precalc, r); + + if (isect_ray_tri_watertight_no_sign_check_v3(start, &isect_precalc, face->v1, face->v2, face->v3, 
&l, uv)) { + return 1; + } + + /* intersect second triangle in quad */ + if (RE_rayface_isQuad(face)) { + if (isect_ray_tri_watertight_no_sign_check_v3(start, &isect_precalc, face->v1, face->v3, face->v4, &l, uv)) { + return 2; + } + } + + return 0; +} + +/* RayFace intersection with checks and neighbor verifaction included, + * Isect is modified if the face is hit. */ + +MALWAYS_INLINE int intersect_rayface(RayObject *hit_obj, RayFace *face, Isect *is) +{ + float dist, uv[2]; + int ok = 0; + + /* avoid self-intersection */ + if (is->orig.ob == face->ob && is->orig.face == face->face) + return 0; + + /* check if we should intersect this face */ + if (is->check == RE_CHECK_VLR_RENDER) { + if (vlr_check_intersect(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + } + else if (is->check == RE_CHECK_VLR_NON_SOLID_MATERIAL) { + if (vlr_check_intersect(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + if (vlr_check_intersect_solid(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + } + else if (is->check == RE_CHECK_VLR_BAKE) { + if (vlr_check_bake(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0) + return 0; + } + + /* ray counter */ + RE_RC_COUNT(is->raycounter->faces.test); + + dist = is->dist; + ok = isec_tri_quad(is->start, &is->isect_precalc, face, uv, &dist); + + if (ok) { + + /* when a shadow ray leaves a face, it can be little outside the edges + * of it, causing intersection to be detected in its neighbor face */ + if (is->skip & RE_SKIP_VLR_NEIGHBOUR) { + if (dist < 0.1f && is->orig.ob == face->ob) { + VlakRen *a = (VlakRen *)is->orig.face; + VlakRen *b = (VlakRen *)face->face; + ObjectRen *obr = ((ObjectInstanceRen *)face->ob)->obr; + + VertRen **va, **vb; + int *org_idx_a, *org_idx_b; + int i, j; + bool is_neighbor = false; + + /* "same" vertex means either the actual same VertRen, or the same 'final org index', if available + * (autosmooth only, currently). 
*/ + for (i = 0, va = &a->v1; !is_neighbor && i < 4 && *va; ++i, ++va) { + org_idx_a = RE_vertren_get_origindex(obr, *va, false); + for (j = 0, vb = &b->v1; !is_neighbor && j < 4 && *vb; ++j, ++vb) { + if (*va == *vb) { + is_neighbor = true; + } + else if (org_idx_a) { + org_idx_b = RE_vertren_get_origindex(obr, *vb, 0); + if (org_idx_b && *org_idx_a == *org_idx_b) { + is_neighbor = true; + } + } + } + } + + /* So there's a shared edge or vertex, let's intersect ray with self, if that's true + * we can safely return 1, otherwise we assume the intersection is invalid, 0 */ + if (is_neighbor) { + /* create RayFace from original face, transformed if necessary */ + RayFace origface; + ObjectInstanceRen *ob = (ObjectInstanceRen *)is->orig.ob; + rayface_from_vlak(&origface, ob, (VlakRen *)is->orig.face); + + if (!isec_tri_quad_neighbour(is->start, is->dir, &origface)) { + return 0; + } + } + } + } + + RE_RC_COUNT(is->raycounter->faces.hit); + + is->isect = ok; // which half of the quad + is->dist = dist; + is->u = uv[0]; is->v = uv[1]; + + is->hit.ob = face->ob; + is->hit.face = face->face; +#ifdef RT_USE_LAST_HIT + is->last_hit = hit_obj; +#endif + return 1; + } + + return 0; +} + +/* Intersection */ + +int RE_rayobject_raycast(RayObject *r, Isect *isec) +{ + int i; + + /* Pre-calculate orientation for watertight intersection checks. */ + isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir); + + RE_RC_COUNT(isec->raycounter->raycast.test); + + /* setup vars used on raycast */ + for (i = 0; i < 3; i++) { + isec->idot_axis[i] = 1.0f / isec->dir[i]; + + isec->bv_index[2 * i] = isec->idot_axis[i] < 0.0f ? 
1 : 0; + isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i]; + + isec->bv_index[2 * i] = i + 3 * isec->bv_index[2 * i]; + isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1]; + } + +#ifdef RT_USE_LAST_HIT + /* last hit heuristic */ + if (isec->mode == RE_RAY_SHADOW && isec->last_hit) { + RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.test); + + if (RE_rayobject_intersect(isec->last_hit, isec)) { + RE_RC_COUNT(isec->raycounter->raycast.hit); + RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.hit); + return 1; + } + } +#endif + +#ifdef RT_USE_HINT + isec->hit_hint = 0; +#endif + + if (RE_rayobject_intersect(r, isec)) { + RE_RC_COUNT(isec->raycounter->raycast.hit); + +#ifdef RT_USE_HINT + isec->hint = isec->hit_hint; +#endif + return 1; + } + + return 0; +} + +int RE_rayobject_intersect(RayObject *r, Isect *i) +{ + if (RE_rayobject_isRayFace(r)) { + return intersect_rayface(r, (RayFace *) RE_rayobject_align(r), i); + } + else if (RE_rayobject_isVlakPrimitive(r)) { + //TODO optimize (useless copy to RayFace to avoid duplicate code) + VlakPrimitive *face = (VlakPrimitive *) RE_rayobject_align(r); + RayFace nface; + rayface_from_vlak(&nface, face->ob, face->face); + + return intersect_rayface(r, &nface, i); + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + return r->api->raycast(r, i); + } + else { + assert(0); + return 0; + } +} + +/* Building */ + +void RE_rayobject_add(RayObject *r, RayObject *o) +{ + r = RE_rayobject_align(r); + return r->api->add(r, o); +} + +void RE_rayobject_done(RayObject *r) +{ + r = RE_rayobject_align(r); + r->api->done(r); +} + +void RE_rayobject_free(RayObject *r) +{ + r = RE_rayobject_align(r); + r->api->free(r); +} + +float RE_rayobject_cost(RayObject *r) +{ + if (RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r)) { + return 1.0f; + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + return r->api->cost(r); + } + else { + assert(0); + return 1.0f; + } +} + +/* Bounding 
Boxes */ + +void RE_rayobject_merge_bb(RayObject *r, float min[3], float max[3]) +{ + if (RE_rayobject_isRayFace(r)) { + RayFace *face = (RayFace *) RE_rayobject_align(r); + + DO_MINMAX(face->v1, min, max); + DO_MINMAX(face->v2, min, max); + DO_MINMAX(face->v3, min, max); + if (RE_rayface_isQuad(face)) DO_MINMAX(face->v4, min, max); + } + else if (RE_rayobject_isVlakPrimitive(r)) { + VlakPrimitive *face = (VlakPrimitive *) RE_rayobject_align(r); + RayFace nface; + rayface_from_vlak(&nface, face->ob, face->face); + + DO_MINMAX(nface.v1, min, max); + DO_MINMAX(nface.v2, min, max); + DO_MINMAX(nface.v3, min, max); + if (RE_rayface_isQuad(&nface)) DO_MINMAX(nface.v4, min, max); + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + r->api->bb(r, min, max); + } + else + assert(0); +} + +/* Hints */ + +void RE_rayobject_hint_bb(RayObject *r, RayHint *hint, float *min, float *max) +{ + if (RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r)) { + return; + } + else if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + return r->api->hint_bb(r, hint, min, max); + } + else + assert(0); +} + +/* RayObjectControl */ + +int RE_rayobjectcontrol_test_break(RayObjectControl *control) +{ + if (control->test_break) + return control->test_break(control->data); + + return 0; +} + +void RE_rayobject_set_control(RayObject *r, void *data, RE_rayobjectcontrol_test_break_callback test_break) +{ + if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + r->control.data = data; + r->control.test_break = test_break; + } +} + diff --git a/source/blender/render/intern/raytrace/rayobject_hint.h b/source/blender/render/intern/raytrace/rayobject_hint.h new file mode 100644 index 00000000000..88a32819bd2 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_hint.h @@ -0,0 +1,72 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General 
Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_hint.h + * \ingroup render + */ + + +#ifndef __RAYOBJECT_HINT_H__ +#define __RAYOBJECT_HINT_H__ + +#define HINT_RECURSE 1 +#define HINT_ACCEPT 0 +#define HINT_DISCARD -1 + +struct HintBB { + float bb[6]; +}; + +inline int hint_test_bb(HintBB *obj, float *Nmin, float *Nmax) +{ + if (bb_fits_inside(Nmin, Nmax, obj->bb, obj->bb + 3) ) + return HINT_RECURSE; + else + return HINT_ACCEPT; +} +#if 0 +struct HintFrustum { + float co[3]; + float no[4][3]; +}; + +inline int hint_test_bb(HintFrustum &obj, float *Nmin, float *Nmax) +{ + //if frustum inside BB + { + return HINT_RECURSE; + } + //if BB outside frustum + { + return HINT_DISCARD; + } + + return HINT_ACCEPT; +} +#endif + +#endif /* __RAYOBJECT_HINT_H__ */ diff --git a/source/blender/render/intern/raytrace/rayobject_instance.cpp b/source/blender/render/intern/raytrace/rayobject_instance.cpp new file mode 100644 index 00000000000..361e7963d96 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_instance.cpp @@ -0,0 +1,211 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute 
it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_instance.cpp + * \ingroup render + */ + + +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "rayintersection.h" +#include "rayobject.h" + +#define RE_COST_INSTANCE (1.0f) + +static int RE_rayobject_instance_intersect(RayObject *o, Isect *isec); +static void RE_rayobject_instance_free(RayObject *o); +static void RE_rayobject_instance_bb(RayObject *o, float *min, float *max); +static float RE_rayobject_instance_cost(RayObject *o); + +static void RE_rayobject_instance_hint_bb(RayObject *UNUSED(o), RayHint *UNUSED(hint), + float *UNUSED(min), float *UNUSED(max)) +{} + +static RayObjectAPI instance_api = +{ + RE_rayobject_instance_intersect, + NULL, //static void RE_rayobject_instance_add(RayObject *o, RayObject *ob); + NULL, //static void RE_rayobject_instance_done(RayObject *o); + RE_rayobject_instance_free, + RE_rayobject_instance_bb, + RE_rayobject_instance_cost, + RE_rayobject_instance_hint_bb +}; + +typedef struct InstanceRayObject { + RayObject rayobj; + 
RayObject *target; + + void *ob; //Object represented by this instance + void *target_ob; //Object represented by the inner RayObject, needed to handle self-intersection + + float global2target[4][4]; + float target2global[4][4]; + +} InstanceRayObject; + + +RayObject *RE_rayobject_instance_create(RayObject *target, float transform[4][4], void *ob, void *target_ob) +{ + InstanceRayObject *obj = (InstanceRayObject *)MEM_callocN(sizeof(InstanceRayObject), "InstanceRayObject"); + assert(RE_rayobject_isAligned(obj) ); /* RayObject API assumes real data to be 4-byte aligned */ + + obj->rayobj.api = &instance_api; + obj->target = target; + obj->ob = ob; + obj->target_ob = target_ob; + + copy_m4_m4(obj->target2global, transform); + invert_m4_m4(obj->global2target, obj->target2global); + + return RE_rayobject_unalignRayAPI((RayObject *) obj); +} + +static int RE_rayobject_instance_intersect(RayObject *o, Isect *isec) +{ + InstanceRayObject *obj = (InstanceRayObject *)o; + float start[3], dir[3], idot_axis[3], dist; + int changed = 0, i, res; + + // TODO - this is disabling self intersection on instances + if (isec->orig.ob == obj->ob && obj->ob) { + changed = 1; + isec->orig.ob = obj->target_ob; + } + + // backup old values + copy_v3_v3(start, isec->start); + copy_v3_v3(dir, isec->dir); + copy_v3_v3(idot_axis, isec->idot_axis); + dist = isec->dist; + + // transform to target coordinates system + mul_m4_v3(obj->global2target, isec->start); + mul_mat3_m4_v3(obj->global2target, isec->dir); + isec->dist *= normalize_v3(isec->dir); + + // update idot_axis and bv_index + for (i = 0; i < 3; i++) { + isec->idot_axis[i] = 1.0f / isec->dir[i]; + + isec->bv_index[2 * i] = isec->idot_axis[i] < 0.0f ? 1 : 0; + isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i]; + + isec->bv_index[2 * i] = i + 3 * isec->bv_index[2 * i]; + isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1]; + } + + // Pre-calculate orientation for watertight intersection checks. 
+ isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir); + + // raycast + res = RE_rayobject_intersect(obj->target, isec); + + // map dist into original coordinate space + if (res == 0) { + isec->dist = dist; + } + else { + // note we don't just multiply dist, because of possible + // non-uniform scaling in the transform matrix + float vec[3]; + + mul_v3_v3fl(vec, isec->dir, isec->dist); + mul_mat3_m4_v3(obj->target2global, vec); + + isec->dist = len_v3(vec); + isec->hit.ob = obj->ob; + +#ifdef RT_USE_LAST_HIT + // TODO support for last hit optimization in instances that can jump + // directly to the last hit face. + // For now it jumps directly to the last-hit instance root node. + isec->last_hit = RE_rayobject_unalignRayAPI((RayObject *) obj); +#endif + } + + // restore values + copy_v3_v3(isec->start, start); + copy_v3_v3(isec->dir, dir); + copy_v3_v3(isec->idot_axis, idot_axis); + + if (changed) + isec->orig.ob = obj->ob; + + // restore bv_index + for (i = 0; i < 3; i++) { + isec->bv_index[2 * i] = isec->idot_axis[i] < 0.0f ? 1 : 0; + isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i]; + + isec->bv_index[2 * i] = i + 3 * isec->bv_index[2 * i]; + isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1]; + } + + // Pre-calculate orientation for watertight intersection checks. + isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir); + + return res; +} + +static void RE_rayobject_instance_free(RayObject *o) +{ + InstanceRayObject *obj = (InstanceRayObject *)o; + MEM_freeN(obj); +} + +static float RE_rayobject_instance_cost(RayObject *o) +{ + InstanceRayObject *obj = (InstanceRayObject *)o; + return RE_rayobject_cost(obj->target) + RE_COST_INSTANCE; +} + +static void RE_rayobject_instance_bb(RayObject *o, float *min, float *max) +{ + //TODO: + // *better bb.. 
calculated without rotations of bb + // *maybe cache that better-fitted-BB at the InstanceRayObject + InstanceRayObject *obj = (InstanceRayObject *)o; + + float m[3], M[3], t[3]; + int i, j; + INIT_MINMAX(m, M); + RE_rayobject_merge_bb(obj->target, m, M); + + //There must be a faster way than rotating all the 8 vertexs of the BB + for (i = 0; i < 8; i++) { + for (j = 0; j < 3; j++) t[j] = (i & (1 << j)) ? M[j] : m[j]; + mul_m4_v3(obj->target2global, t); + DO_MINMAX(t, min, max); + } +} + diff --git a/source/blender/render/intern/raytrace/rayobject_octree.cpp b/source/blender/render/intern/raytrace/rayobject_octree.cpp new file mode 100644 index 00000000000..4b73e64ca45 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_octree.cpp @@ -0,0 +1,1101 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 1990-1998 NeoGeo BV. + * All rights reserved. + * + * Contributors: 2004/2005 Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_octree.cpp + * \ingroup render + */ + + +/* IMPORTANT NOTE: this code must be independent of any other render code + * to use it outside the renderer! 
/*
 * ***** BEGIN GPL LICENSE BLOCK *****
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
 * All rights reserved.
 *
 * Contributors: 2004/2005 Blender Foundation, full recode
 *
 * ***** END GPL LICENSE BLOCK *****
 */

/** \file blender/render/intern/raytrace/rayobject_octree.cpp
 *  \ingroup render
 */


/* IMPORTANT NOTE: this code must be independent of any other render code
 * to use it outside the renderer! */

#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <float.h>
#include <assert.h>

#include "MEM_guardedalloc.h"

#include "DNA_material_types.h"

#include "BLI_math.h"
#include "BLI_utildefines.h"

#include "rayintersection.h"
#include "rayobject.h"

/* ********** structs *************** */

/* Number of chunk pointers in Octree.adrbranch / Octree.adrnode. Each chunk
 * is allocated with room for 4096 elements (see addbranch() / addnode()). */
#define BRANCH_ARRAY 1024
#define NODE_ARRAY 4096

/* Inner octree cell: one child pointer per octant. On the deepest level the
 * slots are filled with Node pointers cast to Branch (see ocwrite()). */
typedef struct Branch {
	struct Branch *b[8];
} Branch;

/* Per axis bit mask built with BROW16(); testnode() ANDs the mask of a face
 * with the mask of a ray and only intersects when all three axes overlap. */
typedef struct OcVal {
	short ocx, ocy, ocz;
} OcVal;

/* Leaf storage: up to eight faces with their OcVal, chained through `next`
 * once all eight slots are in use. */
typedef struct Node {
	struct RayFace *v[8];
	struct OcVal ov[8];
	struct Node *next;
} Node;

typedef struct Octree {
	RayObject rayobj;               /* first member: an Octree is passed around as a RayObject */

	struct Branch **adrbranch;      /* BRANCH_ARRAY chunk pointers, allocated in the done callback */
	struct Node **adrnode;          /* NODE_ARRAY chunk pointers, allocated in the done callback */
	float ocsize;   /* ocsize: mult factor, max size octree */
	float ocfacx, ocfacy, ocfacz;   /* world to grid scale per axis: (ocres - 0.1) / extent */
	float min[3], max[3];           /* bounds of all added faces, enlarged by 0.01 per side */
	int ocres;                      /* grid resolution per axis; the code special-cases 128, 256 and 512 */
	int branchcount, nodecount;     /* running counters used to hand out Branch / Node slots */

	/* during building only */
	char *ocface;                   /* three ocres * ocres scratch planes, freed at the end of done */

	RayFace **ro_nodes;             /* faces queued by the add callback, consumed and freed by done */
	int ro_nodes_size, ro_nodes_used;

} Octree;

/* Callbacks referenced by the octree API table, defined further down. */
static int  RE_rayobject_octree_intersect(RayObject *o, Isect *isec);
static void RE_rayobject_octree_add(RayObject *o, RayObject *ob);
static void RE_rayobject_octree_done(RayObject *o);
static void RE_rayobject_octree_free(RayObject *o);
static void RE_rayobject_octree_bb(RayObject *o, float *min, float *max);
+ */ +static float RE_rayobject_octree_cost(RayObject *UNUSED(o)) +{ + return 1.0; +} + +static void RE_rayobject_octree_hint_bb(RayObject *UNUSED(o), RayHint *UNUSED(hint), + float *UNUSED(min), float *UNUSED(max)) +{ + return; +} + +static RayObjectAPI octree_api = +{ + RE_rayobject_octree_intersect, + RE_rayobject_octree_add, + RE_rayobject_octree_done, + RE_rayobject_octree_free, + RE_rayobject_octree_bb, + RE_rayobject_octree_cost, + RE_rayobject_octree_hint_bb +}; + +/* **************** ocval method ******************* */ +/* within one octree node, a set of 3x15 bits defines a 'boundbox' to OR with */ + +#define OCVALRES 15 +#define BROW16(min, max) \ + (((max) >= OCVALRES ? 0xFFFF : (1 << ((max) + 1)) - 1) - (((min) > 0) ? ((1 << (min)) - 1) : 0)) + +static void calc_ocval_face(float *v1, float *v2, float *v3, float *v4, short x, short y, short z, OcVal *ov) +{ + float min[3], max[3]; + int ocmin, ocmax; + + copy_v3_v3(min, v1); + copy_v3_v3(max, v1); + DO_MINMAX(v2, min, max); + DO_MINMAX(v3, min, max); + if (v4) { + DO_MINMAX(v4, min, max); + } + + ocmin = OCVALRES * (min[0] - x); + ocmax = OCVALRES * (max[0] - x); + ov->ocx = BROW16(ocmin, ocmax); + + ocmin = OCVALRES * (min[1] - y); + ocmax = OCVALRES * (max[1] - y); + ov->ocy = BROW16(ocmin, ocmax); + + ocmin = OCVALRES * (min[2] - z); + ocmax = OCVALRES * (max[2] - z); + ov->ocz = BROW16(ocmin, ocmax); + +} + +static void calc_ocval_ray(OcVal *ov, float xo, float yo, float zo, float *vec1, float *vec2) +{ + int ocmin, ocmax; + + if (vec1[0] < vec2[0]) { + ocmin = OCVALRES * (vec1[0] - xo); + ocmax = OCVALRES * (vec2[0] - xo); + } + else { + ocmin = OCVALRES * (vec2[0] - xo); + ocmax = OCVALRES * (vec1[0] - xo); + } + ov->ocx = BROW16(ocmin, ocmax); + + if (vec1[1] < vec2[1]) { + ocmin = OCVALRES * (vec1[1] - yo); + ocmax = OCVALRES * (vec2[1] - yo); + } + else { + ocmin = OCVALRES * (vec2[1] - yo); + ocmax = OCVALRES * (vec1[1] - yo); + } + ov->ocy = BROW16(ocmin, ocmax); + + if (vec1[2] < vec2[2]) { 
+ ocmin = OCVALRES * (vec1[2] - zo); + ocmax = OCVALRES * (vec2[2] - zo); + } + else { + ocmin = OCVALRES * (vec2[2] - zo); + ocmax = OCVALRES * (vec1[2] - zo); + } + ov->ocz = BROW16(ocmin, ocmax); +} + +/* ************* octree ************** */ + +static Branch *addbranch(Octree *oc, Branch *br, short ocb) +{ + int index; + + if (br->b[ocb]) return br->b[ocb]; + + oc->branchcount++; + index = oc->branchcount >> 12; + + if (oc->adrbranch[index] == NULL) + oc->adrbranch[index] = (Branch *)MEM_callocN(4096 * sizeof(Branch), "new oc branch"); + + if (oc->branchcount >= BRANCH_ARRAY * 4096) { + printf("error; octree branches full\n"); + oc->branchcount = 0; + } + + return br->b[ocb] = oc->adrbranch[index] + (oc->branchcount & 4095); +} + +static Node *addnode(Octree *oc) +{ + int index; + + oc->nodecount++; + index = oc->nodecount >> 12; + + if (oc->adrnode[index] == NULL) + oc->adrnode[index] = (Node *)MEM_callocN(4096 * sizeof(Node), "addnode"); + + if (oc->nodecount > NODE_ARRAY * NODE_ARRAY) { + printf("error; octree nodes full\n"); + oc->nodecount = 0; + } + + return oc->adrnode[index] + (oc->nodecount & 4095); +} + +static bool face_in_node(RayFace *face, short x, short y, short z, float rtf[4][3]) +{ + static float nor[3], d; + float fx, fy, fz; + + // init static vars + if (face) { + normal_tri_v3(nor, rtf[0], rtf[1], rtf[2]); + d = -nor[0] * rtf[0][0] - nor[1] * rtf[0][1] - nor[2] * rtf[0][2]; + return 0; + } + + fx = x; + fy = y; + fz = z; + + if ((fx) * nor[0] + (fy) * nor[1] + (fz) * nor[2] + d > 0.0f) { + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz ) * nor[2] + d < 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d < 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d < 0.0f) return 1; + + if ((fx ) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz + 1) 
* nor[2] + d < 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1; + } + else { + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz ) * nor[2] + d > 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d > 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz ) * nor[2] + d > 0.0f) return 1; + + if ((fx ) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy ) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + if ((fx ) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1; + } + + return 0; +} + +static void ocwrite(Octree *oc, RayFace *face, int quad, short x, short y, short z, float rtf[4][3]) +{ + Branch *br; + Node *no; + short a, oc0, oc1, oc2, oc3, oc4, oc5; + + x <<= 2; + y <<= 1; + + br = oc->adrbranch[0]; + + if (oc->ocres == 512) { + oc0 = ((x & 1024) + (y & 512) + (z & 256)) >> 8; + br = addbranch(oc, br, oc0); + } + if (oc->ocres >= 256) { + oc0 = ((x & 512) + (y & 256) + (z & 128)) >> 7; + br = addbranch(oc, br, oc0); + } + if (oc->ocres >= 128) { + oc0 = ((x & 256) + (y & 128) + (z & 64)) >> 6; + br = addbranch(oc, br, oc0); + } + + oc0 = ((x & 128) + (y & 64) + (z & 32)) >> 5; + oc1 = ((x & 64) + (y & 32) + (z & 16)) >> 4; + oc2 = ((x & 32) + (y & 16) + (z & 8)) >> 3; + oc3 = ((x & 16) + (y & 8) + (z & 4)) >> 2; + oc4 = ((x & 8) + (y & 4) + (z & 2)) >> 1; + oc5 = ((x & 4) + (y & 2) + (z & 1)); + + br = addbranch(oc, br, oc0); + br = addbranch(oc, br, oc1); + br = addbranch(oc, br, oc2); + br = addbranch(oc, br, oc3); + br = addbranch(oc, br, oc4); + no = (Node *)br->b[oc5]; + if (no == NULL) br->b[oc5] = (Branch *)(no = addnode(oc)); + + while (no->next) no = no->next; + + a = 0; + if (no->v[7]) { /* node full */ + no->next = addnode(oc); + no = no->next; + } + else { + while (no->v[a] != NULL) a++; + } + + no->v[a] = 
(RayFace *) RE_rayobject_align(face); + + if (quad) + calc_ocval_face(rtf[0], rtf[1], rtf[2], rtf[3], x >> 2, y >> 1, z, &no->ov[a]); + else + calc_ocval_face(rtf[0], rtf[1], rtf[2], NULL, x >> 2, y >> 1, z, &no->ov[a]); +} + +static void d2dda(Octree *oc, short b1, short b2, short c1, short c2, char *ocface, short rts[4][3], float rtf[4][3]) +{ + int ocx1, ocx2, ocy1, ocy2; + int x, y, dx = 0, dy = 0; + float ox1, ox2, oy1, oy2; + float lambda, lambda_o, lambda_x, lambda_y, ldx, ldy; + + ocx1 = rts[b1][c1]; + ocy1 = rts[b1][c2]; + ocx2 = rts[b2][c1]; + ocy2 = rts[b2][c2]; + + if (ocx1 == ocx2 && ocy1 == ocy2) { + ocface[oc->ocres * ocx1 + ocy1] = 1; + return; + } + + ox1 = rtf[b1][c1]; + oy1 = rtf[b1][c2]; + ox2 = rtf[b2][c1]; + oy2 = rtf[b2][c2]; + + if (ox1 != ox2) { + if (ox2 - ox1 > 0.0f) { + lambda_x = (ox1 - ocx1 - 1.0f) / (ox1 - ox2); + ldx = -1.0f / (ox1 - ox2); + dx = 1; + } + else { + lambda_x = (ox1 - ocx1) / (ox1 - ox2); + ldx = 1.0f / (ox1 - ox2); + dx = -1; + } + } + else { + lambda_x = 1.0f; + ldx = 0; + } + + if (oy1 != oy2) { + if (oy2 - oy1 > 0.0f) { + lambda_y = (oy1 - ocy1 - 1.0f) / (oy1 - oy2); + ldy = -1.0f / (oy1 - oy2); + dy = 1; + } + else { + lambda_y = (oy1 - ocy1) / (oy1 - oy2); + ldy = 1.0f / (oy1 - oy2); + dy = -1; + } + } + else { + lambda_y = 1.0f; + ldy = 0; + } + + x = ocx1; y = ocy1; + lambda = MIN2(lambda_x, lambda_y); + + while (true) { + + if (x < 0 || y < 0 || x >= oc->ocres || y >= oc->ocres) { + /* pass*/ + } + else { + ocface[oc->ocres * x + y] = 1; + } + + lambda_o = lambda; + if (lambda_x == lambda_y) { + lambda_x += ldx; + x += dx; + lambda_y += ldy; + y += dy; + } + else { + if (lambda_x < lambda_y) { + lambda_x += ldx; + x += dx; + } + else { + lambda_y += ldy; + y += dy; + } + } + lambda = MIN2(lambda_x, lambda_y); + if (lambda == lambda_o) break; + if (lambda >= 1.0f) break; + } + ocface[oc->ocres * ocx2 + ocy2] = 1; +} + +static void filltriangle(Octree *oc, short c1, short c2, char *ocface, short *ocmin, short 
*ocmax) +{ + int a, x, y, y1, y2; + + for (x = ocmin[c1]; x <= ocmax[c1]; x++) { + a = oc->ocres * x; + for (y = ocmin[c2]; y <= ocmax[c2]; y++) { + if (ocface[a + y]) { + y++; + while (ocface[a + y] && y != ocmax[c2]) y++; + for (y1 = ocmax[c2]; y1 > y; y1--) { + if (ocface[a + y1]) { + for (y2 = y; y2 <= y1; y2++) ocface[a + y2] = 1; + y1 = 0; + } + } + y = ocmax[c2]; + } + } + } +} + +static void RE_rayobject_octree_free(RayObject *tree) +{ + Octree *oc = (Octree *)tree; + +#if 0 + printf("branches %d nodes %d\n", oc->branchcount, oc->nodecount); + printf("raycount %d\n", raycount); + printf("ray coherent %d\n", coherent_ray); + printf("accepted %d rejected %d\n", accepted, rejected); +#endif + if (oc->ocface) + MEM_freeN(oc->ocface); + + if (oc->adrbranch) { + int a = 0; + while (oc->adrbranch[a]) { + MEM_freeN(oc->adrbranch[a]); + oc->adrbranch[a] = NULL; + a++; + } + MEM_freeN(oc->adrbranch); + oc->adrbranch = NULL; + } + oc->branchcount = 0; + + if (oc->adrnode) { + int a = 0; + while (oc->adrnode[a]) { + MEM_freeN(oc->adrnode[a]); + oc->adrnode[a] = NULL; + a++; + } + MEM_freeN(oc->adrnode); + oc->adrnode = NULL; + } + oc->nodecount = 0; + + MEM_freeN(oc); +} + + +RayObject *RE_rayobject_octree_create(int ocres, int size) +{ + Octree *oc = (Octree *)MEM_callocN(sizeof(Octree), "Octree"); + assert(RE_rayobject_isAligned(oc) ); /* RayObject API assumes real data to be 4-byte aligned */ + + oc->rayobj.api = &octree_api; + + oc->ocres = ocres; + + oc->ro_nodes = (RayFace **)MEM_callocN(sizeof(RayFace *) * size, "octree rayobject nodes"); + oc->ro_nodes_size = size; + oc->ro_nodes_used = 0; + + + return RE_rayobject_unalignRayAPI((RayObject *) oc); +} + + +static void RE_rayobject_octree_add(RayObject *tree, RayObject *node) +{ + Octree *oc = (Octree *)tree; + + assert(RE_rayobject_isRayFace(node) ); + assert(oc->ro_nodes_used < oc->ro_nodes_size); + oc->ro_nodes[oc->ro_nodes_used++] = (RayFace *)RE_rayobject_align(node); +} + +static void 
octree_fill_rayface(Octree *oc, RayFace *face) +{ + float ocfac[3], rtf[4][3]; + float co1[3], co2[3], co3[3], co4[3]; + short rts[4][3]; + short ocmin[3], ocmax[3]; + char *ocface = oc->ocface; // front, top, size view of face, to fill in + int a, b, c, oc1, oc2, oc3, oc4, x, y, z, ocres2; + + ocfac[0] = oc->ocfacx; + ocfac[1] = oc->ocfacy; + ocfac[2] = oc->ocfacz; + + ocres2 = oc->ocres * oc->ocres; + + copy_v3_v3(co1, face->v1); + copy_v3_v3(co2, face->v2); + copy_v3_v3(co3, face->v3); + if (RE_rayface_isQuad(face)) + copy_v3_v3(co4, face->v4); + + for (c = 0; c < 3; c++) { + rtf[0][c] = (co1[c] - oc->min[c]) * ocfac[c]; + rts[0][c] = (short)rtf[0][c]; + rtf[1][c] = (co2[c] - oc->min[c]) * ocfac[c]; + rts[1][c] = (short)rtf[1][c]; + rtf[2][c] = (co3[c] - oc->min[c]) * ocfac[c]; + rts[2][c] = (short)rtf[2][c]; + if (RE_rayface_isQuad(face)) { + rtf[3][c] = (co4[c] - oc->min[c]) * ocfac[c]; + rts[3][c] = (short)rtf[3][c]; + } + } + + for (c = 0; c < 3; c++) { + oc1 = rts[0][c]; + oc2 = rts[1][c]; + oc3 = rts[2][c]; + if (!RE_rayface_isQuad(face)) { + ocmin[c] = min_iii(oc1, oc2, oc3); + ocmax[c] = max_iii(oc1, oc2, oc3); + } + else { + oc4 = rts[3][c]; + ocmin[c] = min_iiii(oc1, oc2, oc3, oc4); + ocmax[c] = max_iiii(oc1, oc2, oc3, oc4); + } + if (ocmax[c] > oc->ocres - 1) ocmax[c] = oc->ocres - 1; + if (ocmin[c] < 0) ocmin[c] = 0; + } + + if (ocmin[0] == ocmax[0] && ocmin[1] == ocmax[1] && ocmin[2] == ocmax[2]) { + ocwrite(oc, face, RE_rayface_isQuad(face), ocmin[0], ocmin[1], ocmin[2], rtf); + } + else { + + d2dda(oc, 0, 1, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 0, 1, 0, 2, ocface, rts, rtf); + d2dda(oc, 0, 1, 1, 2, ocface + 2 * ocres2, rts, rtf); + d2dda(oc, 1, 2, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 1, 2, 0, 2, ocface, rts, rtf); + d2dda(oc, 1, 2, 1, 2, ocface + 2 * ocres2, rts, rtf); + if (!RE_rayface_isQuad(face)) { + d2dda(oc, 2, 0, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 2, 0, 0, 2, ocface, rts, rtf); + d2dda(oc, 2, 0, 1, 2, ocface + 2 
* ocres2, rts, rtf); + } + else { + d2dda(oc, 2, 3, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 2, 3, 0, 2, ocface, rts, rtf); + d2dda(oc, 2, 3, 1, 2, ocface + 2 * ocres2, rts, rtf); + d2dda(oc, 3, 0, 0, 1, ocface + ocres2, rts, rtf); + d2dda(oc, 3, 0, 0, 2, ocface, rts, rtf); + d2dda(oc, 3, 0, 1, 2, ocface + 2 * ocres2, rts, rtf); + } + /* nothing todo with triangle..., just fills :) */ + filltriangle(oc, 0, 1, ocface + ocres2, ocmin, ocmax); + filltriangle(oc, 0, 2, ocface, ocmin, ocmax); + filltriangle(oc, 1, 2, ocface + 2 * ocres2, ocmin, ocmax); + + /* init static vars here */ + face_in_node(face, 0, 0, 0, rtf); + + for (x = ocmin[0]; x <= ocmax[0]; x++) { + a = oc->ocres * x; + for (y = ocmin[1]; y <= ocmax[1]; y++) { + if (ocface[a + y + ocres2]) { + b = oc->ocres * y + 2 * ocres2; + for (z = ocmin[2]; z <= ocmax[2]; z++) { + if (ocface[b + z] && ocface[a + z]) { + if (face_in_node(NULL, x, y, z, rtf)) + ocwrite(oc, face, RE_rayface_isQuad(face), x, y, z, rtf); + } + } + } + } + } + + /* same loops to clear octree, doubt it can be done smarter */ + for (x = ocmin[0]; x <= ocmax[0]; x++) { + a = oc->ocres * x; + for (y = ocmin[1]; y <= ocmax[1]; y++) { + /* x-y */ + ocface[a + y + ocres2] = 0; + + b = oc->ocres * y + 2 * ocres2; + for (z = ocmin[2]; z <= ocmax[2]; z++) { + /* y-z */ + ocface[b + z] = 0; + /* x-z */ + ocface[a + z] = 0; + } + } + } + } +} + +static void RE_rayobject_octree_done(RayObject *tree) +{ + Octree *oc = (Octree *)tree; + int c; + float t00, t01, t02; + int ocres2 = oc->ocres * oc->ocres; + + INIT_MINMAX(oc->min, oc->max); + + /* Calculate Bounding Box */ + for (c = 0; c < oc->ro_nodes_used; c++) + RE_rayobject_merge_bb(RE_rayobject_unalignRayFace(oc->ro_nodes[c]), oc->min, oc->max); + + /* Alloc memory */ + oc->adrbranch = (Branch **)MEM_callocN(sizeof(void *) * BRANCH_ARRAY, "octree branches"); + oc->adrnode = (Node **)MEM_callocN(sizeof(void *) * NODE_ARRAY, "octree nodes"); + + oc->adrbranch[0] = (Branch *)MEM_callocN(4096 * 
sizeof(Branch), "makeoctree"); + + /* the lookup table, per face, for which nodes to fill in */ + oc->ocface = (char *)MEM_callocN(3 * ocres2 + 8, "ocface"); + memset(oc->ocface, 0, 3 * ocres2); + + for (c = 0; c < 3; c++) { /* octree enlarge, still needed? */ + oc->min[c] -= 0.01f; + oc->max[c] += 0.01f; + } + + t00 = oc->max[0] - oc->min[0]; + t01 = oc->max[1] - oc->min[1]; + t02 = oc->max[2] - oc->min[2]; + + /* this minus 0.1 is old safety... seems to be needed? */ + oc->ocfacx = (oc->ocres - 0.1f) / t00; + oc->ocfacy = (oc->ocres - 0.1f) / t01; + oc->ocfacz = (oc->ocres - 0.1f) / t02; + + oc->ocsize = sqrtf(t00 * t00 + t01 * t01 + t02 * t02); /* global, max size octree */ + + for (c = 0; c < oc->ro_nodes_used; c++) { + octree_fill_rayface(oc, oc->ro_nodes[c]); + } + + MEM_freeN(oc->ocface); + oc->ocface = NULL; + MEM_freeN(oc->ro_nodes); + oc->ro_nodes = NULL; + +#if 0 + printf("%f %f - %f\n", oc->min[0], oc->max[0], oc->ocfacx); + printf("%f %f - %f\n", oc->min[1], oc->max[1], oc->ocfacy); + printf("%f %f - %f\n", oc->min[2], oc->max[2], oc->ocfacz); +#endif +} + +static void RE_rayobject_octree_bb(RayObject *tree, float *min, float *max) +{ + Octree *oc = (Octree *)tree; + DO_MINMAX(oc->min, min, max); + DO_MINMAX(oc->max, min, max); +} + +/* check all faces in this node */ +static int testnode(Octree *UNUSED(oc), Isect *is, Node *no, OcVal ocval) +{ + short nr = 0; + + /* return on any first hit */ + if (is->mode == RE_RAY_SHADOW) { + + for (; no; no = no->next) { + for (nr = 0; nr < 8; nr++) { + RayFace *face = no->v[nr]; + OcVal *ov = no->ov + nr; + + if (!face) break; + + if ( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) { + if (RE_rayobject_intersect(RE_rayobject_unalignRayFace(face), is) ) + return 1; + } + } + } + } + else { + /* else mirror or glass or shadowtra, return closest face */ + int found = 0; + + for (; no; no = no->next) { + for (nr = 0; nr < 8; nr++) { + RayFace *face = no->v[nr]; + OcVal *ov = no->ov + nr; + 
+ if (!face) break; + + if ( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) { + if (RE_rayobject_intersect(RE_rayobject_unalignRayFace(face), is) ) { + found = 1; + } + } + } + } + + return found; + } + + return 0; +} + +/* find the Node for the octree coord x y z */ +static Node *ocread(Octree *oc, int x, int y, int z) +{ + Branch *br; + int oc1; + + x <<= 2; + y <<= 1; + + br = oc->adrbranch[0]; + + if (oc->ocres == 512) { + oc1 = ((x & 1024) + (y & 512) + (z & 256)) >> 8; + br = br->b[oc1]; + if (br == NULL) { + return NULL; + } + } + if (oc->ocres >= 256) { + oc1 = ((x & 512) + (y & 256) + (z & 128)) >> 7; + br = br->b[oc1]; + if (br == NULL) { + return NULL; + } + } + if (oc->ocres >= 128) { + oc1 = ((x & 256) + (y & 128) + (z & 64)) >> 6; + br = br->b[oc1]; + if (br == NULL) { + return NULL; + } + } + + oc1 = ((x & 128) + (y & 64) + (z & 32)) >> 5; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 64) + (y & 32) + (z & 16)) >> 4; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 32) + (y & 16) + (z & 8)) >> 3; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 16) + (y & 8) + (z & 4)) >> 2; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 8) + (y & 4) + (z & 2)) >> 1; + br = br->b[oc1]; + if (br) { + oc1 = ((x & 4) + (y & 2) + (z & 1)); + return (Node *)br->b[oc1]; + } + } + } + } + } + + return NULL; +} + +static int cliptest(float p, float q, float *u1, float *u2) +{ + float r; + + if (p < 0.0f) { + if (q < p) return 0; + else if (q < 0.0f) { + r = q / p; + if (r > *u2) return 0; + else if (r > *u1) *u1 = r; + } + } + else { + if (p > 0.0f) { + if (q < 0.0f) return 0; + else if (q < p) { + r = q / p; + if (r < *u1) return 0; + else if (r < *u2) *u2 = r; + } + } + else if (q < 0.0f) return 0; + } + return 1; +} + +/* extensive coherence checks/storage cancels out the benefit of it, and gives errors... 
/* extensive coherence checks/storage cancels out the benefit of it, and gives errors... we
 * need better methods, sample code commented out below (ton) */

#if 0

in top : static int coh_nodes[16 * 16 * 16][6];
in makeoctree : memset(coh_nodes, 0, sizeof(coh_nodes));

static void add_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2)
{
	short *sp;

	sp = coh_nodes[(ocx2 & 15) + 16 * (ocy2 & 15) + 256 * (ocz2 & 15)];
	sp[0] = ocx1; sp[1] = ocy1; sp[2] = ocz1;
	sp[3] = ocx2; sp[4] = ocy2; sp[5] = ocz2;

}

static int do_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2)
{
	short *sp;

	sp = coh_nodes[(ocx2 & 15) + 16 * (ocy2 & 15) + 256 * (ocz2 & 15)];
	if (sp[0] == ocx1 && sp[1] == ocy1 && sp[2] == ocz1 &&
	    sp[3] == ocx2 && sp[4] == ocy2 && sp[5] == ocz2) return 1;
	return 0;
}

#endif

/* return 1: found valid intersection */
/* starts with is->orig.face */
/**
 * Raycast callback of the octree.
 *
 * Steps:
 * - bail out when the tree holds no branches;
 * - clip the segment start .. start + dir * dist against the octree bounds
 *   with six cliptest() calls, which yields the parameters u1/u2 of the
 *   part inside the bounds;
 * - convert both clipped end points to grid coordinates;
 * - when both fall into the same cell, test just that node; otherwise walk
 *   the cells along the segment with a 3D DDA and test every existing node.
 *
 * \param tree  the octree, already passed as the untagged pointer
 * \param is    ray description, handed on to the face intersection routine
 * \return 1 when a face was hit, 0 otherwise
 */
static int RE_rayobject_octree_intersect(RayObject *tree, Isect *is)
{
	Octree *oc = (Octree *)tree;
	Node *no;
	OcVal ocval;
	float vec1[3], vec2[3], start[3], end[3];
	float u1, u2, ox1, ox2, oy1, oy2, oz1, oz2;
	float lambda_o, lambda_x, ldx, lambda_y, ldy, lambda_z, ldz, dda_lambda;
	float o_lambda = 0;
	int dx, dy, dz;
	int xo, yo, zo, c1 = 0;
	int ocx1, ocx2, ocy1, ocy2, ocz1, ocz2;

	/* clip with octree */
	if (oc->branchcount == 0) return 0;

	/* do this before intersect calls */
#if 0
	is->facecontr = NULL;                /* to check shared edge */
	is->obcontr = 0;
	is->faceisect = is->isect = 0;        /* shared edge, quad half flag */
	is->userdata = oc->userdata;
#endif

	copy_v3_v3(start, is->start);
	madd_v3_v3v3fl(end, is->start, is->dir, is->dist);
	ldx = is->dir[0] * is->dist;
	o_lambda = is->dist; /* unclipped ray length, used for the early exit in the DDA loop */
	u1 = 0.0f;
	u2 = 1.0f;

	/* clip with octree cube */
	/* ldx/ldy/ldz are the full segment deltas per axis; c1 is only set when all six tests pass */
	if (cliptest(-ldx, start[0] - oc->min[0], &u1, &u2)) {
		if (cliptest(ldx, oc->max[0] - start[0], &u1, &u2)) {
			ldy = is->dir[1] * is->dist;
			if (cliptest(-ldy, start[1] - oc->min[1], &u1, &u2)) {
				if (cliptest(ldy, oc->max[1] - start[1], &u1, &u2)) {
					ldz = is->dir[2] * is->dist;
					if (cliptest(-ldz, start[2] - oc->min[2], &u1, &u2)) {
						if (cliptest(ldz, oc->max[2] - start[2], &u1, &u2)) {
							c1 = 1;
							/* end is computed before start is moved, both relative to the original start */
							if (u2 < 1.0f) {
								end[0] = start[0] + u2 * ldx;
								end[1] = start[1] + u2 * ldy;
								end[2] = start[2] + u2 * ldz;
							}

							if (u1 > 0.0f) {
								start[0] += u1 * ldx;
								start[1] += u1 * ldy;
								start[2] += u1 * ldz;
							}
						}
					}
				}
			}
		}
	}

	if (c1 == 0) return 0;

	/* reset static variables in ocread */
	//ocread(oc, oc->ocres, 0, 0);

	/* setup 3dda to traverse octree */
	/* o*1 / o*2: clipped start and end in grid coordinates, oc*1 / oc*2: their cells */
	ox1 = (start[0] - oc->min[0]) * oc->ocfacx;
	oy1 = (start[1] - oc->min[1]) * oc->ocfacy;
	oz1 = (start[2] - oc->min[2]) * oc->ocfacz;
	ox2 = (end[0] - oc->min[0]) * oc->ocfacx;
	oy2 = (end[1] - oc->min[1]) * oc->ocfacy;
	oz2 = (end[2] - oc->min[2]) * oc->ocfacz;

	ocx1 = (int)ox1;
	ocy1 = (int)oy1;
	ocz1 = (int)oz1;
	ocx2 = (int)ox2;
	ocy2 = (int)oy2;
	ocz2 = (int)oz2;

	if (ocx1 == ocx2 && ocy1 == ocy2 && ocz1 == ocz2) {
		no = ocread(oc, ocx1, ocy1, ocz1);
		if (no) {
			/* exact intersection with node */
			vec1[0] = ox1; vec1[1] = oy1; vec1[2] = oz1;
			vec2[0] = ox2; vec2[1] = oy2; vec2[2] = oz2;
			calc_ocval_ray(&ocval, (float)ocx1, (float)ocy1, (float)ocz1, vec1, vec2);
			if (testnode(oc, is, no, ocval) ) return 1;
		}
	}
	else {
		int found = 0;
		//static int coh_ocx1, coh_ocx2, coh_ocy1, coh_ocy2, coh_ocz1, coh_ocz2;
		float dox, doy, doz;
		int eqval;

		/* calc lambda en ld */
		/* per axis: lambda_* is the segment parameter of the next cell border,
		 * ld* the parameter step per cell, d* the cell step (0 when the axis barely moves) */
		dox = ox1 - ox2;
		doy = oy1 - oy2;
		doz = oz1 - oz2;

		if (dox < -FLT_EPSILON) {
			ldx = -1.0f / dox;
			lambda_x = (ocx1 - ox1 + 1.0f) * ldx;
			dx = 1;
		}
		else if (dox > FLT_EPSILON) {
			ldx = 1.0f / dox;
			lambda_x = (ox1 - ocx1) * ldx;
			dx = -1;
		}
		else {
			lambda_x = 1.0f;
			ldx = 0;
			dx = 0;
		}

		if (doy < -FLT_EPSILON) {
			ldy = -1.0f / doy;
			lambda_y = (ocy1 - oy1 + 1.0f) * ldy;
			dy = 1;
		}
		else if (doy > FLT_EPSILON) {
			ldy = 1.0f / doy;
			lambda_y = (oy1 - ocy1) * ldy;
			dy = -1;
		}
		else {
			lambda_y = 1.0f;
			ldy = 0;
			dy = 0;
		}

		if (doz < -FLT_EPSILON) {
			ldz = -1.0f / doz;
			lambda_z = (ocz1 - oz1 + 1.0f) * ldz;
			dz = 1;
		}
		else if (doz > FLT_EPSILON) {
			ldz = 1.0f / doz;
			lambda_z = (oz1 - ocz1) * ldz;
			dz = -1;
		}
		else {
			lambda_z = 1.0f;
			ldz = 0;
			dz = 0;
		}

		xo = ocx1; yo = ocy1; zo = ocz1;
		dda_lambda = min_fff(lambda_x, lambda_y, lambda_z);

		/* vec2 is the entry point of the first cell; inside the loop it becomes
		 * the exit point and is copied to vec1 for the next visited node */
		vec2[0] = ox1;
		vec2[1] = oy1;
		vec2[2] = oz1;

		/* this loop has been constructed to make sure the first and last node of ray
		 * are always included, even when dda_lambda==1.0f or larger */

		while (true) {

			no = ocread(oc, xo, yo, zo);
			if (no) {

				/* calculate ray intersection with octree node */
				copy_v3_v3(vec1, vec2);
				// dox, y, z is negative
				vec2[0] = ox1 - dda_lambda * dox;
				vec2[1] = oy1 - dda_lambda * doy;
				vec2[2] = oz1 - dda_lambda * doz;
				calc_ocval_ray(&ocval, (float)xo, (float)yo, (float)zo, vec1, vec2);

				//is->dist = (u1 + dda_lambda * (u2 - u1)) * o_lambda;
				if (testnode(oc, is, no, ocval) )
					found = 1;

				/* stop once is->dist is shorter than the distance at which the ray
				 * leaves this cell (presumably is->dist is shortened by the face
				 * intersection on a hit - confirm in intersect_rayface) */
				if (is->dist < (u1 + dda_lambda * (u2 - u1)) * o_lambda)
					return found;
			}


			lambda_o = dda_lambda;

			/* traversing octree nodes need careful detection of smallest values, with proper
			 * exceptions for equal lambdas */
			eqval = (lambda_x == lambda_y);
			if (lambda_y == lambda_z) eqval += 2;
			if (lambda_x == lambda_z) eqval += 4;

			if (eqval) {    // only 4 cases exist!
				if (eqval == 7) {   // x=y=z
					xo += dx; lambda_x += ldx;
					yo += dy; lambda_y += ldy;
					zo += dz; lambda_z += ldz;
				}
				else if (eqval == 1) { // x=y
					if (lambda_y < lambda_z) {
						xo += dx; lambda_x += ldx;
						yo += dy; lambda_y += ldy;
					}
					else {
						zo += dz; lambda_z += ldz;
					}
				}
				else if (eqval == 2) { // y=z
					if (lambda_x < lambda_y) {
						xo += dx; lambda_x += ldx;
					}
					else {
						yo += dy; lambda_y += ldy;
						zo += dz; lambda_z += ldz;
					}
				}
				else { // x=z
					if (lambda_y < lambda_x) {
						yo += dy; lambda_y += ldy;
					}
					else {
						xo += dx; lambda_x += ldx;
						zo += dz; lambda_z += ldz;
					}
				}
			}
			else {  // all three different, just three cases exist
				eqval = (lambda_x < lambda_y);
				if (lambda_y < lambda_z) eqval += 2;
				if (lambda_x < lambda_z) eqval += 4;

				if (eqval == 7 || eqval == 5) { // x smallest
					xo += dx; lambda_x += ldx;
				}
				else if (eqval == 2 || eqval == 6) { // y smallest
					yo += dy; lambda_y += ldy;
				}
				else { // z smallest
					zo += dz; lambda_z += ldz;
				}

			}

			dda_lambda = min_fff(lambda_x, lambda_y, lambda_z);
			/* no axis advanced any further: stop */
			if (dda_lambda == lambda_o) break;
			/* to make sure the last node is always checked */
			if (lambda_o >= 1.0f) break;
		}
		/* NOTE(review): `found` goes out of scope here without being returned, so a
		 * hit that never satisfied the early-exit test above is reported as a miss
		 * by the final return - confirm whether that can happen. */
	}

	/* reached end, no intersections found */
	return 0;
}
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_qbvh.cpp + * \ingroup render + */ + + +#include "MEM_guardedalloc.h" + +#include "BLI_utildefines.h" + +#include "vbvh.h" +#include "svbvh.h" +#include "reorganize.h" + +#ifdef __SSE__ + +#define DFS_STACK_SIZE 256 + +struct QBVHTree { + RayObject rayobj; + + SVBVHNode *root; + MemArena *node_arena; + + float cost; + RTBuilder *builder; +}; + + +template<> +void bvh_done<QBVHTree>(QBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "qbvh arena"); + BLI_memarena_use_malloc(arena1); + + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "qbvh arena 2"); + BLI_memarena_use_malloc(arena2); + BLI_memarena_use_align(arena2, 16); + + //Build and optimize the tree + //TODO do this in 1 pass (half memory usage during building) + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + if (root) { + pushup_simd<VBVHNode, 4>(root); + obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root); + } + else 
+ obj->root = NULL; + + //Free data + BLI_memarena_free(arena1); + + obj->node_arena = arena2; + obj->cost = 1.0; + + rtbuild_free(obj->builder); + obj->builder = NULL; +} + +template<int StackSize> +static int intersect(QBVHTree *obj, Isect *isec) +{ + //TODO renable hint support + if (RE_rayobject_isAligned(obj->root)) { + if (isec->mode == RE_RAY_SHADOW) + return svbvh_node_stack_raycast<StackSize, true>(obj->root, isec); + else + return svbvh_node_stack_raycast<StackSize, false>(obj->root, isec); + } + else + return RE_rayobject_intersect((RayObject *)obj->root, isec); +} + +template<class Tree> +static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max)) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject *)tree->root; + } +} +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +static RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int (*)(Tree *, Isect *)) & intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void (*)(Tree *, RayObject *)) & bvh_add<Tree>), + (RE_rayobject_done_callback) ((void (*)(Tree *)) & bvh_done<Tree>), + (RE_rayobject_free_callback) ((void (*)(Tree *)) & bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void (*)(Tree *, float *, float *)) & bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float (*)(Tree *)) & bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI *bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree, 1024>(); + + if (maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return NULL; +} + +RayObject *RE_rayobject_qbvh_create(int size) +{ + return bvh_create_tree<QBVHTree, DFS_STACK_SIZE>(size); +} + +#else + 
+RayObject *RE_rayobject_qbvh_create(int UNUSED(size)) +{ + puts("WARNING: SSE disabled at compile time\n"); + return NULL; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_raycounter.cpp b/source/blender/render/intern/raytrace/rayobject_raycounter.cpp new file mode 100644 index 00000000000..429c47f1c0f --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_raycounter.cpp @@ -0,0 +1,91 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_raycounter.cpp + * \ingroup render + */ + + +#include "rayobject.h" +#include "raycounter.h" + +#ifdef RE_RAYCOUNTER + +void RE_RC_INFO(RayCounter *info) +{ + printf("----------- Raycast counter --------\n"); + printf("Rays total: %llu\n", info->raycast.test ); + printf("Rays hit: %llu\n", info->raycast.hit ); + printf("\n"); + printf("BB tests: %llu\n", info->bb.test ); + printf("BB hits: %llu\n", info->bb.hit ); + printf("\n"); + printf("SIMD BB tests: %llu\n", info->simd_bb.test ); + printf("SIMD BB hits: %llu\n", info->simd_bb.hit ); + printf("\n"); + printf("Primitives tests: %llu\n", info->faces.test ); + printf("Primitives hits: %llu\n", info->faces.hit ); + printf("------------------------------------\n"); + printf("Shadow last-hit tests per ray: %f\n", info->rayshadow_last_hit.test / ((float)info->raycast.test) ); + printf("Shadow last-hit hits per ray: %f\n", info->rayshadow_last_hit.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("Hint tests per ray: %f\n", info->raytrace_hint.test / ((float)info->raycast.test) ); + printf("Hint hits per ray: %f\n", info->raytrace_hint.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("BB tests per ray: %f\n", info->bb.test / ((float)info->raycast.test) ); + printf("BB hits per ray: %f\n", info->bb.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("SIMD tests per ray: %f\n", info->simd_bb.test / ((float)info->raycast.test) ); + printf("SIMD hits per ray: %f\n", info->simd_bb.hit / ((float)info->raycast.test) ); + printf("\n"); + printf("Primitives tests per ray: %f\n", info->faces.test / ((float)info->raycast.test) ); + printf("Primitives hits per ray: %f\n", info->faces.hit / ((float)info->raycast.test) ); + printf("------------------------------------\n"); +} + +void RE_RC_MERGE(RayCounter *dest, RayCounter *tmp) +{ + dest->faces.test += tmp->faces.test; + dest->faces.hit += 
tmp->faces.hit; + + dest->bb.test += tmp->bb.test; + dest->bb.hit += tmp->bb.hit; + + dest->simd_bb.test += tmp->simd_bb.test; + dest->simd_bb.hit += tmp->simd_bb.hit; + + dest->raycast.test += tmp->raycast.test; + dest->raycast.hit += tmp->raycast.hit; + + dest->rayshadow_last_hit.test += tmp->rayshadow_last_hit.test; + dest->rayshadow_last_hit.hit += tmp->rayshadow_last_hit.hit; + + dest->raytrace_hint.test += tmp->raytrace_hint.test; + dest->raytrace_hint.hit += tmp->raytrace_hint.hit; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp new file mode 100644 index 00000000000..51f89784674 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp @@ -0,0 +1,531 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_rtbuild.cpp + * \ingroup render + */ + + +#include <assert.h> +#include <stdlib.h> +#include <algorithm> + +#if __cplusplus >= 201103L +#include <cmath> +using std::isfinite; +#else +#include <math.h> +#endif + +#include "rayobject_rtbuild.h" + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +static bool selected_node(RTBuilder::Object *node) +{ + return node->selected; +} + +static void rtbuild_init(RTBuilder *b) +{ + b->split_axis = -1; + b->primitives.begin = NULL; + b->primitives.end = NULL; + b->primitives.maxsize = 0; + b->depth = 0; + + for (int i = 0; i < RTBUILD_MAX_CHILDS; i++) + b->child_offset[i] = 0; + + for (int i = 0; i < 3; i++) + b->sorted_begin[i] = b->sorted_end[i] = NULL; + + INIT_MINMAX(b->bb, b->bb + 3); +} + +RTBuilder *rtbuild_create(int size) +{ + RTBuilder *builder = (RTBuilder *) MEM_mallocN(sizeof(RTBuilder), "RTBuilder"); + RTBuilder::Object *memblock = (RTBuilder::Object *)MEM_mallocN(sizeof(RTBuilder::Object) * size, "RTBuilder.objects"); + + + rtbuild_init(builder); + + builder->primitives.begin = builder->primitives.end = memblock; + builder->primitives.maxsize = size; + + for (int i = 0; i < 3; i++) { + builder->sorted_begin[i] = (RTBuilder::Object **)MEM_mallocN(sizeof(RTBuilder::Object *) * size, "RTBuilder.sorted_objects"); + builder->sorted_end[i] = builder->sorted_begin[i]; + } + + + return builder; +} + +void rtbuild_free(RTBuilder *b) +{ + if (b->primitives.begin) MEM_freeN(b->primitives.begin); + + for (int i = 0; i < 3; i++) + if (b->sorted_begin[i]) + MEM_freeN(b->sorted_begin[i]); + + MEM_freeN(b); +} + +void rtbuild_add(RTBuilder *b, RayObject *o) +{ + float bb[6]; + + assert(b->primitives.begin + b->primitives.maxsize != b->primitives.end); + + INIT_MINMAX(bb, bb + 3); + RE_rayobject_merge_bb(o, bb, bb + 3); + + /* skip objects with invalid bounding boxes, nan causes DO_MINMAX + * 
to do nothing, so we get these invalid values. this shouldn't + * happen usually, but bugs earlier in the pipeline can cause it. */ + if (bb[0] > bb[3] || bb[1] > bb[4] || bb[2] > bb[5]) + return; + /* skip objects with inf bounding boxes */ + if (!isfinite(bb[0]) || !isfinite(bb[1]) || !isfinite(bb[2])) + return; + if (!isfinite(bb[3]) || !isfinite(bb[4]) || !isfinite(bb[5])) + return; + /* skip objects with zero bounding box, they are of no use, and + * will give problems in rtbuild_heuristic_object_split later */ + if (bb[0] == bb[3] && bb[1] == bb[4] && bb[2] == bb[5]) + return; + + copy_v3_v3(b->primitives.end->bb, bb); + copy_v3_v3(b->primitives.end->bb + 3, bb + 3); + b->primitives.end->obj = o; + b->primitives.end->cost = RE_rayobject_cost(o); + + for (int i = 0; i < 3; i++) { + *(b->sorted_end[i]) = b->primitives.end; + b->sorted_end[i]++; + } + b->primitives.end++; +} + +int rtbuild_size(RTBuilder *b) +{ + return b->sorted_end[0] - b->sorted_begin[0]; +} + + +template<class Obj, int Axis> +static bool obj_bb_compare(const Obj &a, const Obj &b) +{ + if (a->bb[Axis] != b->bb[Axis]) + return a->bb[Axis] < b->bb[Axis]; + return a->obj < b->obj; +} + +template<class Item> +static void object_sort(Item *begin, Item *end, int axis) +{ + if (axis == 0) return std::sort(begin, end, obj_bb_compare<Item, 0> ); + if (axis == 1) return std::sort(begin, end, obj_bb_compare<Item, 1> ); + if (axis == 2) return std::sort(begin, end, obj_bb_compare<Item, 2> ); + assert(false); +} + +void rtbuild_done(RTBuilder *b, RayObjectControl *ctrl) +{ + for (int i = 0; i < 3; i++) { + if (b->sorted_begin[i]) { + if (RE_rayobjectcontrol_test_break(ctrl)) break; + object_sort(b->sorted_begin[i], b->sorted_end[i], i); + } + } +} + +RayObject *rtbuild_get_primitive(RTBuilder *b, int index) +{ + return b->sorted_begin[0][index]->obj; +} + +RTBuilder *rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp) +{ + rtbuild_init(tmp); + + tmp->depth = b->depth + 1; + + for (int i = 0; i < 3; 
i++) + if (b->sorted_begin[i]) { + tmp->sorted_begin[i] = b->sorted_begin[i] + b->child_offset[child]; + tmp->sorted_end[i] = b->sorted_begin[i] + b->child_offset[child + 1]; + } + else { + tmp->sorted_begin[i] = NULL; + tmp->sorted_end[i] = NULL; + } + + return tmp; +} + +static void rtbuild_calc_bb(RTBuilder *b) +{ + if (b->bb[0] == 1.0e30f) { + for (RTBuilder::Object **index = b->sorted_begin[0]; index != b->sorted_end[0]; index++) + RE_rayobject_merge_bb( (*index)->obj, b->bb, b->bb + 3); + } +} + +void rtbuild_merge_bb(RTBuilder *b, float min[3], float max[3]) +{ + rtbuild_calc_bb(b); + DO_MIN(b->bb, min); + DO_MAX(b->bb + 3, max); +} + +#if 0 +int rtbuild_get_largest_axis(RTBuilder *b) +{ + rtbuild_calc_bb(b); + return bb_largest_axis(b->bb, b->bb + 3); +} + +//Left balanced tree +int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis) +{ + int i; + int mleafs_per_child, Mleafs_per_child; + int tot_leafs = rtbuild_size(b); + int missing_leafs; + + long long s; + + assert(nchilds <= RTBUILD_MAX_CHILDS); + + //TODO optimize calc of leafs_per_child + for (s = nchilds; s < tot_leafs; s *= nchilds) ; + Mleafs_per_child = s / nchilds; + mleafs_per_child = Mleafs_per_child / nchilds; + + //split min leafs per child + b->child_offset[0] = 0; + for (i = 1; i <= nchilds; i++) + b->child_offset[i] = mleafs_per_child; + + //split remaining leafs + missing_leafs = tot_leafs - mleafs_per_child * nchilds; + for (i = 1; i <= nchilds; i++) + { + if (missing_leafs > Mleafs_per_child - mleafs_per_child) + { + b->child_offset[i] += Mleafs_per_child - mleafs_per_child; + missing_leafs -= Mleafs_per_child - mleafs_per_child; + } + else { + b->child_offset[i] += missing_leafs; + missing_leafs = 0; + break; + } + } + + //adjust for accumulative offsets + for (i = 1; i <= nchilds; i++) + b->child_offset[i] += b->child_offset[i - 1]; + + //Count created childs + for (i = nchilds; b->child_offset[i] == b->child_offset[i - 1]; i--) ; + split_leafs(b, b->child_offset, i, axis); + + 
assert(b->child_offset[0] == 0 && b->child_offset[i] == tot_leafs); + return i; +} + + +int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds) +{ + int axis = rtbuild_get_largest_axis(b); + return rtbuild_mean_split(b, nchilds, axis); +} +#endif + +/* + * "separators" is an array of dim NCHILDS-1 + * and indicates where to cut the childs + */ +#if 0 +int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis) +{ + int size = rtbuild_size(b); + + assert(nchilds <= RTBUILD_MAX_CHILDS); + if (size <= nchilds) + { + return rtbuild_mean_split(b, nchilds, axis); + } + else { + int i; + + b->split_axis = axis; + + //Calculate child offsets + b->child_offset[0] = 0; + for (i = 0; i < nchilds - 1; i++) + b->child_offset[i + 1] = split_leafs_by_plane(b, b->child_offset[i], size, separators[i]); + b->child_offset[nchilds] = size; + + for (i = 0; i < nchilds; i++) + if (b->child_offset[i + 1] - b->child_offset[i] == size) + return rtbuild_mean_split(b, nchilds, axis); + + return nchilds; + } +} + +int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds) +{ + int la, i; + float separators[RTBUILD_MAX_CHILDS]; + + rtbuild_calc_bb(b); + + la = bb_largest_axis(b->bb, b->bb + 3); + for (i = 1; i < nchilds; i++) + separators[i - 1] = (b->bb[la + 3] - b->bb[la]) * i / nchilds; + + return rtbuild_median_split(b, separators, nchilds, la); +} +#endif + +//Heuristics Object Splitter + + +struct SweepCost { + float bb[6]; + float cost; +}; + +/* Object Surface Area Heuristic splitter */ +int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds) +{ + int size = rtbuild_size(b); + assert(nchilds == 2); + assert(size > 1); + int baxis = -1, boffset = 0; + + if (size > nchilds) { + if (b->depth > RTBUILD_MAX_SAH_DEPTH) { + // for degenerate cases we avoid running out of stack space + // by simply splitting the children in the middle + b->child_offset[0] = 0; + b->child_offset[1] = (size+1)/2; + b->child_offset[2] = size; + return 2; + } + + float bcost 
= FLT_MAX; + baxis = -1; + boffset = size / 2; + + SweepCost *sweep = (SweepCost *)MEM_mallocN(sizeof(SweepCost) * size, "RTBuilder.HeuristicSweep"); + + for (int axis = 0; axis < 3; axis++) { + SweepCost sweep_left; + + RTBuilder::Object **obj = b->sorted_begin[axis]; + +// float right_cost = 0; + for (int i = size - 1; i >= 0; i--) { + if (i == size - 1) { + copy_v3_v3(sweep[i].bb, obj[i]->bb); + copy_v3_v3(sweep[i].bb + 3, obj[i]->bb + 3); + sweep[i].cost = obj[i]->cost; + } + else { + sweep[i].bb[0] = min_ff(obj[i]->bb[0], sweep[i + 1].bb[0]); + sweep[i].bb[1] = min_ff(obj[i]->bb[1], sweep[i + 1].bb[1]); + sweep[i].bb[2] = min_ff(obj[i]->bb[2], sweep[i + 1].bb[2]); + sweep[i].bb[3] = max_ff(obj[i]->bb[3], sweep[i + 1].bb[3]); + sweep[i].bb[4] = max_ff(obj[i]->bb[4], sweep[i + 1].bb[4]); + sweep[i].bb[5] = max_ff(obj[i]->bb[5], sweep[i + 1].bb[5]); + sweep[i].cost = obj[i]->cost + sweep[i + 1].cost; + } +// right_cost += obj[i]->cost; + } + + sweep_left.bb[0] = obj[0]->bb[0]; + sweep_left.bb[1] = obj[0]->bb[1]; + sweep_left.bb[2] = obj[0]->bb[2]; + sweep_left.bb[3] = obj[0]->bb[3]; + sweep_left.bb[4] = obj[0]->bb[4]; + sweep_left.bb[5] = obj[0]->bb[5]; + sweep_left.cost = obj[0]->cost; + +// right_cost -= obj[0]->cost; if (right_cost < 0) right_cost = 0; + + for (int i = 1; i < size; i++) { + //Worst case heuristic (cost of each child is linear) + float hcost, left_side, right_side; + + // not using log seems to have no impact on raytracing perf, but + // makes tree construction quicker, left out for now to test (brecht) + // left_side = bb_area(sweep_left.bb, sweep_left.bb + 3) * (sweep_left.cost + logf((float)i)); + // right_side = bb_area(sweep[i].bb, sweep[i].bb + 3) * (sweep[i].cost + logf((float)size - i)); + left_side = bb_area(sweep_left.bb, sweep_left.bb + 3) * (sweep_left.cost); + right_side = bb_area(sweep[i].bb, sweep[i].bb + 3) * (sweep[i].cost); + hcost = left_side + right_side; + + assert(left_side >= 0); + assert(right_side >= 0); + + if 
(left_side > bcost) break; //No way we can find a better heuristic in this axis + + assert(hcost >= 0); + // this makes sure the tree built is the same whatever is the order of the sorting axis + if (hcost < bcost || (hcost == bcost && axis < baxis)) { + bcost = hcost; + baxis = axis; + boffset = i; + } + DO_MIN(obj[i]->bb, sweep_left.bb); + DO_MAX(obj[i]->bb + 3, sweep_left.bb + 3); + + sweep_left.cost += obj[i]->cost; +// right_cost -= obj[i]->cost; if (right_cost < 0) right_cost = 0; + } + + //assert(baxis >= 0 && baxis < 3); + if (!(baxis >= 0 && baxis < 3)) + baxis = 0; + } + + + MEM_freeN(sweep); + } + else if (size == 2) { + baxis = 0; + boffset = 1; + } + else if (size == 1) { + b->child_offset[0] = 0; + b->child_offset[1] = 1; + return 1; + } + + b->child_offset[0] = 0; + b->child_offset[1] = boffset; + b->child_offset[2] = size; + + + /* Adjust sorted arrays for childs */ + for (int i = 0; i < boffset; i++) b->sorted_begin[baxis][i]->selected = true; + for (int i = boffset; i < size; i++) b->sorted_begin[baxis][i]->selected = false; + for (int i = 0; i < 3; i++) + std::stable_partition(b->sorted_begin[i], b->sorted_end[i], selected_node); + + return nchilds; +} + +/* + * Helper code + * PARTITION code / used on mean-split + * basically this a std::nth_element (like on C++ STL algorithm) + */ +#if 0 +static void split_leafs(RTBuilder *b, int *nth, int partitions, int split_axis) +{ + int i; + b->split_axis = split_axis; + + for (i = 0; i < partitions - 1; i++) + { + assert(nth[i] < nth[i + 1] && nth[i + 1] < nth[partitions]); + + if (split_axis == 0) std::nth_element(b, nth[i], nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 0>); + if (split_axis == 1) std::nth_element(b, nth[i], nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 1>); + if (split_axis == 2) std::nth_element(b, nth[i], nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 2>); + } +} +#endif + +/* + * Bounding Box utils + */ +float bb_volume(const float 
min[3], const float max[3]) +{ + return (max[0] - min[0]) * (max[1] - min[1]) * (max[2] - min[2]); +} + +float bb_area(const float min[3], const float max[3]) +{ + float sub[3], a; + sub[0] = max[0] - min[0]; + sub[1] = max[1] - min[1]; + sub[2] = max[2] - min[2]; + + a = (sub[0] * sub[1] + sub[0] * sub[2] + sub[1] * sub[2]) * 2.0f; + /* used to have an assert() here on negative results + * however, in this case its likely some overflow or ffast math error. + * so just return 0.0f instead. */ + return a < 0.0f ? 0.0f : a; +} + +int bb_largest_axis(const float min[3], const float max[3]) +{ + float sub[3]; + + sub[0] = max[0] - min[0]; + sub[1] = max[1] - min[1]; + sub[2] = max[2] - min[2]; + if (sub[0] > sub[1]) { + if (sub[0] > sub[2]) + return 0; + else + return 2; + } + else { + if (sub[1] > sub[2]) + return 1; + else + return 2; + } +} + +/* only returns 0 if merging inner and outerbox would create a box larger than outer box */ +int bb_fits_inside(const float outer_min[3], const float outer_max[3], + const float inner_min[3], const float inner_max[3]) +{ + int i; + for (i = 0; i < 3; i++) + if (outer_min[i] > inner_min[i]) return 0; + + for (i = 0; i < 3; i++) + if (outer_max[i] < inner_max[i]) return 0; + + return 1; +} diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.h b/source/blender/render/intern/raytrace/rayobject_rtbuild.h new file mode 100644 index 00000000000..fc42bc36d92 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.h @@ -0,0 +1,125 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_rtbuild.h + * \ingroup render + */ + +#ifndef __RAYOBJECT_RTBUILD_H__ +#define __RAYOBJECT_RTBUILD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "rayobject.h" + + +/* + * Ray Tree Builder + * this structs helps building any type of tree + * it contains several methods to organize/split nodes + * allowing to create a given tree on the fly. + * + * Idea is that other trees BVH, BIH can use this code to + * generate with simple calls, and then convert to the theirs + * specific structure on the fly. 
+ */ +#define RTBUILD_MAX_CHILDS 32 +#define RTBUILD_MAX_SAH_DEPTH 256 + + +typedef struct RTBuilder { + struct Object { + RayObject *obj; + float cost; + float bb[6]; + int selected; + }; + + /* list to all primitives added in this tree */ + struct { + Object *begin, *end; + int maxsize; + } primitives; + + /* sorted list of rayobjects */ + struct Object **sorted_begin[3], **sorted_end[3]; + + /* axis used (if any) on the split method */ + int split_axis; + + /* child partitions calculated during splitting */ + int child_offset[RTBUILD_MAX_CHILDS + 1]; + +// int child_sorted_axis; /* -1 if not sorted */ + + float bb[6]; + + /* current depth */ + int depth; +} RTBuilder; + +/* used during creation */ +RTBuilder *rtbuild_create(int size); +void rtbuild_free(RTBuilder *b); +void rtbuild_add(RTBuilder *b, RayObject *o); +void rtbuild_done(RTBuilder *b, RayObjectControl *c); +void rtbuild_merge_bb(RTBuilder *b, float min[3], float max[3]); +int rtbuild_size(RTBuilder *b); + +RayObject *rtbuild_get_primitive(RTBuilder *b, int offset); + +/* used during tree reorganization */ +RTBuilder *rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp); + +/* Calculates child partitions and returns number of efectively needed partitions */ +int rtbuild_get_largest_axis(RTBuilder *b); + +//Object partition +int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis); +int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds); + +int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds); + +//Space partition +int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis); +int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds); + + +/* bb utils */ +float bb_area(const float min[3], const float max[3]); +float bb_volume(const float min[3], const float max[3]); +int bb_largest_axis(const float min[3], const float max[3]); +int bb_fits_inside(const float outer_min[3], const float outer_max[3], + const float inner_min[3], const float 
inner_max[3]); + +#ifdef __cplusplus +} +#endif + +#endif /* __RAYOBJECT_RTBUILD_H__ */ diff --git a/source/blender/render/intern/raytrace/rayobject_svbvh.cpp b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp new file mode 100644 index 00000000000..fcd692fac02 --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp @@ -0,0 +1,192 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/rayobject_svbvh.cpp + * \ingroup render + */ + + +#include "MEM_guardedalloc.h" + +#include "BLI_utildefines.h" + +#include "vbvh.h" +#include "svbvh.h" +#include "reorganize.h" + +#ifdef __SSE__ + +#define DFS_STACK_SIZE 256 + +struct SVBVHTree { + RayObject rayobj; + + SVBVHNode *root; + MemArena *node_arena; + + float cost; + RTBuilder *builder; +}; + +/* + * Cost to test N childs + */ +struct PackCost { + float operator()(int n) + { + return (n / 4) + ((n % 4) > 2 ? 
1 : n % 4); + } +}; + + +template<> +void bvh_done<SVBVHTree>(SVBVHTree *obj) +{ + rtbuild_done(obj->builder, &obj->rayobj.control); + + //TODO find a away to exactly calculate the needed memory + MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "svbvh arena"); + BLI_memarena_use_malloc(arena1); + + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "svbvh arena2"); + BLI_memarena_use_malloc(arena2); + BLI_memarena_use_align(arena2, 16); + + //Build and optimize the tree + if (0) { + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + reorganize(root); + remove_useless(root, &root); + bvh_refit(root); + + pushup(root); + pushdown(root); + pushup_simd<VBVHNode, 4>(root); + + obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root); + } + else { + //Finds the optimal packing of this tree using a given cost model + //TODO this uses quite a lot of memory, find ways to reduce memory usage during building + OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + BLI_memarena_free(arena2); + return; + } + + if (root) { + VBVH_optimalPackSIMD<OVBVHNode, PackCost>(PackCost()).transform(root); + obj->root = Reorganize_SVBVH<OVBVHNode>(arena2).transform(root); + } + else + obj->root = NULL; + } + + //Free data + BLI_memarena_free(arena1); + + obj->node_arena = arena2; + obj->cost = 1.0; + + rtbuild_free(obj->builder); + obj->builder = NULL; +} + +template<int StackSize> +static int intersect(SVBVHTree *obj, Isect *isec) +{ + //TODO renable hint support + if (RE_rayobject_isAligned(obj->root)) { + if (isec->mode == RE_RAY_SHADOW) + return svbvh_node_stack_raycast<StackSize, true>(obj->root, isec); + else + return 
svbvh_node_stack_raycast<StackSize, false>(obj->root, isec); + } + else + return RE_rayobject_intersect( (RayObject *) obj->root, isec); +} + +template<class Tree> +static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max)) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject *)tree->root; + } +} +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +static RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int (*)(Tree *, Isect *)) & intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void (*)(Tree *, RayObject *)) & bvh_add<Tree>), + (RE_rayobject_done_callback) ((void (*)(Tree *)) & bvh_done<Tree>), + (RE_rayobject_free_callback) ((void (*)(Tree *)) & bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void (*)(Tree *, float *, float *)) & bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float (*)(Tree *)) & bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +static RayObjectAPI *bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree, 1024>(); + + if (maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return NULL; +} + +RayObject *RE_rayobject_svbvh_create(int size) +{ + return bvh_create_tree<SVBVHTree, DFS_STACK_SIZE>(size); +} + +#else + +RayObject *RE_rayobject_svbvh_create(int UNUSED(size)) +{ + puts("WARNING: SSE disabled at compile time\n"); + return NULL; +} + +#endif diff --git a/source/blender/render/intern/raytrace/rayobject_vbvh.cpp b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp new file mode 100644 index 00000000000..b63a11047dd --- /dev/null +++ b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp @@ -0,0 +1,206 @@ +/* + * ***** BEGIN GPL 
/*
 * Cost model: the cost of testing n children is simply n.
 */
struct PackCost {
	float operator()(int n)
	{
		const float tests = static_cast<float>(n);
		return tests;
	}
};
arena"); + BLI_memarena_use_malloc(arena1); + + //Build and optimize the tree + if (1) { + VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder); + if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) { + BLI_memarena_free(arena1); + return; + } + + if (root) { + reorganize(root); + remove_useless(root, &root); + bvh_refit(root); + + pushup(root); + pushdown(root); + obj->root = root; + } + else + obj->root = NULL; + } + else { + /* TODO */ +#if 0 + MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vbvh arena2"); + BLI_memarena_use_malloc(arena2); + + //Finds the optimal packing of this tree using a given cost model + //TODO this uses quite a lot of memory, find ways to reduce memory usage during building + OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena2).transform(obj->builder); + VBVH_optimalPackSIMD<OVBVHNode, PackCost>(PackCost()).transform(root); + obj->root = Reorganize_VBVH<OVBVHNode>(arena1).transform(root); + + BLI_memarena_free(arena2); +#endif + } + + //Cleanup + rtbuild_free(obj->builder); + obj->builder = NULL; + + obj->node_arena = arena1; + obj->cost = 1.0; +} + +template<int StackSize> +static int intersect(VBVHTree *obj, Isect *isec) +{ + //TODO renable hint support + if (RE_rayobject_isAligned(obj->root)) { + if (isec->mode == RE_RAY_SHADOW) + return bvh_node_stack_raycast<VBVHNode, StackSize, false, true>(obj->root, isec); + else + return bvh_node_stack_raycast<VBVHNode, StackSize, false, false>(obj->root, isec); + } + else + return RE_rayobject_intersect( (RayObject *) obj->root, isec); +} + +template<class Tree> +static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max)) +{ + //TODO renable hint support + { + hint->size = 0; + hint->stack[hint->size++] = (RayObject *)tree->root; + } +} + +#if 0 /* UNUSED */ +static void bfree(VBVHTree *tree) +{ + if (tot_pushup + tot_pushdown + tot_hints + tot_moves) { + if (G.debug & G_DEBUG) { + printf("tot 
pushups: %d\n", tot_pushup); + printf("tot pushdowns: %d\n", tot_pushdown); + printf("tot moves: %d\n", tot_moves); + printf("tot hints created: %d\n", tot_hints); + } + + tot_pushup = 0; + tot_pushdown = 0; + tot_hints = 0; + tot_moves = 0; + } + bvh_free(tree); +} +#endif + +/* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */ +template<class Tree, int STACK_SIZE> +static RayObjectAPI make_api() +{ + static RayObjectAPI api = + { + (RE_rayobject_raycast_callback) ((int (*)(Tree *, Isect *)) & intersect<STACK_SIZE>), + (RE_rayobject_add_callback) ((void (*)(Tree *, RayObject *)) & bvh_add<Tree>), + (RE_rayobject_done_callback) ((void (*)(Tree *)) & bvh_done<Tree>), + (RE_rayobject_free_callback) ((void (*)(Tree *)) & bvh_free<Tree>), + (RE_rayobject_merge_bb_callback)((void (*)(Tree *, float *, float *)) & bvh_bb<Tree>), + (RE_rayobject_cost_callback) ((float (*)(Tree *)) & bvh_cost<Tree>), + (RE_rayobject_hint_bb_callback) ((void (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>) + }; + + return api; +} + +template<class Tree> +RayObjectAPI *bvh_get_api(int maxstacksize) +{ + static RayObjectAPI bvh_api256 = make_api<Tree, 1024>(); + + if (maxstacksize <= 1024) return &bvh_api256; + assert(maxstacksize <= 256); + return 0; +} + +RayObject *RE_rayobject_vbvh_create(int size) +{ + return bvh_create_tree<VBVHTree, DFS_STACK_SIZE>(size); +} diff --git a/source/blender/render/intern/raytrace/reorganize.h b/source/blender/render/intern/raytrace/reorganize.h new file mode 100644 index 00000000000..3fdd3363edb --- /dev/null +++ b/source/blender/render/intern/raytrace/reorganize.h @@ -0,0 +1,513 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2009 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): André Pinto. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/raytrace/reorganize.h + * \ingroup render + */ + + +#include <float.h> +#include <math.h> +#include <stdio.h> + +#include <algorithm> +#include <queue> +#include <vector> + +#include "BKE_global.h" + +#ifdef _WIN32 +# ifdef INFINITY +# undef INFINITY +# endif +# define INFINITY FLT_MAX // in mingw math.h: (1.0F/0.0F). This generates compile error, though. 
/*
 * Walks the tree breadth-first and re-parents every inner node under the
 * candidate chosen by reorganize_find_fittest_parent() (the search starts
 * with `root` as the default answer).
 *
 * A child that is already below its chosen parent stays where it is;
 * otherwise it is unlinked from the current sibling list and pushed at the
 * front of the chosen parent's child list.
 */
template<class Node>
static void reorganize(Node *root)
{
	std::queue<Node *> pending;
	pending.push(root);

	while (!pending.empty()) {
		Node *node = pending.front();
		pending.pop();

		if (!RE_rayobject_isAligned(node->child)) {
			continue;
		}

		/* `link` always points at the pointer that references the current child */
		Node **link = &node->child;
		while (*link) {
			Node *cur = *link;
			assert(RE_rayobject_isAligned(cur));
			pending.push(cur);

			std::pair<float, Node *> best(FLT_MAX, root);
			reorganize_find_fittest_parent(root, cur, best);
			Node *target = best.second;

			if (target == node) {
				/* already inside the fittest BB, advance to the next sibling */
				link = &cur->sibling;
				continue;
			}

			/* unlink from this list, then prepend to the new parent's children */
			*link = cur->sibling;
			cur->sibling = target->child;
			target->child = cur;
		}
	}
}
(*prev)->sibling = next; + prev = &((*prev)->sibling); + } + } + } + if (node->child) { + if (RE_rayobject_isAligned(node->child) && node->child->sibling == 0) + *new_node = node->child; + } + else if (node->child == NULL) { + *new_node = NULL; + } +} + +/* + * Minimizes expected number of BBtest by colapsing nodes + * it uses surface area heuristic for determining whether a node should be colapsed + */ +template<class Node> +static void pushup(Node *parent) +{ + if (is_leaf(parent)) return; + + float p_area = bb_area(parent->bb, parent->bb + 3); + Node **prev = &parent->child; + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) { + const float c_area = bb_area(child->bb, child->bb + 3); + const int nchilds = count_childs(child); + float original_cost = ((p_area != 0.0f) ? (c_area / p_area) * nchilds : 1.0f) + 1; + float flatten_cost = nchilds; + if (flatten_cost < original_cost && nchilds >= 2) { + append_sibling(child, child->child); + child = child->sibling; + *prev = child; + +// *prev = child->child; +// append_sibling( *prev, child->sibling ); +// child = *prev; + tot_pushup++; + } + else { + *prev = child; + prev = &(*prev)->sibling; + child = *prev; + } + } + + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling) + pushup(child); +} + +/* + * try to optimize number of childs to be a multiple of SSize + */ +template<class Node, int SSize> +static void pushup_simd(Node *parent) +{ + if (is_leaf(parent)) return; + + int n = count_childs(parent); + + Node **prev = &parent->child; + for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) { + int cn = count_childs(child); + if (cn - 1 <= (SSize - (n % SSize) ) % SSize && RE_rayobject_isAligned(child->child) ) { + n += (cn - 1); + append_sibling(child, child->child); + child = child->sibling; + *prev = child; + } + else { + *prev = child; + prev = &(*prev)->sibling; + child = *prev; + } + } + + for (Node *child = 
/*
 * BVH refit
 * Recomputes the bounding box of every inner node from its children, bottom-up
 * (useful after the child lists were modified).
 *
 * Returns the summed difference (old area - new area) over the refitted nodes;
 * leaves and nodes whose first child is a leaf contribute 0 and are left as is.
 */
template<class Node>
static float bvh_refit(Node *node)
{
	if (is_leaf(node) || is_leaf(node->child)) {
		return 0;
	}

	float gained = 0;

	/* children first, so their boxes are final before they are merged below */
	Node *sub = node->child;
	while (sub) {
		gained += bvh_refit(sub);
		sub = sub->sibling;
	}

	const float area_before = bb_area(node->bb, node->bb + 3);

	INIT_MINMAX(node->bb, node->bb + 3);
	for (Node *it = node->child; it; it = it->sibling) {
		DO_MIN(it->bb, node->bb);
		DO_MAX(it->bb + 3, node->bb + 3);
	}

	gained += area_before - bb_area(node->bb, node->bb + 3);
	return gained;
}
/*
 * Calculates an optimal SIMD packing.
 *
 * Given a cost functor TestCost (cost of testing N bounding boxes at once),
 * calc_costs() fills Node::cut_cost / Node::cut_size / Node::best_cutsize
 * bottom-up and transform() then lets the node rewrite its child lists through
 * Node::optimize(). Node must provide bb, child, sibling, cut_cost, cut_size,
 * best_cutsize, get_cost() and optimize().
 */
template<class Node, class TestCost>
struct VBVH_optimalPackSIMD {
	/* cost functor, called as testcost(number_of_children) */
	TestCost testcost;

	VBVH_optimalPackSIMD(TestCost testcost)
	{
		this->testcost = testcost;
	}

	/*
	 * Calculates the best cuts of a node.
	 *
	 * All work happens in the constructor: for every cut size from the number
	 * of children up to MAX_CUT_SIZE it stores the minimum cost in
	 * node->cut_cost[] and, for every child, the cut size that child must use
	 * to reach that minimum (child->cut_size[]).
	 */
	struct calc_best {
		/* inner children of the node and their hit probability (child area / parent area) */
		Node *child[MAX_OPTIMIZE_CHILDS];
		float child_hit_prob[MAX_OPTIMIZE_CHILDS];

		calc_best(Node *node)
		{
			int nchilds = 0;
			//Fetch childs and needed data
			{
				float parent_area = bb_area(node->bb, node->bb + 3);
				for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
					this->child[nchilds] = child;
					/* a degenerate (zero area) parent counts every child as always hit */
					this->child_hit_prob[nchilds] = (parent_area != 0.0f) ? bb_area(child->bb, child->bb + 3) / parent_area : 1.0f;
					nchilds++;
				}

				assert(nchilds >= 2 && nchilds <= MAX_OPTIMIZE_CHILDS);
			}


			//Build DP table to find minimum cost to represent this node with a given cutsize
			/* cost[i][s]: minimum cost of covering the first i children with a total cut size of s.
			 * bt[i][s]:   cut size given to child i in that optimum; only written together with a
			 *             finite cost[i][s], and only read below for finite entries. */
			int bt[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1]; //backtrace table
			float cost[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1]; //cost table (can be reduced to float[2][MAX_CUT_COST])

			for (int i = 0; i <= nchilds; i++) {
				for (int j = 0; j <= MAX_CUT_SIZE; j++) {
					cost[i][j] = INFINITY;
				}
			}

			/* no children covered, nothing spent */
			cost[0][0] = 0;

			/* every child takes at least one slot, hence `size` starts at i - 1 */
			for (int i = 1; i <= nchilds; i++) {
				for (int size = i - 1; size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; size++) {
					for (int cut = 1; cut + size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; cut++) {
						float new_cost = cost[i - 1][size] + child_hit_prob[i - 1] * child[i - 1]->get_cost(cut);
						if (new_cost < cost[i][size + cut]) {
							cost[i][size + cut] = new_cost;
							bt[i][size + cut] = cut;
						}
					}
				}
			}

			/* Save the ways to achieve the minimum cost with a given cutsize */
			for (int i = nchilds; i <= MAX_CUT_SIZE; i++) {
				node->cut_cost[i - 1] = cost[nchilds][i];
				if (cost[nchilds][i] < INFINITY) {
					/* walk the backtrace from the last child to the first */
					int current_size = i;
					for (int j = nchilds; j > 0; j--) {
						child[j - 1]->cut_size[i - 1] = bt[j][current_size];
						current_size -= bt[j][current_size];
					}
				}
			}
		}
	};

	/*
	 * Recursively computes cut_cost[] and best_cutsize for `node` and its subtree.
	 * cut_cost[0] ends up holding the cost of the subtree when it is
	 * represented by a single node; best_cutsize is the number of children
	 * that single node should have (CUT_SIZE_INVALID if no finite cost exists).
	 */
	void calc_costs(Node *node)
	{

		if (RE_rayobject_isAligned(node->child) ) {
			int nchilds = 0;
			for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
				calc_costs(child);
				nchilds++;
			}

			for (int i = 0; i < MAX_CUT_SIZE; i++)
				node->cut_cost[i] = INFINITY;

			//Nodes with more than MAX_CUT_SIZE childs are not repacked, only their cost is evaluated
			if (nchilds > MAX_CUT_SIZE) {
				float cost = 0;

				float parent_area = bb_area(node->bb, node->bb + 3);
				for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
					cost += ((parent_area != 0.0f) ? (bb_area(child->bb, child->bb + 3) / parent_area) : 1.0f) * child->get_cost(1);
				}

				cost += testcost(nchilds);
				node->cut_cost[0] = cost;
				node->best_cutsize = nchilds;
			}
			else {
				/* fills node->cut_cost[nchilds - 1 .. MAX_CUT_SIZE - 1] and the childs' cut_size[] */
				calc_best calc(node);

				//calc expected cost if we optimally pack this node
				for (int cutsize = nchilds; cutsize <= MAX_CUT_SIZE; cutsize++) {
					float m = node->get_cost(cutsize) + testcost(cutsize);
					if (m < node->cut_cost[0]) {
						node->cut_cost[0] = m;
						node->best_cutsize = cutsize;
					}
				}
			}

			if (node->cut_cost[0] == INFINITY) {
				node->best_cutsize = CUT_SIZE_INVALID;
			}
		}
		else {
			/* child is not an inner node: fixed cost of 1, larger cuts are impossible */
			node->cut_cost[0] = 1.0f;
			for (int i = 1; i < MAX_CUT_SIZE; i++)
				node->cut_cost[i] = INFINITY;

			/* node->best_cutsize can remain unset here */
		}
	}

	/*
	 * Entry point: evaluates the costs and applies the best cuts in place.
	 * Returns `node` itself; nodes whose child is not an inner node are
	 * returned untouched. `node` is dereferenced, so it must not be NULL.
	 */
	Node *transform(Node *node)
	{
		if (RE_rayobject_isAligned(node->child)) {
#ifdef DEBUG
			/* print the expected cost only for the first tree processed */
			static int num = 0;
			bool first = false;
			if (num == 0) { num++; first = true; }
#endif

			calc_costs(node);

#ifdef DEBUG
			if (first && G.debug) {
				printf("expected cost = %f (%d)\n", node->cut_cost[0], node->best_cutsize);
			}
#endif
			node->optimize();
		}
		return node;
	}
};
/*
 * Inner node of the SIMD BVH, holding up to 4 children.
 */
struct SVBVHNode {
	/* Bounding boxes of the children, 6 floats per child (filled through
	 * Reorganize_SVBVH::copy_bb). For every complete group of 4 children
	 * Reorganize_SVBVH::prepare_for_simd() transposes the data so that the
	 * same component of the 4 boxes is contiguous; the traversal code reads
	 * such a group as __m128 values. */
	float child_bb[24];
	/* Child pointers; an entry is either another SVBVHNode or a ray object
	 * (told apart with RE_rayobject_isAligned), padding entries are NULL. */
	SVBVHNode *child[4];
	/* Number of used slots, including padding slots added by Reorganize_SVBVH. */
	int nchilds;
};
_mm_sub_ps(bb_group[isec->bv_index[5]], start2); + const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]); + const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]); + const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]); + const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0); + const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0); + const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1); + const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1); + const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2); + const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2); + const __m128 tmin1 = _mm_max_ps(tmin0, mul0); + const __m128 tmax1 = _mm_min_ps(tmax0, mul1); + const __m128 tmin2 = _mm_max_ps(tmin1, mul2); + const __m128 tmax2 = _mm_min_ps(tmax1, mul3); + const __m128 tmin3 = _mm_max_ps(tmin2, mul4); + const __m128 tmax3 = _mm_min_ps(tmax2, mul5); + + return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); +} + +static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb) +{ + const float *bb = _bb; + + float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; + float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; + float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; + float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; + float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; + float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; + + RE_RC_COUNT(isec->raycounter->bb.test); + + if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; + if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0; + if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; + + RE_RC_COUNT(isec->raycounter->bb.hit); + + return 1; +} + +static bool svbvh_node_is_leaf(const SVBVHNode *node) +{ + return !RE_rayobject_isAligned(node); +} + +template<int MAX_STACK_SIZE, bool SHADOW> +static int svbvh_node_stack_raycast(SVBVHNode *root, Isect 
*isec) +{ + SVBVHNode *stack[MAX_STACK_SIZE], *node; + int hit = 0, stack_pos = 0; + + stack[stack_pos++] = root; + + while (stack_pos) { + node = stack[--stack_pos]; + + if (!svbvh_node_is_leaf(node)) { + int nchilds = node->nchilds; + + if (nchilds == 4) { + float *child_bb = node->child_bb; + int res = svbvh_bb_intersect_test_simd4(isec, ((__m128 *) (child_bb))); + SVBVHNode **child = node->child; + + RE_RC_COUNT(isec->raycounter->simd_bb.test); + + if (res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if (res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if (res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + if (res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } + } + else { + float *child_bb = node->child_bb; + SVBVHNode **child = node->child; + int i; + + for (i = 0; i < nchilds; i++) { + if (svbvh_bb_intersect_test(isec, (float *)child_bb + 6 * i)) { + stack[stack_pos++] = child[i]; + } + } + } + } + else { + hit |= RE_rayobject_intersect((RayObject *)node, isec); + if (SHADOW && hit) break; + } + } + + return hit; +} + + +template<> +inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float min[3], float max[3]) +{ + if (is_leaf(node)) { + RE_rayobject_merge_bb((RayObject *)node, min, max); + } + else { + int i; + for (i = 0; i + 4 <= node->nchilds; i += 4) { + float *res = node->child_bb + 6 * i; + for (int j = 0; j < 3; j++) { + min[j] = min_ff(min[j], + min_ffff(res[4 * j + 0], + res[4 * j + 1], + res[4 * j + 2], + res[4 * j + 3])); + } + for (int j = 0; j < 3; j++) { + max[j] = max_ff(max[j], + max_ffff(res[4 * (j + 3) + 0], + res[4 * (j + 3) + 1], + res[4 * (j + 3) + 2], + res[4 * (j + 3) + 3])); + } + } + + for (; i < node->nchilds; i++) { + DO_MIN(node->child_bb + 6 * i, min); + DO_MAX(node->child_bb + 3 + 6 * i, max); + } + } +} + + + +/* + * Builds a SVBVH tree form a VBVHTree + */ 
template<class OldNode>
struct Reorganize_SVBVH {
	/* Converts a tree of OldNode (child/sibling lists) into SVBVHNode nodes
	 * allocated from `arena`. The counters below are statistics only; they are
	 * printed by the (currently disabled) code in the destructor. */
	MemArena *arena;

	float childs_per_node;      /* sum of the child counts of all created nodes */
	int nodes_with_childs[16];  /* histogram: created nodes per child count (counts < 16 only) */
	int useless_bb;             /* number of padding boxes added */
	int nodes;                  /* number of created nodes */

	Reorganize_SVBVH(MemArena *a)
	{
		arena = a;
		nodes = 0;
		childs_per_node = 0;
		useless_bb = 0;

		for (int i = 0; i < 16; i++) {
			nodes_with_childs[i] = 0;
		}
	}

	~Reorganize_SVBVH()
	{
#if 0
		{
			printf("%f childs per node\n", childs_per_node / nodes);
			printf("%d childs BB are useless\n", useless_bb);
			for (int i = 0; i < 16; i++) {
				printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, nodes_with_childs[i] / float(nodes));
			}
		}
#endif
	}

	/* Allocates one node from the arena and records its slot count.
	 * NOTE(review): the allocation is always sizeof(SVBVHNode), i.e. room for
	 * 4 children, whatever `nchilds` is - callers presumably guarantee
	 * nchilds <= 4; confirm. child[] and child_bb[] are not initialized here. */
	SVBVHNode *create_node(int nchilds)
	{
		SVBVHNode *node = (SVBVHNode *)BLI_memarena_alloc(arena, sizeof(SVBVHNode));
		node->nchilds = nchilds;

		return node;
	}

	/* Copies one bounding box (6 floats) from old_bb to bb. */
	void copy_bb(float bb[6], const float old_bb[6])
	{
		std::copy(old_bb, old_bb + 6, bb);
	}

	/* Transposes every complete group of 4 boxes from "6 floats per box" to
	 * "4 floats per component", the layout read by the SIMD box test. */
	void prepare_for_simd(SVBVHNode *node)
	{
		int i = 0;
		while (i + 4 <= node->nchilds) {
			float vec_tmp[4 * 6];
			float *res = node->child_bb + 6 * i;
			std::copy(res, res + 6 * 4, vec_tmp);

			/* component j of boxes 0..3 becomes res[4 * j .. 4 * j + 3] */
			for (int j = 0; j < 6; j++) {
				res[4 * j + 0] = vec_tmp[6 * 0 + j];
				res[4 * j + 1] = vec_tmp[6 * 1 + j];
				res[4 * j + 2] = vec_tmp[6 * 2 + j];
				res[4 * j + 3] = vec_tmp[6 * 3 + j];
			}

			i += 4;
		}
	}

	/* Rounds num up to the next multiple of amt; amt must be power of two */
	inline int padup(int num, int amt)
	{
		return ((num + (amt - 1)) & ~(amt - 1));
	}

	/* Recursively converts `old` and returns the new node.
	 * Leaves are returned unchanged (cast only), and a node whose child is a
	 * leaf is replaced by that leaf. */
	SVBVHNode *transform(OldNode *old)
	{
		if (is_leaf(old))
			return (SVBVHNode *)old;
		if (is_leaf(old->child))
			return (SVBVHNode *)old->child;

		/* a node with 3 (mod 4) children gets one padding slot so the whole
		 * group can go through the 4-wide test */
		int nchilds = count_childs(old);
		int alloc_childs = nchilds;
		if (nchilds % 4 > 2)
			alloc_childs = padup(nchilds, 4);

		SVBVHNode *node = create_node(alloc_childs);

		childs_per_node += nchilds;
		nodes++;
		if (nchilds < 16)
			nodes_with_childs[nchilds]++;

		/* padding slots: NULL child with an inverted (min > max) box */
		useless_bb += alloc_childs - nchilds;
		while (alloc_childs > nchilds) {
			const static float def_bb[6] = {FLT_MAX, FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX};
			alloc_childs--;
			node->child[alloc_childs] = NULL;
			copy_bb(node->child_bb + alloc_childs * 6, def_bb);
		}

		/* children are stored in reverse list order, from slot nchilds - 1 down to 0 */
		int i = nchilds;
		for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling) {
			i--;
			node->child[i] = transform(o_child);
			if (is_leaf(o_child)) {
				/* a leaf has no stored box: query it, and stop because a
				 * leaf ends the child list (see count_childs) */
				float bb[6];
				INIT_MINMAX(bb, bb + 3);
				RE_rayobject_merge_bb((RayObject *)o_child, bb, bb + 3);
				copy_bb(node->child_bb + i * 6, bb);
				break;
			}
			else {
				copy_bb(node->child_bb + i * 6, o_child->bb);
			}
		}
		assert(i == 0);

		prepare_for_simd(node);

		return node;
	}
};
/*
 * Links `sibling` after the last element of the sibling list that starts at
 * `node`. `node` must not be NULL; `sibling` may be NULL (the list is then
 * left terminated as before).
 */
template<class Node>
static void append_sibling(Node *node, Node *sibling)
{
	Node *tail = node;
	for (; tail->sibling; tail = tail->sibling) {
		/* advance to the end of the list */
	}

	tail->sibling = sibling;
}
+ { + } + return NULL; + } + + Node *_transform(RTBuilder *builder) + { + int size = rtbuild_size(builder); + + if (size == 0) { + return NULL; + } + else if (size == 1) { + Node *node = create_node(); + INIT_MINMAX(node->bb, node->bb + 3); + rtbuild_merge_bb(builder, node->bb, node->bb + 3); + node->child = (Node *) rtbuild_get_primitive(builder, 0); + return node; + } + else { + test_break(); + + Node *node = create_node(); + + Node **child = &node->child; + + int nc = rtbuild_split(builder); + INIT_MINMAX(node->bb, node->bb + 3); + + assert(nc == 2); + for (int i = 0; i < nc; i++) { + RTBuilder tmp; + rtbuild_get_child(builder, i, &tmp); + + *child = _transform(&tmp); + DO_MIN((*child)->bb, node->bb); + DO_MAX((*child)->bb + 3, node->bb + 3); + child = &((*child)->sibling); + } + + *child = NULL; + return node; + } + } +}; + +#if 0 +template<class Tree, class OldNode> +struct Reorganize_VBVH { + Tree *tree; + + Reorganize_VBVH(Tree *t) + { + tree = t; + } + + VBVHNode *create_node() + { + VBVHNode *node = (VBVHNode *)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode)); + return node; + } + + void copy_bb(VBVHNode *node, OldNode *old) + { + std::copy(old->bb, old->bb + 6, node->bb); + } + + VBVHNode *transform(OldNode *old) + { + if (is_leaf(old)) + return (VBVHNode *)old; + + VBVHNode *node = create_node(); + VBVHNode **child_ptr = &node->child; + node->sibling = 0; + + copy_bb(node, old); + + for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling) + { + VBVHNode *n_child = transform(o_child); + *child_ptr = n_child; + if (is_leaf(n_child)) return node; + child_ptr = &n_child->sibling; + } + *child_ptr = 0; + + return node; + } +}; +#endif diff --git a/source/blender/render/intern/source/bake.c b/source/blender/render/intern/source/bake.c new file mode 100644 index 00000000000..4a7962b1776 --- /dev/null +++ b/source/blender/render/intern/source/bake.c @@ -0,0 +1,1342 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free 
software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Contributors: 2004/2005/2006 Blender Foundation, full recode + * Contributors: Vertex color baking, Copyright 2011 AutoCRC + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/bake.c + * \ingroup render + */ + + +/* system includes */ +#include <stdio.h> +#include <string.h> + +/* External modules: */ +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_rand.h" +#include "BLI_threads.h" +#include "BLI_utildefines.h" + +#include "DNA_image_types.h" +#include "DNA_material_types.h" +#include "DNA_mesh_types.h" +#include "DNA_meshdata_types.h" + +#include "BKE_customdata.h" +#include "BKE_global.h" +#include "BKE_image.h" +#include "BKE_main.h" +#include "BKE_node.h" +#include "BKE_scene.h" +#include "BKE_library.h" + +#include "IMB_imbuf_types.h" +#include "IMB_imbuf.h" +#include "IMB_colormanagement.h" + +/* local include */ +#include "rayintersection.h" +#include "rayobject.h" +#include "render_types.h" +#include "renderdatabase.h" +#include "shading.h" +#include "zbuf.h" + +#include "PIL_time.h" + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern 
struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + + +/* ************************* bake ************************ */ + + +typedef struct BakeShade { + int thread; + + ShadeSample ssamp; + ObjectInstanceRen *obi; + VlakRen *vlr; + + ZSpan *zspan; + Image *ima; + ImBuf *ibuf; + + int rectx, recty, quad, type, vdone; + bool ready; + + float dir[3]; + Object *actob; + + /* Output: vertex color or image data. If vcol is not NULL, rect and + * rect_float should be NULL. */ + MPoly *mpoly; + MLoop *mloop; + MLoopCol *vcol; + + unsigned int *rect; + float *rect_float; + + /* displacement buffer used for normalization with unknown maximal distance */ + bool use_displacement_buffer; + float *displacement_buffer; + float displacement_min, displacement_max; + + bool use_mask; + char *rect_mask; /* bake pixel mask */ + + float dxco[3], dyco[3]; + + short *do_update; + + struct ColorSpace *rect_colorspace; +} BakeShade; + +static void bake_set_shade_input(ObjectInstanceRen *obi, VlakRen *vlr, ShadeInput *shi, int quad, int UNUSED(isect), int x, int y, float u, float v) +{ + if (quad) + shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3); + else + shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); + + /* cache for shadow */ + shi->samplenr = R.shadowsamplenr[shi->thread]++; + + shi->mask = 0xFFFF; /* all samples */ + + shi->u = -u; + shi->v = -v; + shi->xs = x; + shi->ys = y; + + shade_input_set_uv(shi); + shade_input_set_normals(shi); + + /* no normal flip */ + if (shi->flippednor) + shade_input_flip_normals(shi); + + /* set up view vector to look right at the surface (note that the normal + * is negated in the renderer so it does not need to be done here) */ + shi->view[0] = shi->vn[0]; + shi->view[1] = shi->vn[1]; + shi->view[2] = shi->vn[2]; +} + +static void bake_shade(void *handle, Object *ob, ShadeInput *shi, int UNUSED(quad), int x, int y, float UNUSED(u), float UNUSED(v), float *tvn, float *ttang) +{ + BakeShade *bs = handle; + 
ShadeSample *ssamp = &bs->ssamp; + ShadeResult shr; + VlakRen *vlr = shi->vlr; + + shade_input_init_material(shi); + + if (bs->type == RE_BAKE_AO) { + ambient_occlusion(shi); + + if (R.r.bake_flag & R_BAKE_NORMALIZE) { + copy_v3_v3(shr.combined, shi->ao); + } + else { + zero_v3(shr.combined); + environment_lighting_apply(shi, &shr); + } + } + else { + if (bs->type == RE_BAKE_SHADOW) /* Why do shadows set the color anyhow?, ignore material color for baking */ + shi->r = shi->g = shi->b = 1.0f; + + shade_input_set_shade_texco(shi); + + /* only do AO for a full bake (and obviously AO bakes) + * AO for light bakes is a leftover and might not be needed */ + if (ELEM(bs->type, RE_BAKE_ALL, RE_BAKE_AO, RE_BAKE_LIGHT)) + shade_samples_do_AO(ssamp); + + if (shi->mat->nodetree && shi->mat->use_nodes) { + ntreeShaderExecTree(shi->mat->nodetree, shi, &shr); + shi->mat = vlr->mat; /* shi->mat is being set in nodetree */ + } + else + shade_material_loop(shi, &shr); + + if (bs->type == RE_BAKE_NORMALS) { + float nor[3]; + + copy_v3_v3(nor, shi->vn); + + if (R.r.bake_normal_space == R_BAKE_SPACE_CAMERA) { + /* pass */ + } + else if (R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) { + float mat[3][3], imat[3][3]; + + /* bitangent */ + if (tvn && ttang) { + copy_v3_v3(mat[0], ttang); + cross_v3_v3v3(mat[1], tvn, ttang); + mul_v3_fl(mat[1], ttang[3]); + copy_v3_v3(mat[2], tvn); + } + else { + copy_v3_v3(mat[0], shi->nmaptang); + cross_v3_v3v3(mat[1], shi->nmapnorm, shi->nmaptang); + mul_v3_fl(mat[1], shi->nmaptang[3]); + copy_v3_v3(mat[2], shi->nmapnorm); + } + + invert_m3_m3(imat, mat); + mul_m3_v3(imat, nor); + } + else if (R.r.bake_normal_space == R_BAKE_SPACE_OBJECT) + mul_mat3_m4_v3(ob->imat_ren, nor); /* ob->imat_ren includes viewinv! 
*/ + else if (R.r.bake_normal_space == R_BAKE_SPACE_WORLD) + mul_mat3_m4_v3(R.viewinv, nor); + + normalize_v3(nor); /* in case object has scaling */ + + /* The invert of the red channel is to make + * the normal map compliant with the outside world. + * It needs to be done because in Blender + * the normal used in the renderer points inward. It is generated + * this way in calc_vertexnormals(). Should this ever change + * this negate must be removed. + * + * there is also a small 1e-5f bias for precision issues. otherwise + * we randomly get 127 or 128 for neutral colors. we choose 128 + * because it is the convention flat color. * */ + shr.combined[0] = (-nor[0]) / 2.0f + 0.5f + 1e-5f; + shr.combined[1] = nor[1] / 2.0f + 0.5f + 1e-5f; + shr.combined[2] = nor[2] / 2.0f + 0.5f + 1e-5f; + } + else if (bs->type == RE_BAKE_TEXTURE) { + copy_v3_v3(shr.combined, &shi->r); + shr.alpha = shi->alpha; + } + else if (bs->type == RE_BAKE_SHADOW) { + copy_v3_v3(shr.combined, shr.shad); + shr.alpha = shi->alpha; + } + else if (bs->type == RE_BAKE_SPEC_COLOR) { + copy_v3_v3(shr.combined, &shi->specr); + shr.alpha = 1.0f; + } + else if (bs->type == RE_BAKE_SPEC_INTENSITY) { + copy_v3_fl(shr.combined, shi->spec); + shr.alpha = 1.0f; + } + else if (bs->type == RE_BAKE_MIRROR_COLOR) { + copy_v3_v3(shr.combined, &shi->mirr); + shr.alpha = 1.0f; + } + else if (bs->type == RE_BAKE_MIRROR_INTENSITY) { + copy_v3_fl(shr.combined, shi->ray_mirror); + shr.alpha = 1.0f; + } + else if (bs->type == RE_BAKE_ALPHA) { + copy_v3_fl(shr.combined, shi->alpha); + shr.alpha = 1.0f; + } + else if (bs->type == RE_BAKE_EMIT) { + copy_v3_fl(shr.combined, shi->emit); + shr.alpha = 1.0f; + } + else if (bs->type == RE_BAKE_VERTEX_COLORS) { + copy_v3_v3(shr.combined, shi->vcol); + shr.alpha = shi->vcol[3]; + } + } + + if (bs->rect_float && !bs->vcol) { + float *col = bs->rect_float + 4 * (bs->rectx * y + x); + copy_v3_v3(col, shr.combined); + if (bs->type == RE_BAKE_ALL || bs->type == RE_BAKE_TEXTURE || 
bs->type == RE_BAKE_VERTEX_COLORS) { + col[3] = shr.alpha; + } + else { + col[3] = 1.0; + } + } + else { + /* Target is char (LDR). */ + unsigned char col[4]; + + if (ELEM(bs->type, RE_BAKE_ALL, RE_BAKE_TEXTURE)) { + float rgb[3]; + + copy_v3_v3(rgb, shr.combined); + if (R.scene_color_manage) { + /* Vertex colors have no way to specify color space, so they + * default to sRGB. */ + if (!bs->vcol) + IMB_colormanagement_scene_linear_to_colorspace_v3(rgb, bs->rect_colorspace); + else + linearrgb_to_srgb_v3_v3(rgb, rgb); + } + rgb_float_to_uchar(col, rgb); + } + else { + rgb_float_to_uchar(col, shr.combined); + } + + if (ELEM(bs->type, RE_BAKE_ALL, RE_BAKE_TEXTURE, RE_BAKE_VERTEX_COLORS)) { + col[3] = unit_float_to_uchar_clamp(shr.alpha); + } + else { + col[3] = 255; + } + + if (bs->vcol) { + /* Vertex color baking. Vcol has no useful alpha channel (it exists + * but is used only for vertex painting). */ + bs->vcol->r = col[0]; + bs->vcol->g = col[1]; + bs->vcol->b = col[2]; + } + else { + unsigned char *imcol = (unsigned char *)(bs->rect + bs->rectx * y + x); + copy_v4_v4_uchar(imcol, col); + } + + } + + if (bs->rect_mask) { + bs->rect_mask[bs->rectx * y + x] = FILTER_MASK_USED; + } + + if (bs->do_update) { + *bs->do_update = true; + } +} + +static void bake_displacement(void *handle, ShadeInput *UNUSED(shi), float dist, int x, int y) +{ + BakeShade *bs = handle; + float disp; + + if (R.r.bake_flag & R_BAKE_NORMALIZE) { + if (R.r.bake_maxdist) + disp = (dist + R.r.bake_maxdist) / (R.r.bake_maxdist * 2); /* alter the range from [-bake_maxdist, bake_maxdist] to [0, 1]*/ + else + disp = dist; + } + else { + disp = 0.5f + dist; /* alter the range from [-0.5,0.5] to [0,1]*/ + } + + if (bs->displacement_buffer) { + float *displacement = bs->displacement_buffer + (bs->rectx * y + x); + *displacement = disp; + bs->displacement_min = min_ff(bs->displacement_min, disp); + bs->displacement_max = max_ff(bs->displacement_max, disp); + } + + if (bs->rect_float && !bs->vcol) { + 
float *col = bs->rect_float + 4 * (bs->rectx * y + x); + col[0] = col[1] = col[2] = disp; + col[3] = 1.0f; + } + else { + /* Target is char (LDR). */ + unsigned char col[4]; + col[0] = col[1] = col[2] = unit_float_to_uchar_clamp(disp); + col[3] = 255; + + if (bs->vcol) { + /* Vertex color baking. Vcol has no useful alpha channel (it exists + * but is used only for vertex painting). */ + bs->vcol->r = col[0]; + bs->vcol->g = col[1]; + bs->vcol->b = col[2]; + } + else { + unsigned char *imcol = (unsigned char *)(bs->rect + bs->rectx * y + x); + copy_v4_v4_uchar(imcol, col); + } + } + if (bs->rect_mask) { + bs->rect_mask[bs->rectx * y + x] = FILTER_MASK_USED; + } +} + +static int bake_intersect_tree(RayObject *raytree, Isect *isect, float *start, float *dir, float sign, float *hitco, float *dist) +{ + float maxdist; + int hit; + + /* might be useful to make a user setting for maxsize*/ + if (R.r.bake_maxdist > 0.0f) + maxdist = R.r.bake_maxdist; + else + maxdist = RE_RAYTRACE_MAXDIST + R.r.bake_biasdist; + + /* 'dir' is always normalized */ + madd_v3_v3v3fl(isect->start, start, dir, -R.r.bake_biasdist); + + mul_v3_v3fl(isect->dir, dir, sign); + + isect->dist = maxdist; + + hit = RE_rayobject_raycast(raytree, isect); + if (hit) { + madd_v3_v3v3fl(hitco, isect->start, isect->dir, isect->dist); + + *dist = isect->dist; + } + + return hit; +} + +static void bake_set_vlr_dxyco(BakeShade *bs, float *uv1, float *uv2, float *uv3) +{ + VlakRen *vlr = bs->vlr; + float A, d1, d2, d3, *v1, *v2, *v3; + + if (bs->quad) { + v1 = vlr->v1->co; + v2 = vlr->v3->co; + v3 = vlr->v4->co; + } + else { + v1 = vlr->v1->co; + v2 = vlr->v2->co; + v3 = vlr->v3->co; + } + + /* formula derived from barycentric coordinates: + * (uvArea1*v1 + uvArea2*v2 + uvArea3*v3)/uvArea + * then taking u and v partial derivatives to get dxco and dyco */ + A = (uv2[0] - uv1[0]) * (uv3[1] - uv1[1]) - (uv3[0] - uv1[0]) * (uv2[1] - uv1[1]); + + if (fabsf(A) > FLT_EPSILON) { + A = 0.5f / A; + + d1 = uv2[1] - uv3[1]; 
+ d2 = uv3[1] - uv1[1]; + d3 = uv1[1] - uv2[1]; + bs->dxco[0] = (v1[0] * d1 + v2[0] * d2 + v3[0] * d3) * A; + bs->dxco[1] = (v1[1] * d1 + v2[1] * d2 + v3[1] * d3) * A; + bs->dxco[2] = (v1[2] * d1 + v2[2] * d2 + v3[2] * d3) * A; + + d1 = uv3[0] - uv2[0]; + d2 = uv1[0] - uv3[0]; + d3 = uv2[0] - uv1[0]; + bs->dyco[0] = (v1[0] * d1 + v2[0] * d2 + v3[0] * d3) * A; + bs->dyco[1] = (v1[1] * d1 + v2[1] * d2 + v3[1] * d3) * A; + bs->dyco[2] = (v1[2] * d1 + v2[2] * d2 + v3[2] * d3) * A; + } + else { + bs->dxco[0] = bs->dxco[1] = bs->dxco[2] = 0.0f; + bs->dyco[0] = bs->dyco[1] = bs->dyco[2] = 0.0f; + } + + if (bs->obi->flag & R_TRANSFORMED) { + mul_m3_v3(bs->obi->nmat, bs->dxco); + mul_m3_v3(bs->obi->nmat, bs->dyco); + } +} + +static void do_bake_shade(void *handle, int x, int y, float u, float v) +{ + BakeShade *bs = handle; + VlakRen *vlr = bs->vlr; + ObjectInstanceRen *obi = bs->obi; + Object *ob = obi->obr->ob; + float l, *v1, *v2, *v3, tvn[3], ttang[4]; + int quad; + ShadeSample *ssamp = &bs->ssamp; + ShadeInput *shi = ssamp->shi; + + /* fast threadsafe break test */ + if (R.test_break(R.tbh)) + return; + + /* setup render coordinates */ + if (bs->quad) { + v1 = vlr->v1->co; + v2 = vlr->v3->co; + v3 = vlr->v4->co; + } + else { + v1 = vlr->v1->co; + v2 = vlr->v2->co; + v3 = vlr->v3->co; + } + + l = 1.0f - u - v; + + /* shrink barycentric coordinates inwards slightly to avoid some issues + * where baking selected to active might just miss the other face at the + * near the edge of a face */ + if (bs->actob) { + const float eps = 1.0f - 1e-4f; + float invsum; + + u = (u - 0.5f) * eps + 0.5f; + v = (v - 0.5f) * eps + 0.5f; + l = (l - 0.5f) * eps + 0.5f; + + invsum = 1.0f / (u + v + l); + + u *= invsum; + v *= invsum; + l *= invsum; + } + + /* renderco */ + shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0]; + shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1]; + shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2]; + + /* avoid self shadow with vertex bake from adjacent faces [#33729] */ 
+ if ((bs->vcol != NULL) && (bs->actob == NULL)) { + madd_v3_v3fl(shi->co, vlr->n, 0.0001f); + } + + if (obi->flag & R_TRANSFORMED) + mul_m4_v3(obi->mat, shi->co); + + copy_v3_v3(shi->dxco, bs->dxco); + copy_v3_v3(shi->dyco, bs->dyco); + + quad = bs->quad; + bake_set_shade_input(obi, vlr, shi, quad, 0, x, y, u, v); + + if (bs->type == RE_BAKE_NORMALS && R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) { + shade_input_set_shade_texco(shi); + copy_v3_v3(tvn, shi->nmapnorm); + copy_v4_v4(ttang, shi->nmaptang); + } + + /* if we are doing selected to active baking, find point on other face */ + if (bs->actob) { + Isect isec, minisec; + float co[3], minco[3], dist, mindist = 0.0f; + int hit, sign, dir = 1; + + /* intersect with ray going forward and backward*/ + hit = 0; + memset(&minisec, 0, sizeof(minisec)); + minco[0] = minco[1] = minco[2] = 0.0f; + + copy_v3_v3(bs->dir, shi->vn); + + for (sign = -1; sign <= 1; sign += 2) { + memset(&isec, 0, sizeof(isec)); + isec.mode = RE_RAY_MIRROR; + + isec.orig.ob = obi; + isec.orig.face = vlr; + isec.userdata = bs->actob; + isec.check = RE_CHECK_VLR_BAKE; + isec.skip = RE_SKIP_VLR_NEIGHBOUR; + + if (bake_intersect_tree(R.raytree, &isec, shi->co, shi->vn, sign, co, &dist)) { + if (!hit || len_squared_v3v3(shi->co, co) < len_squared_v3v3(shi->co, minco)) { + minisec = isec; + mindist = dist; + copy_v3_v3(minco, co); + hit = 1; + dir = sign; + } + } + } + + if (ELEM(bs->type, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE)) { + if (hit) + bake_displacement(handle, shi, (dir == -1) ? 
mindist : -mindist, x, y); + else + bake_displacement(handle, shi, 0.0f, x, y); + return; + } + + /* if hit, we shade from the new point, otherwise from point one starting face */ + if (hit) { + obi = (ObjectInstanceRen *)minisec.hit.ob; + vlr = (VlakRen *)minisec.hit.face; + quad = (minisec.isect == 2); + copy_v3_v3(shi->co, minco); + + u = -minisec.u; + v = -minisec.v; + bake_set_shade_input(obi, vlr, shi, quad, 1, x, y, u, v); + } + } + + if (bs->type == RE_BAKE_NORMALS && R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) + bake_shade(handle, ob, shi, quad, x, y, u, v, tvn, ttang); + else + bake_shade(handle, ob, shi, quad, x, y, u, v, NULL, NULL); +} + +static int get_next_bake_face(BakeShade *bs) +{ + ObjectRen *obr; + VlakRen *vlr; + MTFace *tface; + static int v = 0, vdone = false; + static ObjectInstanceRen *obi = NULL; + + if (bs == NULL) { + vlr = NULL; + v = vdone = false; + obi = R.instancetable.first; + return 0; + } + + BLI_thread_lock(LOCK_CUSTOM1); + + for (; obi; obi = obi->next, v = 0) { + obr = obi->obr; + + /* only allow non instances here */ + if (obr->flag & R_INSTANCEABLE) + continue; + + for (; v < obr->totvlak; v++) { + vlr = RE_findOrAddVlak(obr, v); + + if ((bs->actob && bs->actob == obr->ob) || (!bs->actob && (obr->ob->flag & SELECT))) { + if (R.r.bake_flag & R_BAKE_VCOL) { + /* Gather face data for vertex color bake */ + Mesh *me; + int *origindex, vcollayer; + CustomDataLayer *cdl; + + if (obr->ob->type != OB_MESH) + continue; + me = obr->ob->data; + + origindex = RE_vlakren_get_origindex(obr, vlr, 0); + if (origindex == NULL) + continue; + if (*origindex >= me->totpoly) { + /* Small hack for Array modifier, which gives false + * original indices - z0r */ + continue; + } +#if 0 + /* Only shade selected faces. 
*/ + if ((me->mface[*origindex].flag & ME_FACE_SEL) == 0) + continue; +#endif + + vcollayer = CustomData_get_render_layer_index(&me->ldata, CD_MLOOPCOL); + if (vcollayer == -1) + continue; + + cdl = &me->ldata.layers[vcollayer]; + bs->mpoly = me->mpoly + *origindex; + bs->vcol = ((MLoopCol *)cdl->data) + bs->mpoly->loopstart; + bs->mloop = me->mloop + bs->mpoly->loopstart; + + /* Tag mesh for reevaluation. */ + me->id.tag |= LIB_TAG_DOIT; + } + else { + Image *ima = NULL; + ImBuf *ibuf = NULL; + const float vec_alpha[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + const float vec_solid[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + const float nor_alpha[4] = {0.5f, 0.5f, 1.0f, 0.0f}; + const float nor_solid[4] = {0.5f, 0.5f, 1.0f, 1.0f}; + const float disp_alpha[4] = {0.5f, 0.5f, 0.5f, 0.0f}; + const float disp_solid[4] = {0.5f, 0.5f, 0.5f, 1.0f}; + + tface = RE_vlakren_get_tface(obr, vlr, obr->bakemtface, NULL, 0); + + if (!tface || !tface->tpage) + continue; + + ima = tface->tpage; + ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL); + + if (ibuf == NULL) + continue; + + if (ibuf->rect == NULL && ibuf->rect_float == NULL) { + BKE_image_release_ibuf(ima, ibuf, NULL); + continue; + } + + if (ibuf->rect_float && !(ibuf->channels == 0 || ibuf->channels == 4)) { + BKE_image_release_ibuf(ima, ibuf, NULL); + continue; + } + + if (ima->flag & IMA_USED_FOR_RENDER) { + ima->id.tag &= ~LIB_TAG_DOIT; + BKE_image_release_ibuf(ima, ibuf, NULL); + continue; + } + + /* find the image for the first time? */ + if (ima->id.tag & LIB_TAG_DOIT) { + ima->id.tag &= ~LIB_TAG_DOIT; + + /* we either fill in float or char, this ensures things go fine */ + if (ibuf->rect_float) + imb_freerectImBuf(ibuf); + /* clear image */ + if (R.r.bake_flag & R_BAKE_CLEAR) { + if (R.r.bake_mode == RE_BAKE_NORMALS && R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) + IMB_rectfill(ibuf, (ibuf->planes == R_IMF_PLANES_RGBA) ? 
nor_alpha : nor_solid); + else if (ELEM(R.r.bake_mode, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE)) + IMB_rectfill(ibuf, (ibuf->planes == R_IMF_PLANES_RGBA) ? disp_alpha : disp_solid); + else + IMB_rectfill(ibuf, (ibuf->planes == R_IMF_PLANES_RGBA) ? vec_alpha : vec_solid); + } + /* might be read by UI to set active image for display */ + R.bakebuf = ima; + } + + /* Tag image for redraw. */ + ibuf->userflags |= IB_DISPLAY_BUFFER_INVALID; + BKE_image_release_ibuf(ima, ibuf, NULL); + } + + bs->obi = obi; + bs->vlr = vlr; + bs->vdone++; /* only for error message if nothing was rendered */ + v++; + BLI_thread_unlock(LOCK_CUSTOM1); + return 1; + } + } + } + + BLI_thread_unlock(LOCK_CUSTOM1); + return 0; +} + +static void bake_single_vertex(BakeShade *bs, VertRen *vert, float u, float v) +{ + int *origindex, i; + MLoopCol *basevcol; + MLoop *mloop; + + /* per vertex fixed seed */ + BLI_thread_srandom(bs->thread, vert->index); + + origindex = RE_vertren_get_origindex(bs->obi->obr, vert, 0); + if (!origindex || *origindex == ORIGINDEX_NONE) + return; + + /* Search for matching vertex index and apply shading. */ + for (i = 0; i < bs->mpoly->totloop; i++) { + mloop = bs->mloop + i; + if (mloop->v != *origindex) + continue; + basevcol = bs->vcol; + bs->vcol = basevcol + i; + do_bake_shade(bs, 0, 0, u, v); + bs->vcol = basevcol; + break; + } +} + +/* Bake all vertices of a face. Actually, this still works on a face-by-face + * basis, and each vertex on each face is shaded. Vertex colors are a property + * of loops, not vertices. */ +static void shade_verts(BakeShade *bs) +{ + VlakRen *vlr = bs->vlr; + + /* Disable baking to image; write to vcol instead. vcol pointer is set in + * bake_single_vertex. */ + bs->ima = NULL; + bs->rect = NULL; + bs->rect_float = NULL; + bs->displacement_buffer = NULL; + bs->displacement_min = FLT_MAX; + bs->displacement_max = -FLT_MAX; + + bs->quad = 0; + + /* No anti-aliasing for vertices. 
*/ + zero_v3(bs->dxco); + zero_v3(bs->dyco); + + /* Shade each vertex of the face. u and v are barycentric coordinates; since + * we're only interested in vertices, these will be 0 or 1. */ + if ((vlr->flag & R_FACE_SPLIT) == 0) { + /* Processing triangle face, whole quad, or first half of split quad. */ + + bake_single_vertex(bs, bs->vlr->v1, 1.0f, 0.0f); + bake_single_vertex(bs, bs->vlr->v2, 0.0f, 1.0f); + bake_single_vertex(bs, bs->vlr->v3, 0.0f, 0.0f); + + if (vlr->v4) { + bs->quad = 1; + bake_single_vertex(bs, bs->vlr->v4, 0.0f, 0.0f); + } + } + else { + /* Processing second half of split quad. Only one vertex to go. */ + if (vlr->flag & R_DIVIDE_24) { + bake_single_vertex(bs, bs->vlr->v2, 0.0f, 1.0f); + } + else { + bake_single_vertex(bs, bs->vlr->v3, 0.0f, 0.0f); + } + } +} + +/* already have tested for tface and ima and zspan */ +static void shade_tface(BakeShade *bs) +{ + VlakRen *vlr = bs->vlr; + ObjectInstanceRen *obi = bs->obi; + ObjectRen *obr = obi->obr; + MTFace *tface = RE_vlakren_get_tface(obr, vlr, obr->bakemtface, NULL, 0); + Image *ima = tface->tpage; + float vec[4][2]; + int a, i1, i2, i3; + + /* per face fixed seed */ + BLI_thread_srandom(bs->thread, vlr->index); + + /* check valid zspan */ + if (ima != bs->ima) { + BKE_image_release_ibuf(bs->ima, bs->ibuf, NULL); + + bs->ima = ima; + bs->ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL); + /* note, these calls only free/fill contents of zspan struct, not zspan itself */ + zbuf_free_span(bs->zspan); + zbuf_alloc_span(bs->zspan, bs->ibuf->x, bs->ibuf->y, R.clipcrop); + } + + bs->rectx = bs->ibuf->x; + bs->recty = bs->ibuf->y; + bs->rect = bs->ibuf->rect; + bs->rect_colorspace = bs->ibuf->rect_colorspace; + bs->rect_float = bs->ibuf->rect_float; + bs->vcol = NULL; + bs->quad = 0; + bs->rect_mask = NULL; + bs->displacement_buffer = NULL; + + if (bs->use_mask || bs->use_displacement_buffer) { + BakeImBufuserData *userdata = bs->ibuf->userdata; + if (userdata == NULL) { + 
BLI_thread_lock(LOCK_CUSTOM1); + userdata = bs->ibuf->userdata; + if (userdata == NULL) /* since the thread was locked, its possible another thread alloced the value */ + userdata = MEM_callocN(sizeof(BakeImBufuserData), "BakeImBufuserData"); + + if (bs->use_mask) { + if (userdata->mask_buffer == NULL) { + userdata->mask_buffer = MEM_callocN(sizeof(char) * bs->rectx * bs->recty, "BakeMask"); + } + } + + if (bs->use_displacement_buffer) { + if (userdata->displacement_buffer == NULL) { + userdata->displacement_buffer = MEM_callocN(sizeof(float) * bs->rectx * bs->recty, "BakeDisp"); + } + } + + bs->ibuf->userdata = userdata; + + BLI_thread_unlock(LOCK_CUSTOM1); + } + + bs->rect_mask = userdata->mask_buffer; + bs->displacement_buffer = userdata->displacement_buffer; + } + + /* get pixel level vertex coordinates */ + for (a = 0; a < 4; a++) { + /* Note, workaround for pixel aligned UVs which are common and can screw up our intersection tests + * where a pixel gets in between 2 faces or the middle of a quad, + * camera aligned quads also have this problem but they are less common. 
+ * Add a small offset to the UVs, fixes bug #18685 - Campbell */ + vec[a][0] = tface->uv[a][0] * (float)bs->rectx - (0.5f + 0.001f); + vec[a][1] = tface->uv[a][1] * (float)bs->recty - (0.5f + 0.002f); + } + + /* UV indices have to be corrected for possible quad->tria splits */ + i1 = 0; i2 = 1; i3 = 2; + vlr_set_uv_indices(vlr, &i1, &i2, &i3); + bake_set_vlr_dxyco(bs, vec[i1], vec[i2], vec[i3]); + zspan_scanconvert(bs->zspan, bs, vec[i1], vec[i2], vec[i3], do_bake_shade); + + if (vlr->v4) { + bs->quad = 1; + bake_set_vlr_dxyco(bs, vec[0], vec[2], vec[3]); + zspan_scanconvert(bs->zspan, bs, vec[0], vec[2], vec[3], do_bake_shade); + } +} + +/* Thread entry point handed to BLI_threadpool_init(); bs_v is this thread's + * BakeShade handle. Keeps pulling faces from get_next_bake_face() and shades + * each one to vertex colors (R_BAKE_VCOL) or to its image, until no face is + * left or a break is requested. Always returns NULL. */ +static void *do_bake_thread(void *bs_v) +{ + BakeShade *bs = bs_v; + + while (get_next_bake_face(bs)) { + if (R.r.bake_flag & R_BAKE_VCOL) { + shade_verts(bs); + } + else { + shade_tface(bs); + } + + /* fast threadsafe break test */ + if (R.test_break(R.tbh)) + break; + + /* access is not threadsafe but since its just true/false probably ok + * only used for interactive baking */ + if (bs->do_update) { + *bs->do_update = true; + } + } + + /* Release the image buffer BEFORE flagging completion: the caller frees the + * handles array once every handle reports ready, so 'bs' must not be read + * any more after 'ready' has been set. */ + BKE_image_release_ibuf(bs->ima, bs->ibuf, NULL); + + bs->ready = true; + + return NULL; +} + +void RE_bake_ibuf_filter(ImBuf *ibuf, char *mask, const int filter) +{ + /* must check before filtering */ + const bool is_new_alpha = (ibuf->planes != R_IMF_PLANES_RGBA) && BKE_imbuf_alpha_test(ibuf); + + /* Margin */ + if (filter) { + IMB_filter_extend(ibuf, mask, filter); + } + + /* if the bake results in new alpha then change the image setting */ + if (is_new_alpha) { + ibuf->planes = R_IMF_PLANES_RGBA; + } + else { + if (filter && ibuf->planes != R_IMF_PLANES_RGBA) { + /* clear alpha added by filtering */ + IMB_rectfill_alpha(ibuf, 1.0f); + } + } +} + +void RE_bake_ibuf_normalize_displacement(ImBuf *ibuf, float *displacement, char *mask, float displacement_min, float displacement_max) +{ + int i; + const float *current_displacement = displacement; + const char *current_mask = mask; + float 
max_distance; + + max_distance = max_ff(fabsf(displacement_min), fabsf(displacement_max)); + + for (i = 0; i < ibuf->x * ibuf->y; i++) { + if (*current_mask == FILTER_MASK_USED) { + float normalized_displacement; + + if (max_distance > 1e-5f) + normalized_displacement = (*current_displacement + max_distance) / (max_distance * 2); + else + normalized_displacement = 0.5f; + + if (ibuf->rect_float) { + /* currently baking happens to RGBA only */ + float *fp = ibuf->rect_float + i * 4; + fp[0] = fp[1] = fp[2] = normalized_displacement; + fp[3] = 1.0f; + } + + if (ibuf->rect) { + unsigned char *cp = (unsigned char *) (ibuf->rect + i); + cp[0] = cp[1] = cp[2] = unit_float_to_uchar_clamp(normalized_displacement); + cp[3] = 255; + } + } + + current_displacement++; + current_mask++; + } +} + +/* using object selection tags, the faces with UV maps get baked */ +/* render should have been setup */ +/* returns 0 if nothing was handled */ +int RE_bake_shade_all_selected(Render *re, int type, Object *actob, short *do_update, float *progress) +{ + BakeShade *handles; + ListBase threads; + Image *ima; + int a, vdone = false, result = BAKE_RESULT_OK; + bool use_mask = false; + bool use_displacement_buffer = false; + bool do_manage = false; + + if (ELEM(type, RE_BAKE_ALL, RE_BAKE_TEXTURE)) { + do_manage = BKE_scene_check_color_management_enabled(re->scene); + } + + re->scene_color_manage = BKE_scene_check_color_management_enabled(re->scene); + + /* initialize render global */ + R = *re; + R.bakebuf = NULL; + + /* initialize static vars */ + get_next_bake_face(NULL); + + /* do we need a mask? 
*/ + if (re->r.bake_filter) + use_mask = true; + + /* do we need buffer to store displacements */ + if (ELEM(type, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE)) { + if (((R.r.bake_flag & R_BAKE_NORMALIZE) && R.r.bake_maxdist == 0.0f) || + (type == RE_BAKE_DERIVATIVE)) + { + use_displacement_buffer = true; + use_mask = true; + } + } + + /* baker uses this flag to detect if image was initialized */ + if ((R.r.bake_flag & R_BAKE_VCOL) == 0) { + for (ima = G.main->image.first; ima; ima = ima->id.next) { + ImBuf *ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL); + ima->id.tag |= LIB_TAG_DOIT; + ima->flag &= ~IMA_USED_FOR_RENDER; + if (ibuf) { + ibuf->userdata = NULL; /* use for masking if needed */ + } + BKE_image_release_ibuf(ima, ibuf, NULL); + } + } + + if (R.r.bake_flag & R_BAKE_VCOL) { + /* untag all meshes */ + BKE_main_id_tag_listbase(&G.main->mesh, LIB_TAG_DOIT, false); + } + + BLI_threadpool_init(&threads, do_bake_thread, re->r.threads); + + handles = MEM_callocN(sizeof(BakeShade) * re->r.threads, "BakeShade"); + + /* get the threads running */ + for (a = 0; a < re->r.threads; a++) { + handles[a].thread = a; + + /* set defaults in handles */ + handles[a].ssamp.shi[0].lay = re->lay; + + if (type == RE_BAKE_SHADOW) { + handles[a].ssamp.shi[0].passflag = SCE_PASS_SHADOW; + } + else { + handles[a].ssamp.shi[0].passflag = SCE_PASS_COMBINED; + } + handles[a].ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC); + handles[a].ssamp.shi[0].thread = a; + handles[a].ssamp.shi[0].do_manage = do_manage; + handles[a].ssamp.tot = 1; + + handles[a].type = type; + handles[a].actob = actob; + if (R.r.bake_flag & R_BAKE_VCOL) + handles[a].zspan = NULL; + else + handles[a].zspan = MEM_callocN(sizeof(ZSpan), "zspan for bake"); + + handles[a].use_mask = use_mask; + handles[a].use_displacement_buffer = use_displacement_buffer; + + handles[a].do_update = do_update; /* use to tell the view to update */ + + handles[a].displacement_min = FLT_MAX; + handles[a].displacement_max = -FLT_MAX; + + 
BLI_threadpool_insert(&threads, &handles[a]); + } + + /* wait for everything to be done */ + a = 0; + while (a != re->r.threads) { + PIL_sleep_ms(50); + + /* calculate progress */ + for (vdone = false, a = 0; a < re->r.threads; a++) + vdone += handles[a].vdone; + if (progress) { + /* nothing in the render database: report 0 instead of dividing by zero (NaN) */ + *progress = (re->totvlak > 0) ? ((float)vdone / (float)re->totvlak) : 0.0f; + } + + for (a = 0; a < re->r.threads; a++) { + if (handles[a].ready == false) { + break; + } + } + } + + /* filter and refresh images */ + if ((R.r.bake_flag & R_BAKE_VCOL) == 0) { + float displacement_min = FLT_MAX, displacement_max = -FLT_MAX; + + if (use_displacement_buffer) { + for (a = 0; a < re->r.threads; a++) { + displacement_min = min_ff(displacement_min, handles[a].displacement_min); + displacement_max = max_ff(displacement_max, handles[a].displacement_max); + } + } + + for (ima = G.main->image.first; ima; ima = ima->id.next) { + if ((ima->id.tag & LIB_TAG_DOIT) == 0) { + ImBuf *ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL); + BakeImBufuserData *userdata; + + if (ima->flag & IMA_USED_FOR_RENDER) + result = BAKE_RESULT_FEEDBACK_LOOP; + + if (!ibuf) + continue; + + userdata = (BakeImBufuserData *)ibuf->userdata; + if (userdata) { + if (use_displacement_buffer) { + if (type == RE_BAKE_DERIVATIVE) { + float user_scale = (R.r.bake_flag & R_BAKE_USERSCALE) ? 
R.r.bake_user_scale : -1.0f; + RE_bake_make_derivative(ibuf, userdata->displacement_buffer, userdata->mask_buffer, + displacement_min, displacement_max, user_scale); + } + else { + RE_bake_ibuf_normalize_displacement(ibuf, userdata->displacement_buffer, userdata->mask_buffer, + displacement_min, displacement_max); + } + } + + RE_bake_ibuf_filter(ibuf, userdata->mask_buffer, re->r.bake_filter); + } + + ibuf->userflags |= IB_BITMAPDIRTY; + BKE_image_release_ibuf(ima, ibuf, NULL); + } + } + + /* free the per-thread span buffers */ + for (a = 0; a < re->r.threads; a++) { + zbuf_free_span(handles[a].zspan); + MEM_freeN(handles[a].zspan); + } + } + + /* End the thread pool before freeing the handles: each worker was started + * with a pointer into this array, so it has to outlive the workers. */ + BLI_threadpool_end(&threads); + + MEM_freeN(handles); + + if (vdone == 0) { + result = BAKE_RESULT_NO_OBJECTS; + } + + return result; +} + +struct Image *RE_bake_shade_get_image(void) +{ + return R.bakebuf; +} + +/* **************** Derivative Maps Baker **************** */ + +static void add_single_heights_margin(const ImBuf *ibuf, const char *mask, float *heights_buffer) +{ + int x, y; + + for (y = 0; y < ibuf->y; y++) { + for (x = 0; x < ibuf->x; x++) { + int index = ibuf->x * y + x; + + /* If unassigned pixel, look for neighbors. */ + if (mask[index] != FILTER_MASK_USED) { + float height_acc = 0; + int denom = 0; + int i, j; + + for (j = -1; j <= 1; j++) + for (i = -1; i <= 1; i++) { + int w = (i == 0 ? 1 : 0) + (j == 0 ? 1 : 0) + 1; + + if (i != 0 || j != 0) { + int index2 = 0; + int x0 = x + i; + int y0 = y + j; + + CLAMP(x0, 0, ibuf->x - 1); + CLAMP(y0, 0, ibuf->y - 1); + + index2 = ibuf->x * y0 + x0; + + if (mask[index2] == FILTER_MASK_USED) { + height_acc += w * heights_buffer[index2]; + denom += w; + } + } + } + + /* Insert final value. 
*/ + if (denom > 0) { + heights_buffer[index] = height_acc / denom; + } + } + } + } +} + +/* returns user-scale */ +float RE_bake_make_derivative(ImBuf *ibuf, float *heights_buffer, const char *mask, + const float height_min, const float height_max, + const float fmult) +{ + const float delta_height = height_max - height_min; + const float denom = delta_height > 0.0f ? (8 * delta_height) : 1.0f; + bool auto_range_fit = fmult <= 0.0f; + float max_num_deriv = -1.0f; + int x, y, index; + + /* Need a single margin to calculate good derivatives. */ + add_single_heights_margin(ibuf, mask, heights_buffer); + + if (auto_range_fit) { + /* If automatic range fitting is enabled. */ + for (y = 0; y < ibuf->y; y++) { + const int Yu = y == (ibuf->y - 1) ? (ibuf->y - 1) : (y + 1); + const int Yc = y; + const int Yd = y == 0 ? 0 : (y - 1); + + for (x = 0; x < ibuf->x; x++) { + const int Xl = x == 0 ? 0 : (x - 1); + const int Xc = x; + const int Xr = x == (ibuf->x - 1) ? (ibuf->x - 1) : (x + 1); + + const float Hcy = heights_buffer[Yc * ibuf->x + Xr] - heights_buffer[Yc * ibuf->x + Xl]; + const float Hu = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yu * ibuf->x + Xl]; + const float Hd = heights_buffer[Yd * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xl]; + + const float Hl = heights_buffer[Yu * ibuf->x + Xl] - heights_buffer[Yd * ibuf->x + Xl]; + const float Hcx = heights_buffer[Yu * ibuf->x + Xc] - heights_buffer[Yd * ibuf->x + Xc]; + const float Hr = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xr]; + + /* This corresponds to using the sobel kernel on the heights buffer + * to obtain the derivative multiplied by 8. + */ + const float deriv_x = Hu + 2 * Hcy + Hd; + const float deriv_y = Hr + 2 * Hcx + Hl; + + /* early out */ + index = ibuf->x * y + x; + if (mask[index] != FILTER_MASK_USED) { + continue; + } + + /* Widen bound. 
*/ + if (fabsf(deriv_x) > max_num_deriv) { + max_num_deriv = fabsf(deriv_x); + } + + if (fabsf(deriv_y) > max_num_deriv) { + max_num_deriv = fabsf(deriv_y); + } + } + } + } + + /* Output derivatives. */ + auto_range_fit &= (max_num_deriv > 0); + for (y = 0; y < ibuf->y; y++) { + const int Yu = y == (ibuf->y - 1) ? (ibuf->y - 1) : (y + 1); + const int Yc = y; + const int Yd = y == 0 ? 0 : (y - 1); + + for (x = 0; x < ibuf->x; x++) { + const int Xl = x == 0 ? 0 : (x - 1); + const int Xc = x; + const int Xr = x == (ibuf->x - 1) ? (ibuf->x - 1) : (x + 1); + + const float Hcy = heights_buffer[Yc * ibuf->x + Xr] - heights_buffer[Yc * ibuf->x + Xl]; + const float Hu = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yu * ibuf->x + Xl]; + const float Hd = heights_buffer[Yd * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xl]; + + const float Hl = heights_buffer[Yu * ibuf->x + Xl] - heights_buffer[Yd * ibuf->x + Xl]; + const float Hcx = heights_buffer[Yu * ibuf->x + Xc] - heights_buffer[Yd * ibuf->x + Xc]; + const float Hr = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xr]; + + /* This corresponds to using the sobel kernel on the heights buffer + * to obtain the derivative multiplied by 8. + */ + float deriv_x = Hu + 2 * Hcy + Hd; + float deriv_y = Hr + 2 * Hcx + Hl; + + /* Early out. */ + index = ibuf->x * y + x; + if (mask[index] != FILTER_MASK_USED) { + continue; + } + + if (auto_range_fit) { + deriv_x /= max_num_deriv; + deriv_y /= max_num_deriv; + } + else { + deriv_x *= (fmult / denom); + deriv_y *= (fmult / denom); + } + + deriv_x = deriv_x * 0.5f + 0.5f; + deriv_y = deriv_y * 0.5f + 0.5f; + + /* Clamp. */ + CLAMP(deriv_x, 0.0f, 1.0f); + CLAMP(deriv_y, 0.0f, 1.0f); + + /* Write out derivatives. 
*/ + if (ibuf->rect_float) { + float *rrgbf = ibuf->rect_float + index * 4; + + rrgbf[0] = deriv_x; + rrgbf[1] = deriv_y; + rrgbf[2] = 0.0f; + rrgbf[3] = 1.0f; + } + else { + char *rrgb = (char *)ibuf->rect + index * 4; + + rrgb[0] = unit_float_to_uchar_clamp(deriv_x); + rrgb[1] = unit_float_to_uchar_clamp(deriv_y); + rrgb[2] = 0; + rrgb[3] = 255; + } + } + } + + /* Eeturn user-scale (for rendering). */ + return auto_range_fit ? (max_num_deriv / denom) : (fmult > 0.0f ? (1.0f / fmult) : 0.0f); +} diff --git a/source/blender/render/intern/source/convertblender.c b/source/blender/render/intern/source/convertblender.c new file mode 100644 index 00000000000..8675ffec313 --- /dev/null +++ b/source/blender/render/intern/source/convertblender.c @@ -0,0 +1,6014 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. 
+ * + * Contributors: 2004/2005/2006 Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/convertblender.c + * \ingroup render + */ + +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_blenlib.h" +#include "BLI_utildefines.h" +#include "BLI_rand.h" +#include "BLI_memarena.h" +#ifdef WITH_FREESTYLE +# include "BLI_edgehash.h" +#endif + +#include "BLT_translation.h" + +#include "DNA_material_types.h" +#include "DNA_curve_types.h" +#include "DNA_group_types.h" +#include "DNA_lamp_types.h" +#include "DNA_image_types.h" +#include "DNA_mesh_types.h" +#include "DNA_meshdata_types.h" +#include "DNA_modifier_types.h" +#include "DNA_node_types.h" +#include "DNA_object_types.h" +#include "DNA_object_fluidsim_types.h" +#include "DNA_particle_types.h" +#include "DNA_scene_types.h" +#include "DNA_texture_types.h" + +#include "BKE_anim.h" +#include "BKE_curve.h" +#include "BKE_customdata.h" +#include "BKE_colortools.h" +#include "BKE_displist.h" +#include "BKE_depsgraph.h" +#include "BKE_DerivedMesh.h" +#include "BKE_global.h" +#include "BKE_key.h" +#include "BKE_image.h" +#include "BKE_lattice.h" +#include "BKE_material.h" +#include "BKE_main.h" +#include "BKE_mball.h" +#include "BKE_mesh.h" +#include "BKE_modifier.h" +#include "BKE_node.h" +#include "BKE_object.h" +#include "BKE_particle.h" +#include "BKE_scene.h" + +#include "PIL_time.h" + +#include "envmap.h" +#include "occlusion.h" +#include "pointdensity.h" +#include "voxeldata.h" +#include "render_types.h" +#include "rendercore.h" +#include "renderdatabase.h" +#include "renderpipeline.h" +#include "shadbuf.h" +#include "shading.h" +#include "strand.h" +#include "texture.h" +#include "volume_precache.h" +#include "sss.h" +#include "zbuf.h" +#include "sunsky.h" + +/* 10 times larger than normal epsilon, test it on default nurbs sphere 
with ray_transp (for quad detection) */ +/* or for checking vertex normal flips */ +#define FLT_EPSILON10 1.19209290e-06F + +/* could enable at some point but for now there are far too many conversions */ +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wdouble-promotion" +#endif + +/* ------------------------------------------------------------------------- */ +/* tool functions/defines for ad hoc simplification and possible future + * cleanup */ +/* ------------------------------------------------------------------------- */ + +#define UVTOINDEX(u, v) (startvlak + (u) * sizev + (v)) +/* + * + * NOTE THAT U/V COORDINATES ARE SOMETIMES SWAPPED !! + * + * ^ ()----p4----p3----() + * | | | | | + * u | | F1 | F2 | + * | | | | + * ()----p1----p2----() + * v -> + */ + +/* ------------------------------------------------------------------------- */ + +#define CD_MASK_RENDER_INTERNAL \ + (CD_MASK_BAREMESH | CD_MASK_MFACE | CD_MASK_MTFACE | CD_MASK_MCOL) + +static void split_v_renderfaces(ObjectRen *obr, int startvlak, int UNUSED(startvert), int UNUSED(usize), int vsize, int uIndex, int UNUSED(cyclu), int cyclv) +{ + int vLen = vsize-1+(!!cyclv); + int v; + + for (v=0; v<vLen; v++) { + VlakRen *vlr = RE_findOrAddVlak(obr, startvlak + vLen*uIndex + v); + VlakRen *vlr_other; + VertRen *vert = RE_vertren_copy(obr, vlr->v2); + + if (cyclv) { + vlr->v2 = vert; + + if (v == vLen - 1) { + vlr_other = RE_findOrAddVlak(obr, startvlak + vLen*uIndex + 0); + vlr_other->v1 = vert; + } + else { + vlr_other = RE_findOrAddVlak(obr, startvlak + vLen*uIndex + v+1); + vlr_other->v1 = vert; + } + } + else { + vlr->v2 = vert; + + if (v < vLen - 1) { + vlr_other = RE_findOrAddVlak(obr, startvlak + vLen*uIndex + v+1); + vlr_other->v1 = vert; + } + + if (v == 0) { + vlr->v1 = RE_vertren_copy(obr, vlr->v1); + } + } + } +} + +/* ------------------------------------------------------------------------- */ +/* Stress, tangents and normals */ +/* 
------------------------------------------------------------------------- */ + +static void calc_edge_stress_add(float *accum, VertRen *v1, VertRen *v2) +{ + float len= len_v3v3(v1->co, v2->co)/len_v3v3(v1->orco, v2->orco); + float *acc; + + acc= accum + 2*v1->index; + acc[0]+= len; + acc[1]+= 1.0f; + + acc= accum + 2*v2->index; + acc[0]+= len; + acc[1]+= 1.0f; +} + +static void calc_edge_stress(Render *UNUSED(re), ObjectRen *obr, Mesh *me) +{ + float loc[3], size[3], *accum, *acc, *accumoffs, *stress; + int a; + + if (obr->totvert==0) return; + + BKE_mesh_texspace_get(me, loc, NULL, size); + + accum= MEM_callocN(2*sizeof(float)*obr->totvert, "temp accum for stress"); + + /* de-normalize orco */ + for (a=0; a<obr->totvert; a++) { + VertRen *ver= RE_findOrAddVert(obr, a); + if (ver->orco) { + ver->orco[0]= ver->orco[0]*size[0] +loc[0]; + ver->orco[1]= ver->orco[1]*size[1] +loc[1]; + ver->orco[2]= ver->orco[2]*size[2] +loc[2]; + } + } + + /* add stress values */ + accumoffs= accum; /* so we can use vertex index */ + for (a=0; a<obr->totvlak; a++) { + VlakRen *vlr= RE_findOrAddVlak(obr, a); + + if (vlr->v1->orco && vlr->v4) { + calc_edge_stress_add(accumoffs, vlr->v1, vlr->v2); + calc_edge_stress_add(accumoffs, vlr->v2, vlr->v3); + calc_edge_stress_add(accumoffs, vlr->v3, vlr->v1); + if (vlr->v4) { + calc_edge_stress_add(accumoffs, vlr->v3, vlr->v4); + calc_edge_stress_add(accumoffs, vlr->v4, vlr->v1); + calc_edge_stress_add(accumoffs, vlr->v2, vlr->v4); + } + } + } + + for (a=0; a<obr->totvert; a++) { + VertRen *ver= RE_findOrAddVert(obr, a); + if (ver->orco) { + /* find stress value */ + acc= accumoffs + 2*ver->index; + if (acc[1]!=0.0f) + acc[0]/= acc[1]; + stress= RE_vertren_get_stress(obr, ver, 1); + *stress= *acc; + + /* restore orcos */ + ver->orco[0] = (ver->orco[0]-loc[0])/size[0]; + ver->orco[1] = (ver->orco[1]-loc[1])/size[1]; + ver->orco[2] = (ver->orco[2]-loc[2])/size[2]; + } + } + + MEM_freeN(accum); +} + +/* gets tangent from tface or orco */ +static 
void calc_tangent_vector(ObjectRen *obr, VlakRen *vlr, int do_tangent) +{ + MTFace *tface= RE_vlakren_get_tface(obr, vlr, obr->actmtface, NULL, 0); + VertRen *v1=vlr->v1, *v2=vlr->v2, *v3=vlr->v3, *v4=vlr->v4; + float tang[3], *tav; + float *uv1, *uv2, *uv3, *uv4; + float uv[4][2]; + + if (tface) { + uv1= tface->uv[0]; + uv2= tface->uv[1]; + uv3= tface->uv[2]; + uv4= tface->uv[3]; + } + else if (v1->orco) { + uv1= uv[0]; uv2= uv[1]; uv3= uv[2]; uv4= uv[3]; + map_to_sphere(&uv[0][0], &uv[0][1], v1->orco[0], v1->orco[1], v1->orco[2]); + map_to_sphere(&uv[1][0], &uv[1][1], v2->orco[0], v2->orco[1], v2->orco[2]); + map_to_sphere(&uv[2][0], &uv[2][1], v3->orco[0], v3->orco[1], v3->orco[2]); + if (v4) + map_to_sphere(&uv[3][0], &uv[3][1], v4->orco[0], v4->orco[1], v4->orco[2]); + } + else return; + + tangent_from_uv_v3(uv1, uv2, uv3, v1->co, v2->co, v3->co, vlr->n, tang); + + if (do_tangent) { + tav= RE_vertren_get_tangent(obr, v1, 1); + add_v3_v3(tav, tang); + tav= RE_vertren_get_tangent(obr, v2, 1); + add_v3_v3(tav, tang); + tav= RE_vertren_get_tangent(obr, v3, 1); + add_v3_v3(tav, tang); + } + + if (v4) { + tangent_from_uv_v3(uv1, uv3, uv4, v1->co, v3->co, v4->co, vlr->n, tang); + + if (do_tangent) { + tav= RE_vertren_get_tangent(obr, v1, 1); + add_v3_v3(tav, tang); + tav= RE_vertren_get_tangent(obr, v3, 1); + add_v3_v3(tav, tang); + tav= RE_vertren_get_tangent(obr, v4, 1); + add_v3_v3(tav, tang); + } + } +} + + + +/**************************************************************** + ************ tangent space generation interface **************** + ****************************************************************/ + +typedef struct { + ObjectRen *obr; + int mtface_index; +} SRenderMeshToTangent; + +/* interface */ +#include "mikktspace.h" + +static int GetNumFaces(const SMikkTSpaceContext *pContext) +{ + SRenderMeshToTangent *pMesh = (SRenderMeshToTangent *) pContext->m_pUserData; + return pMesh->obr->totvlak; +} + +static int GetNumVertsOfFace(const SMikkTSpaceContext 
*pContext, const int face_num) +{ + SRenderMeshToTangent *pMesh = (SRenderMeshToTangent *) pContext->m_pUserData; + VlakRen *vlr= RE_findOrAddVlak(pMesh->obr, face_num); + return vlr->v4!=NULL ? 4 : 3; +} + +static void GetPosition(const SMikkTSpaceContext *pContext, float r_co[3], const int face_num, const int vert_index) +{ + //assert(vert_index>=0 && vert_index<4); + SRenderMeshToTangent *pMesh = (SRenderMeshToTangent *) pContext->m_pUserData; + VlakRen *vlr= RE_findOrAddVlak(pMesh->obr, face_num); + const float *co = (&vlr->v1)[vert_index]->co; + copy_v3_v3(r_co, co); +} + +static void GetTextureCoordinate(const SMikkTSpaceContext *pContext, float r_uv[2], const int face_num, const int vert_index) +{ + //assert(vert_index>=0 && vert_index<4); + SRenderMeshToTangent *pMesh = (SRenderMeshToTangent *) pContext->m_pUserData; + VlakRen *vlr= RE_findOrAddVlak(pMesh->obr, face_num); + MTFace *tface= RE_vlakren_get_tface(pMesh->obr, vlr, pMesh->mtface_index, NULL, 0); + const float *coord; + + if (tface != NULL) { + coord= tface->uv[vert_index]; + copy_v2_v2(r_uv, coord); + } + else if ((coord = (&vlr->v1)[vert_index]->orco)) { + map_to_sphere(&r_uv[0], &r_uv[1], coord[0], coord[1], coord[2]); + } + else { /* else we get un-initialized value, 0.0 ok default? 
*/ + zero_v2(r_uv); + } +} + +static void GetNormal(const SMikkTSpaceContext *pContext, float r_no[3], const int face_num, const int vert_index) +{ + //assert(vert_index>=0 && vert_index<4); + SRenderMeshToTangent *pMesh = (SRenderMeshToTangent *) pContext->m_pUserData; + VlakRen *vlr= RE_findOrAddVlak(pMesh->obr, face_num); + + if (vlr->flag & ME_SMOOTH) { + const float *n = (&vlr->v1)[vert_index]->n; + copy_v3_v3(r_no, n); + } + else { + negate_v3_v3(r_no, vlr->n); + } +} +static void SetTSpace(const SMikkTSpaceContext *pContext, const float fvTangent[3], const float fSign, const int face_num, const int iVert) +{ + //assert(vert_index>=0 && vert_index<4); + SRenderMeshToTangent *pMesh = (SRenderMeshToTangent *) pContext->m_pUserData; + VlakRen *vlr = RE_findOrAddVlak(pMesh->obr, face_num); + float *ftang = RE_vlakren_get_nmap_tangent(pMesh->obr, vlr, pMesh->mtface_index, true); + if (ftang!=NULL) { + copy_v3_v3(&ftang[iVert*4+0], fvTangent); + ftang[iVert*4+3]=fSign; + } +} + +static void calc_vertexnormals(Render *UNUSED(re), ObjectRen *obr, bool do_vertex_normal, bool do_tangent, bool do_nmap_tangent) +{ + int a; + + /* clear all vertex normals */ + if (do_vertex_normal) { + for (a=0; a<obr->totvert; a++) { + VertRen *ver= RE_findOrAddVert(obr, a); + ver->n[0]=ver->n[1]=ver->n[2]= 0.0f; + } + } + + /* calculate cos of angles and point-masses, use as weight factor to + * add face normal to vertex */ + for (a=0; a<obr->totvlak; a++) { + VlakRen *vlr= RE_findOrAddVlak(obr, a); + if (do_vertex_normal && vlr->flag & ME_SMOOTH) { + float *n4= (vlr->v4)? vlr->v4->n: NULL; + const float *c4= (vlr->v4)? 
vlr->v4->co: NULL; + + accumulate_vertex_normals_v3(vlr->v1->n, vlr->v2->n, vlr->v3->n, n4, + vlr->n, vlr->v1->co, vlr->v2->co, vlr->v3->co, c4); + } + if (do_tangent) { + /* tangents still need to be calculated for flat faces too */ + /* weighting removed, they are not vertexnormals */ + calc_tangent_vector(obr, vlr, do_tangent); + } + } + + /* do solid faces */ + for (a=0; a<obr->totvlak; a++) { + VlakRen *vlr= RE_findOrAddVlak(obr, a); + + if (do_vertex_normal && (vlr->flag & ME_SMOOTH)==0) { + if (is_zero_v3(vlr->v1->n)) copy_v3_v3(vlr->v1->n, vlr->n); + if (is_zero_v3(vlr->v2->n)) copy_v3_v3(vlr->v2->n, vlr->n); + if (is_zero_v3(vlr->v3->n)) copy_v3_v3(vlr->v3->n, vlr->n); + if (vlr->v4 && is_zero_v3(vlr->v4->n)) copy_v3_v3(vlr->v4->n, vlr->n); + } + } + + /* normalize vertex normals */ + for (a=0; a<obr->totvert; a++) { + VertRen *ver= RE_findOrAddVert(obr, a); + normalize_v3(ver->n); + if (do_tangent) { + float *tav= RE_vertren_get_tangent(obr, ver, 0); + if (tav) { + /* orthonorm. 
*/ + const float tdn = dot_v3v3(tav, ver->n); + tav[0] -= ver->n[0]*tdn; + tav[1] -= ver->n[1]*tdn; + tav[2] -= ver->n[2]*tdn; + normalize_v3(tav); + } + } + } + + /* normal mapping tangent with mikktspace */ + if (do_nmap_tangent != false) { + SRenderMeshToTangent mesh2tangent; + SMikkTSpaceContext sContext; + SMikkTSpaceInterface sInterface; + memset(&mesh2tangent, 0, sizeof(SRenderMeshToTangent)); + memset(&sContext, 0, sizeof(SMikkTSpaceContext)); + memset(&sInterface, 0, sizeof(SMikkTSpaceInterface)); + + mesh2tangent.obr = obr; + + sContext.m_pUserData = &mesh2tangent; + sContext.m_pInterface = &sInterface; + sInterface.m_getNumFaces = GetNumFaces; + sInterface.m_getNumVerticesOfFace = GetNumVertsOfFace; + sInterface.m_getPosition = GetPosition; + sInterface.m_getTexCoord = GetTextureCoordinate; + sInterface.m_getNormal = GetNormal; + sInterface.m_setTSpaceBasic = SetTSpace; + + for (a = 0; a < MAX_MTFACE; a++) { + if (obr->tangent_mask & 1 << a) { + mesh2tangent.mtface_index = a; + genTangSpaceDefault(&sContext); + } + } + } +} + +/* ------------------------------------------------------------------------- */ +/* Autosmoothing: */ +/* ------------------------------------------------------------------------- */ + +typedef struct ASvert { + int totface; + ListBase faces; +} ASvert; + +typedef struct ASface { + struct ASface *next, *prev; + VlakRen *vlr[4]; + VertRen *nver[4]; +} ASface; + +static int as_addvert(ASvert *asv, VertRen *v1, VlakRen *vlr) +{ + ASface *asf; + int a = -1; + + if (v1 == NULL) + return a; + + asf = asv->faces.last; + if (asf) { + for (a = 0; a < 4 && asf->vlr[a]; a++) { + } + } + else { + a = 4; + } + + /* new face struct */ + if (a == 4) { + a = 0; + asf = MEM_callocN(sizeof(ASface), "asface"); + BLI_addtail(&asv->faces, asf); + } + + asf->vlr[a] = vlr; + asv->totface++; + + return a; +} + +static VertRen *as_findvertex_lnor(VlakRen *vlr, VertRen *ver, ASvert *asv, const float lnor[3]) +{ + /* return when new vertex already was made, 
or existing one is OK */ + ASface *asf; + int a; + + /* First face, we can use existing vert and assign it current lnor! */ + if (asv->totface == 1) { + copy_v3_v3(ver->n, lnor); + return ver; + } + + /* In case existing ver has same normal as current lnor, we can simply use it! */ + if (equals_v3v3(lnor, ver->n)) { + return ver; + } + + asf = asv->faces.first; + while (asf) { + for (a = 0; a < 4; a++) { + if (asf->vlr[a] && asf->vlr[a] != vlr) { + /* this face already made a copy for this vertex! */ + if (asf->nver[a]) { + if (equals_v3v3(lnor, asf->nver[a]->n)) { + return asf->nver[a]; + } + } + } + } + asf = asf->next; + } + + return NULL; +} + +static void as_addvert_lnor(ObjectRen *obr, ASvert *asv, VertRen *ver, VlakRen *vlr, const short _lnor[3]) +{ + VertRen *v1; + ASface *asf; + int asf_idx; + float lnor[3]; + + normal_short_to_float_v3(lnor, _lnor); + + asf_idx = as_addvert(asv, ver, vlr); + if (asf_idx < 0) { + return; + } + asf = asv->faces.last; + + /* already made a new vertex within threshold? */ + v1 = as_findvertex_lnor(vlr, ver, asv, lnor); + if (v1 == NULL) { + /* make a new vertex */ + v1 = RE_vertren_copy(obr, ver); + copy_v3_v3(v1->n, lnor); + } + if (v1 != ver) { + asf->nver[asf_idx] = v1; + if (vlr->v1 == ver) vlr->v1 = v1; + if (vlr->v2 == ver) vlr->v2 = v1; + if (vlr->v3 == ver) vlr->v3 = v1; + if (vlr->v4 == ver) vlr->v4 = v1; + } +} + +/* note; autosmooth happens in object space still, after applying autosmooth we rotate */ +/* note2; actually, when original mesh and displist are equal sized, face normals are from original mesh */ +static void autosmooth(Render *UNUSED(re), ObjectRen *obr, float mat[4][4], short (*lnors)[4][3]) +{ + ASvert *asverts; + VertRen *ver; + VlakRen *vlr; + int a, totvert; + + float rot[3][3]; + + /* Note: For normals, we only want rotation, not scaling component. + * Negative scales (aka mirroring) give wrong results, see T44102. 
*/ + if (lnors) { + float mat3[3][3], size[3]; + + copy_m3_m4(mat3, mat); + mat3_to_rot_size(rot, size, mat3); + } + + if (obr->totvert == 0) + return; + + totvert = obr->totvert; + asverts = MEM_callocN(sizeof(ASvert) * totvert, "all smooth verts"); + + if (lnors) { + /* We construct listbase of all vertices and pointers to faces, and add new verts when needed + * (i.e. when existing ones do not share the same (loop)normal). + */ + for (a = 0; a < obr->totvlak; a++, lnors++) { + vlr = RE_findOrAddVlak(obr, a); + /* skip wire faces */ + if (vlr->v2 != vlr->v3) { + as_addvert_lnor(obr, asverts+vlr->v1->index, vlr->v1, vlr, (const short*)lnors[0][0]); + as_addvert_lnor(obr, asverts+vlr->v2->index, vlr->v2, vlr, (const short*)lnors[0][1]); + as_addvert_lnor(obr, asverts+vlr->v3->index, vlr->v3, vlr, (const short*)lnors[0][2]); + if (vlr->v4) + as_addvert_lnor(obr, asverts+vlr->v4->index, vlr->v4, vlr, (const short*)lnors[0][3]); + } + } + } + + /* free */ + for (a = 0; a < totvert; a++) { + BLI_freelistN(&asverts[a].faces); + } + MEM_freeN(asverts); + + /* rotate vertices and calculate normal of faces */ + for (a = 0; a < obr->totvert; a++) { + ver = RE_findOrAddVert(obr, a); + mul_m4_v3(mat, ver->co); + if (lnors) { + mul_m3_v3(rot, ver->n); + negate_v3(ver->n); + } + } + for (a = 0; a < obr->totvlak; a++) { + vlr = RE_findOrAddVlak(obr, a); + + /* skip wire faces */ + if (vlr->v2 != vlr->v3) { + if (vlr->v4) + normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + else + normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co); + } + } +} + +/* ------------------------------------------------------------------------- */ +/* Orco hash and Materials */ +/* ------------------------------------------------------------------------- */ + +static float *get_object_orco(Render *re, void *ob) +{ + if (!re->orco_hash) { + return NULL; + } + + return BLI_ghash_lookup(re->orco_hash, ob); +} + +static void set_object_orco(Render *re, void *ob, float 
*orco) +{ + if (!re->orco_hash) + re->orco_hash = BLI_ghash_ptr_new("set_object_orco gh"); + + BLI_ghash_insert(re->orco_hash, ob, orco); +} + +static void free_mesh_orco_hash(Render *re) +{ + if (re->orco_hash) { + BLI_ghash_free(re->orco_hash, NULL, MEM_freeN); + re->orco_hash = NULL; + } +} + +static void check_material_mapto(Material *ma) +{ + int a; + ma->mapto_textured = 0; + + /* cache which inputs are actually textured. + * this can avoid a bit of time spent iterating through all the texture slots, map inputs and map tos + * every time a property which may or may not be textured is accessed */ + + for (a=0; a<MAX_MTEX; a++) { + if (ma->mtex[a] && ma->mtex[a]->tex) { + /* currently used only in volume render, so we'll check for those flags */ + if (ma->mtex[a]->mapto & MAP_DENSITY) ma->mapto_textured |= MAP_DENSITY; + if (ma->mtex[a]->mapto & MAP_EMISSION) ma->mapto_textured |= MAP_EMISSION; + if (ma->mtex[a]->mapto & MAP_EMISSION_COL) ma->mapto_textured |= MAP_EMISSION_COL; + if (ma->mtex[a]->mapto & MAP_SCATTERING) ma->mapto_textured |= MAP_SCATTERING; + if (ma->mtex[a]->mapto & MAP_TRANSMISSION_COL) ma->mapto_textured |= MAP_TRANSMISSION_COL; + if (ma->mtex[a]->mapto & MAP_REFLECTION) ma->mapto_textured |= MAP_REFLECTION; + if (ma->mtex[a]->mapto & MAP_REFLECTION_COL) ma->mapto_textured |= MAP_REFLECTION_COL; + } + } +} +static void flag_render_node_material(Render *re, bNodeTree *ntree) +{ + bNode *node; + + for (node = ntree->nodes.first; node; node = node->next) { + if (node->id) { + if (GS(node->id->name)==ID_MA) { + Material *ma= (Material *)node->id; + + if ((ma->mode & MA_TRANSP) && (ma->mode & MA_ZTRANSP)) + re->flag |= R_ZTRA; + + ma->flag |= MA_IS_USED; + } + else if (node->type==NODE_GROUP) + flag_render_node_material(re, (bNodeTree *)node->id); + } + } +} + +static Material *give_render_material(Render *re, Object *ob, short nr) +{ + extern Material defmaterial; /* material.c */ + Material *ma; + + ma= give_current_material(ob, nr); + if 
(ma==NULL) + ma= &defmaterial; + + if (re->r.mode & R_SPEED) ma->texco |= NEED_UV; + + if (ma->material_type == MA_TYPE_VOLUME) { + ma->mode |= MA_TRANSP; + ma->mode &= ~MA_SHADBUF; + } + if ((ma->mode & MA_TRANSP) && (ma->mode & MA_ZTRANSP)) + re->flag |= R_ZTRA; + + /* for light groups and SSS */ + ma->flag |= MA_IS_USED; + + if (ma->nodetree && ma->use_nodes) + flag_render_node_material(re, ma->nodetree); + + check_material_mapto(ma); + + return ma; +} + +/* ------------------------------------------------------------------------- */ +/* Particles */ +/* ------------------------------------------------------------------------- */ +typedef struct ParticleStrandData { + struct MCol *mcol; + float *orco, *uvco, *surfnor; + float time, adapt_angle, adapt_pix, size; + int totuv, totcol; + int first, line, adapt, override_uv; +} +ParticleStrandData; +/* future thread problem... */ +static void static_particle_strand(Render *re, ObjectRen *obr, Material *ma, ParticleStrandData *sd, const float vec[3], const float vec1[3]) +{ + static VertRen *v1= NULL, *v2= NULL; + VlakRen *vlr= NULL; + float nor[3], cross[3], crosslen, w, dx, dy, width; + static float anor[3], avec[3]; + int flag, i; + static int second=0; + + sub_v3_v3v3(nor, vec, vec1); + normalize_v3(nor); /* nor needed as tangent */ + cross_v3_v3v3(cross, vec, nor); + + /* turn cross in pixelsize */ + w= vec[2]*re->winmat[2][3] + re->winmat[3][3]; + dx= re->winx*cross[0]*re->winmat[0][0]; + dy= re->winy*cross[1]*re->winmat[1][1]; + w = sqrtf(dx * dx + dy * dy) / w; + + if (w!=0.0f) { + float fac; + if (ma->strand_ease!=0.0f) { + if (ma->strand_ease<0.0f) + fac= pow(sd->time, 1.0f+ma->strand_ease); + else + fac= pow(sd->time, 1.0f/(1.0f-ma->strand_ease)); + } + else fac= sd->time; + + width= ((1.0f-fac)*ma->strand_sta + (fac)*ma->strand_end); + + /* use actual Blender units for strand width and fall back to minimum width */ + if (ma->mode & MA_STR_B_UNITS) { + crosslen= len_v3(cross); + w= 
2.0f*crosslen*ma->strand_min/w; + + if (width < w) + width= w; + + /*cross is the radius of the strand so we want it to be half of full width */ + mul_v3_fl(cross, 0.5f/crosslen); + } + else + width/=w; + + mul_v3_fl(cross, width); + } + + if (ma->mode & MA_TANGENT_STR) + flag= R_SMOOTH|R_TANGENT; + else + flag= R_SMOOTH; + + /* only 1 pixel wide strands filled in as quads now, otherwise zbuf errors */ + if (ma->strand_sta==1.0f) + flag |= R_STRAND; + + /* single face line */ + if (sd->line) { + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->flag= flag; + vlr->v1= RE_findOrAddVert(obr, obr->totvert++); + vlr->v2= RE_findOrAddVert(obr, obr->totvert++); + vlr->v3= RE_findOrAddVert(obr, obr->totvert++); + vlr->v4= RE_findOrAddVert(obr, obr->totvert++); + + copy_v3_v3(vlr->v1->co, vec); + add_v3_v3(vlr->v1->co, cross); + copy_v3_v3(vlr->v1->n, nor); + vlr->v1->orco= sd->orco; + vlr->v1->accum = -1.0f; /* accum abuse for strand texco */ + + copy_v3_v3(vlr->v2->co, vec); + sub_v3_v3v3(vlr->v2->co, vlr->v2->co, cross); + copy_v3_v3(vlr->v2->n, nor); + vlr->v2->orco= sd->orco; + vlr->v2->accum= vlr->v1->accum; + + copy_v3_v3(vlr->v4->co, vec1); + add_v3_v3(vlr->v4->co, cross); + copy_v3_v3(vlr->v4->n, nor); + vlr->v4->orco= sd->orco; + vlr->v4->accum = 1.0f; /* accum abuse for strand texco */ + + copy_v3_v3(vlr->v3->co, vec1); + sub_v3_v3v3(vlr->v3->co, vlr->v3->co, cross); + copy_v3_v3(vlr->v3->n, nor); + vlr->v3->orco= sd->orco; + vlr->v3->accum= vlr->v4->accum; + + normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + + vlr->mat= ma; + vlr->ec= ME_V2V3; + + if (sd->surfnor) { + float *snor= RE_vlakren_get_surfnor(obr, vlr, 1); + copy_v3_v3(snor, sd->surfnor); + } + + if (sd->uvco) { + for (i=0; i<sd->totuv; i++) { + MTFace *mtf; + mtf=RE_vlakren_get_tface(obr, vlr, i, NULL, 1); + mtf->uv[0][0]=mtf->uv[1][0]= + mtf->uv[2][0]=mtf->uv[3][0]=(sd->uvco+2*i)[0]; + mtf->uv[0][1]=mtf->uv[1][1]= + mtf->uv[2][1]=mtf->uv[3][1]=(sd->uvco+2*i)[1]; + } + 
if (sd->override_uv>=0) { + MTFace *mtf; + mtf=RE_vlakren_get_tface(obr, vlr, sd->override_uv, NULL, 0); + + mtf->uv[0][0]=mtf->uv[3][0]=0.0f; + mtf->uv[1][0]=mtf->uv[2][0]=1.0f; + + mtf->uv[0][1]=mtf->uv[1][1]=0.0f; + mtf->uv[2][1]=mtf->uv[3][1]=1.0f; + } + } + if (sd->mcol) { + for (i=0; i<sd->totcol; i++) { + MCol *mc; + mc=RE_vlakren_get_mcol(obr, vlr, i, NULL, 1); + mc[0]=mc[1]=mc[2]=mc[3]=sd->mcol[i]; + mc[0]=mc[1]=mc[2]=mc[3]=sd->mcol[i]; + } + } + } + /* first two vertices of a strand */ + else if (sd->first) { + if (sd->adapt) { + copy_v3_v3(anor, nor); + copy_v3_v3(avec, vec); + second=1; + } + + v1= RE_findOrAddVert(obr, obr->totvert++); + v2= RE_findOrAddVert(obr, obr->totvert++); + + copy_v3_v3(v1->co, vec); + add_v3_v3(v1->co, cross); + copy_v3_v3(v1->n, nor); + v1->orco= sd->orco; + v1->accum = -1.0f; /* accum abuse for strand texco */ + + copy_v3_v3(v2->co, vec); + sub_v3_v3v3(v2->co, v2->co, cross); + copy_v3_v3(v2->n, nor); + v2->orco= sd->orco; + v2->accum= v1->accum; + } + /* more vertices & faces to strand */ + else { + if (sd->adapt==0 || second) { + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->flag= flag; + vlr->v1= v1; + vlr->v2= v2; + vlr->v3= RE_findOrAddVert(obr, obr->totvert++); + vlr->v4= RE_findOrAddVert(obr, obr->totvert++); + + v1= vlr->v4; /* cycle */ + v2= vlr->v3; /* cycle */ + + + if (sd->adapt) { + second=0; + copy_v3_v3(anor, nor); + copy_v3_v3(avec, vec); + } + + } + else if (sd->adapt) { + float dvec[3], pvec[3]; + sub_v3_v3v3(dvec, avec, vec); + project_v3_v3v3(pvec, dvec, vec); + sub_v3_v3v3(dvec, dvec, pvec); + + w= vec[2]*re->winmat[2][3] + re->winmat[3][3]; + dx= re->winx*dvec[0]*re->winmat[0][0]/w; + dy= re->winy*dvec[1]*re->winmat[1][1]/w; + w = sqrtf(dx * dx + dy * dy); + if (dot_v3v3(anor, nor)<sd->adapt_angle && w>sd->adapt_pix) { + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->flag= flag; + vlr->v1= v1; + vlr->v2= v2; + vlr->v3= RE_findOrAddVert(obr, obr->totvert++); + vlr->v4= RE_findOrAddVert(obr, 
obr->totvert++); + + v1= vlr->v4; /* cycle */ + v2= vlr->v3; /* cycle */ + + copy_v3_v3(anor, nor); + copy_v3_v3(avec, vec); + } + else { + vlr= RE_findOrAddVlak(obr, obr->totvlak-1); + } + } + + copy_v3_v3(vlr->v4->co, vec); + add_v3_v3(vlr->v4->co, cross); + copy_v3_v3(vlr->v4->n, nor); + vlr->v4->orco= sd->orco; + vlr->v4->accum= -1.0f + 2.0f * sd->time; /* accum abuse for strand texco */ + + copy_v3_v3(vlr->v3->co, vec); + sub_v3_v3v3(vlr->v3->co, vlr->v3->co, cross); + copy_v3_v3(vlr->v3->n, nor); + vlr->v3->orco= sd->orco; + vlr->v3->accum= vlr->v4->accum; + + normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + + vlr->mat= ma; + vlr->ec= ME_V2V3; + + if (sd->surfnor) { + float *snor= RE_vlakren_get_surfnor(obr, vlr, 1); + copy_v3_v3(snor, sd->surfnor); + } + + if (sd->uvco) { + for (i=0; i<sd->totuv; i++) { + MTFace *mtf; + mtf=RE_vlakren_get_tface(obr, vlr, i, NULL, 1); + mtf->uv[0][0]=mtf->uv[1][0]= + mtf->uv[2][0]=mtf->uv[3][0]=(sd->uvco+2*i)[0]; + mtf->uv[0][1]=mtf->uv[1][1]= + mtf->uv[2][1]=mtf->uv[3][1]=(sd->uvco+2*i)[1]; + } + if (sd->override_uv>=0) { + MTFace *mtf; + mtf=RE_vlakren_get_tface(obr, vlr, sd->override_uv, NULL, 0); + + mtf->uv[0][0]=mtf->uv[3][0]=0.0f; + mtf->uv[1][0]=mtf->uv[2][0]=1.0f; + + mtf->uv[0][1]=mtf->uv[1][1]=(vlr->v1->accum+1.0f)/2.0f; + mtf->uv[2][1]=mtf->uv[3][1]=(vlr->v3->accum+1.0f)/2.0f; + } + } + if (sd->mcol) { + for (i=0; i<sd->totcol; i++) { + MCol *mc; + mc=RE_vlakren_get_mcol(obr, vlr, i, NULL, 1); + mc[0]=mc[1]=mc[2]=mc[3]=sd->mcol[i]; + mc[0]=mc[1]=mc[2]=mc[3]=sd->mcol[i]; + } + } + } +} + /* Wire material path geometry: adds two-vertex faces (v3 aliases v2, v4 is NULL, edge flag ME_V1V2). With 'line' set, one standalone segment from vec to vec1 is created. Otherwise 'first' only creates and remembers the start vertex, and every later call adds a segment from the remembered vertex to a new vertex at vec. The remembered vertex is a function-level static, so it persists between calls. */ +static void static_particle_wire(ObjectRen *obr, Material *ma, const float vec[3], const float vec1[3], int first, int line) +{ + VlakRen *vlr; + static VertRen *v1; + + if (line) { + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, obr->totvert++); + vlr->v2= RE_findOrAddVert(obr, obr->totvert++); + vlr->v3= vlr->v2; + vlr->v4= NULL; + + copy_v3_v3(vlr->v1->co, vec); + 
copy_v3_v3(vlr->v2->co, vec1); + + sub_v3_v3v3(vlr->n, vec, vec1); + normalize_v3(vlr->n); + copy_v3_v3(vlr->v1->n, vlr->n); + copy_v3_v3(vlr->v2->n, vlr->n); + + vlr->mat= ma; + vlr->ec= ME_V1V2; + + } + else if (first) { + v1= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(v1->co, vec); + } + else { + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= v1; + vlr->v2= RE_findOrAddVert(obr, obr->totvert++); + vlr->v3= vlr->v2; + vlr->v4= NULL; + + v1= vlr->v2; /* cycle */ + copy_v3_v3(v1->co, vec); + + sub_v3_v3v3(vlr->n, vec, vec1); + normalize_v3(vlr->n); + copy_v3_v3(v1->n, vlr->n); + + vlr->mat= ma; + vlr->ec= ME_V1V2; + } + +} + /* Emit one path segment according to the material type: wire materials go to static_particle_wire(), halo materials create a halo through RE_inithalo_particle() (tagged with the object's layer when one is returned), and every other material goes to static_particle_strand(). */ +static void particle_curve(Render *re, ObjectRen *obr, DerivedMesh *dm, Material *ma, ParticleStrandData *sd, + const float loc[3], const float loc1[3], int seed, float *pa_co) +{ + HaloRen *har = NULL; + + if (ma->material_type == MA_TYPE_WIRE) + static_particle_wire(obr, ma, loc, loc1, sd->first, sd->line); + else if (ma->material_type == MA_TYPE_HALO) { + har= RE_inithalo_particle(re, obr, dm, ma, loc, loc1, sd->orco, sd->uvco, sd->size, 1.0, seed, pa_co); + if (har) har->lay= obr->ob->lay; + } + else + static_particle_strand(re, obr, ma, sd, loc, loc1); +} +/* Add one quad face for a billboard particle. The corners are built from the center and axes filled in by psys_make_billboard() and transformed with re->viewmat. Up to three UV layers are written when their index is >= 0: bb->uv[0] plain corner UVs, bb->uv[1] time and num/totnum, bb->uv[2] the tile of a uv_split x uv_split grid selected by the animation settings (only when uv_split > 1). */ static void particle_billboard(Render *re, ObjectRen *obr, Material *ma, ParticleBillboardData *bb) +{ + VlakRen *vlr; + MTFace *mtf; + float xvec[3], yvec[3], zvec[3], bb_center[3]; + /* Number of tiles */ + int totsplit = bb->uv_split * bb->uv_split; + int tile, x, y; + /* Tile offsets */ + float uvx = 0.0f, uvy = 0.0f, uvdx = 1.0f, uvdy = 1.0f, time = 0.0f; + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, obr->totvert++); + vlr->v2= RE_findOrAddVert(obr, obr->totvert++); + vlr->v3= RE_findOrAddVert(obr, obr->totvert++); + vlr->v4= RE_findOrAddVert(obr, obr->totvert++); + + psys_make_billboard(bb, xvec, yvec, zvec, bb_center); + + add_v3_v3v3(vlr->v1->co, bb_center, xvec); + add_v3_v3(vlr->v1->co, yvec); + mul_m4_v3(re->viewmat, 
vlr->v1->co); + + sub_v3_v3v3(vlr->v2->co, bb_center, xvec); + add_v3_v3(vlr->v2->co, yvec); + mul_m4_v3(re->viewmat, vlr->v2->co); + + sub_v3_v3v3(vlr->v3->co, bb_center, xvec); + sub_v3_v3v3(vlr->v3->co, vlr->v3->co, yvec); + mul_m4_v3(re->viewmat, vlr->v3->co); + + add_v3_v3v3(vlr->v4->co, bb_center, xvec); + sub_v3_v3(vlr->v4->co, yvec); + mul_m4_v3(re->viewmat, vlr->v4->co); + + normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + copy_v3_v3(vlr->v1->n, vlr->n); + copy_v3_v3(vlr->v2->n, vlr->n); + copy_v3_v3(vlr->v3->n, vlr->n); + copy_v3_v3(vlr->v4->n, vlr->n); + + vlr->mat= ma; + vlr->ec= ME_V2V3; + + if (bb->uv_split > 1) { + uvdx = uvdy = 1.0f / (float)bb->uv_split; + + if (ELEM(bb->anim, PART_BB_ANIM_AGE, PART_BB_ANIM_FRAME)) { + if (bb->anim == PART_BB_ANIM_FRAME) + time = ((int)(bb->time * bb->lifetime) % totsplit)/(float)totsplit; + else + time = bb->time; + } + else if (bb->anim == PART_BB_ANIM_ANGLE) { + if (bb->align == PART_BB_VIEW) { + time = (float)fmod((bb->tilt + 1.0f) / 2.0f, 1.0); + } + else { + float axis1[3] = {0.0f, 0.0f, 0.0f}; + float axis2[3] = {0.0f, 0.0f, 0.0f}; + + axis1[(bb->align + 1) % 3] = 1.0f; + axis2[(bb->align + 2) % 3] = 1.0f; + + if (bb->lock == 0) { + zvec[bb->align] = 0.0f; + normalize_v3(zvec); + } + + time = saacos(dot_v3v3(zvec, axis1)) / (float)M_PI; + + if (dot_v3v3(zvec, axis2) < 0.0f) + time = 1.0f - time / 2.0f; + else + time /= 2.0f; + } + } + + if (bb->split_offset == PART_BB_OFF_LINEAR) + time = (float)fmod(time + (float)bb->num / (float)totsplit, 1.0f); + else if (bb->split_offset==PART_BB_OFF_RANDOM) + time = (float)fmod(time + bb->random, 1.0f); + + /* Find the coordinates in tile space (integer), then convert to UV + * space (float). Note that Y is flipped. 
*/ + tile = (int)((time + FLT_EPSILON10) * totsplit); + x = tile % bb->uv_split; + y = tile / bb->uv_split; + y = (bb->uv_split - 1) - y; + uvx = uvdx * x; + uvy = uvdy * y; + } + + /* normal UVs */ + if (bb->uv[0] >= 0) { + mtf = RE_vlakren_get_tface(obr, vlr, bb->uv[0], NULL, 1); + mtf->uv[0][0] = 1.0f; + mtf->uv[0][1] = 1.0f; + mtf->uv[1][0] = 0.0f; + mtf->uv[1][1] = 1.0f; + mtf->uv[2][0] = 0.0f; + mtf->uv[2][1] = 0.0f; + mtf->uv[3][0] = 1.0f; + mtf->uv[3][1] = 0.0f; + } + + /* time-index UVs */ + if (bb->uv[1] >= 0) { + mtf = RE_vlakren_get_tface(obr, vlr, bb->uv[1], NULL, 1); + mtf->uv[0][0] = mtf->uv[1][0] = mtf->uv[2][0] = mtf->uv[3][0] = bb->time; + mtf->uv[0][1] = mtf->uv[1][1] = mtf->uv[2][1] = mtf->uv[3][1] = (float)bb->num/(float)bb->totnum; + } + + /* split UVs */ + if (bb->uv_split > 1 && bb->uv[2] >= 0) { + mtf = RE_vlakren_get_tface(obr, vlr, bb->uv[2], NULL, 1); + mtf->uv[0][0] = uvx + uvdx; + mtf->uv[0][1] = uvy + uvdy; + mtf->uv[1][0] = uvx; + mtf->uv[1][1] = uvy + uvdy; + mtf->uv[2][0] = uvx; + mtf->uv[2][1] = uvy; + mtf->uv[3][0] = uvx + uvdx; + mtf->uv[3][1] = uvy; + } +} +/* Render one particle state that is not part of a path. PART_DRAW_LINE builds a segment along the view-space velocity and hands it to particle_curve(); PART_DRAW_BB copies position and velocity into bb and calls particle_billboard(); any other ren_as value creates a halo. The position is transformed with re->viewmat here for everything except billboards. */ static void particle_normal_ren(short ren_as, ParticleSettings *part, Render *re, ObjectRen *obr, DerivedMesh *dm, Material *ma, ParticleStrandData *sd, ParticleBillboardData *bb, ParticleKey *state, int seed, float hasize, float *pa_co) +{ + float loc[3], loc0[3], loc1[3], vel[3]; + + copy_v3_v3(loc, state->co); + + if (ren_as != PART_DRAW_BB) + mul_m4_v3(re->viewmat, loc); + + switch (ren_as) { + case PART_DRAW_LINE: + sd->line = 1; + sd->time = 0.0f; + sd->size = hasize; + + mul_v3_mat3_m4v3(vel, re->viewmat, state->vel); + normalize_v3(vel); + + if (part->draw & PART_DRAW_VEL_LENGTH) + mul_v3_fl(vel, len_v3(state->vel)); + + madd_v3_v3v3fl(loc0, loc, vel, -part->draw_line[0]); + madd_v3_v3v3fl(loc1, loc, vel, part->draw_line[1]); + + particle_curve(re, obr, dm, ma, sd, loc0, loc1, seed, pa_co); + + break; + + case PART_DRAW_BB: + + copy_v3_v3(bb->vec, loc); + 
copy_v3_v3(bb->vel, state->vel); + + particle_billboard(re, obr, ma, bb); + + break; + + default: + { + HaloRen *har = NULL; + + har = RE_inithalo_particle(re, obr, dm, ma, loc, NULL, sd->orco, sd->uvco, hasize, 0.0, seed, pa_co); + + if (har) har->lay= obr->ob->lay; + + break; + } + } +} +/* Fill sd->uvco and sd->mcol for a particle emitted from faces or volume by interpolating the layers of tessellated face 'num' with the weights in fuv. Entries are zeroed when num is DMCACHE_NOTFOUND or DMCACHE_ISCHILD. Nothing is written for other 'from' values or for buffers that are NULL. */ static void get_particle_uvco_mcol(short from, DerivedMesh *dm, float *fuv, int num, ParticleStrandData *sd) +{ + int i; + + /* get uvco */ + if (sd->uvco && ELEM(from, PART_FROM_FACE, PART_FROM_VOLUME)) { + for (i=0; i<sd->totuv; i++) { + if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { + MFace *mface = dm->getTessFaceData(dm, num, CD_MFACE); + MTFace *mtface = (MTFace*)CustomData_get_layer_n(&dm->faceData, CD_MTFACE, i); + mtface += num; + + psys_interpolate_uvs(mtface, mface->v4, fuv, sd->uvco + 2 * i); + } + else { + sd->uvco[2*i] = 0.0f; + sd->uvco[2*i + 1] = 0.0f; + } + } + } + + /* get mcol */ + if (sd->mcol && ELEM(from, PART_FROM_FACE, PART_FROM_VOLUME)) { + for (i=0; i<sd->totcol; i++) { + if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { + MFace *mface = dm->getTessFaceData(dm, num, CD_MFACE); + MCol *mc = (MCol*)CustomData_get_layer_n(&dm->faceData, CD_MCOL, i); + mc += num * 4; + + psys_interpolate_mcol(mc, mface->v4, fuv, sd->mcol + i); + } + else + memset(&sd->mcol[i], 0, sizeof(MCol)); + } + } +} +/* Convert one particle system of obr->ob into render data (strand buffers, path faces, billboards or halos). Returns 0 when the system is skipped by the early checks below, otherwise 1; systems rendered as object, group or none return 1 without adding anything here. */ static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem *psys, int timeoffset) +{ + Object *ob= obr->ob; +// Object *tob=0; + Material *ma = NULL; + ParticleSystemModifierData *psmd; + ParticleSystem *tpsys = NULL; + ParticleSettings *part, *tpart = NULL; + ParticleData *pars, *pa = NULL, *tpa = NULL; + ParticleKey *states = NULL; + ParticleKey state; + ParticleCacheKey *cache = NULL; + ParticleBillboardData bb; + ParticleSimulationData sim = {NULL}; + ParticleStrandData sd; + StrandBuffer *strandbuf = NULL; + StrandVert *svert = NULL; + StrandBound *sbound = NULL; + StrandRen *strand = NULL; + RNG *rng = NULL; + float loc[3], loc1[3], 
loc0[3], mat[4][4], nmat[3][3], co[3], nor[3], duplimat[4][4]; + float strandlen=0.0f, curlen=0.0f; + float hasize, pa_size, r_tilt, r_length; + float pa_time, pa_birthtime, pa_dietime; + float random, simplify[2], pa_co[3]; + const float cfra= BKE_scene_frame_get(re->scene); + int i, a, k, max_k=0, totpart; + bool do_simplify = false, do_surfacecache = false, use_duplimat = false; + int totchild=0, step_nbr; + int seed, path_nbr=0, orco1=0, num; + int totface; + + const int *index_mf_to_mpoly = NULL; + const int *index_mp_to_orig = NULL; + +/* 1. check that everything is ok & updated */ + if (psys==NULL) + return 0; + + part=psys->part; + pars=psys->particles; + + if (part==NULL || pars==NULL || !psys_check_enabled(ob, psys, G.is_rendering)) + return 0; + + if (part->ren_as==PART_DRAW_OB || part->ren_as==PART_DRAW_GR || part->ren_as==PART_DRAW_NOT) + return 1; + + if ((re->r.scemode & R_VIEWPORT_PREVIEW) && (ob->mode & OB_MODE_PARTICLE_EDIT)) + return 0; + + if (part->ren_as == PART_DRAW_BB && part->bb_ob == NULL && RE_GetCamera(re) == NULL) + return 0; + +/* 2. start initializing things */ + + /* last possibility to bail out! 
*/ + psmd = psys_get_modifier(ob, psys); + if (!(psmd->modifier.mode & eModifierMode_Render)) + return 0; + + sim.scene= re->scene; + sim.ob= ob; + sim.psys= psys; + sim.psmd= psmd; + + if (part->phystype==PART_PHYS_KEYED) + psys_count_keyed_targets(&sim); + + totchild=psys->totchild; + + /* can happen for disconnected/global hair */ + if (part->type==PART_HAIR && !psys->childcache) + totchild= 0; + + if (re->r.scemode & R_VIEWPORT_PREVIEW) { /* preview render */ + totchild = (int)((float)totchild * (float)part->disp / 100.0f); + step_nbr = 1 << part->draw_step; + } + else { + step_nbr = 1 << part->ren_step; + } + if (ELEM(part->kink, PART_KINK_SPIRAL)) + step_nbr += part->kink_extra_steps; + + psys->flag |= PSYS_DRAWING; + + rng= BLI_rng_new(psys->seed); + + totpart=psys->totpart; + + memset(&sd, 0, sizeof(ParticleStrandData)); + sd.override_uv = -1; + +/* 2.1 setup material stuff */ + ma= give_render_material(re, ob, part->omat); + +#if 0 /* XXX old animation system */ + if (ma->ipo) { + calc_ipo(ma->ipo, cfra); + execute_ipo((ID *)ma, ma->ipo); + } +#endif /* XXX old animation system */ + + hasize = ma->hasize; + seed = ma->seed1; + + re->flag |= R_HALO; + + RE_set_customdata_names(obr, &psmd->dm_final->faceData); + sd.totuv = CustomData_number_of_layers(&psmd->dm_final->faceData, CD_MTFACE); + sd.totcol = CustomData_number_of_layers(&psmd->dm_final->faceData, CD_MCOL); + + if (ma->texco & TEXCO_UV && sd.totuv) { + sd.uvco = MEM_callocN(sd.totuv * 2 * sizeof(float), "particle_uvs"); + + if (ma->strand_uvname[0]) { + sd.override_uv = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, ma->strand_uvname); + sd.override_uv -= CustomData_get_layer_index(&psmd->dm_final->faceData, CD_MTFACE); + } + } + else + sd.uvco = NULL; + + if (sd.totcol) + sd.mcol = MEM_callocN(sd.totcol * sizeof(MCol), "particle_mcols"); + +/* 2.2 setup billboards */ + if (part->ren_as == PART_DRAW_BB) { + int first_uv = CustomData_get_layer_index(&psmd->dm_final->faceData, 
CD_MTFACE); + + bb.uv[0] = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, psys->bb_uvname[0]); + if (bb.uv[0] < 0) + bb.uv[0] = CustomData_get_active_layer_index(&psmd->dm_final->faceData, CD_MTFACE); + + bb.uv[1] = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, psys->bb_uvname[1]); + + bb.uv[2] = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, psys->bb_uvname[2]); + + if (first_uv >= 0) { + bb.uv[0] -= first_uv; + bb.uv[1] -= first_uv; + bb.uv[2] -= first_uv; + } + + bb.align = part->bb_align; + bb.anim = part->bb_anim; + bb.lock = part->draw & PART_DRAW_BB_LOCK; + bb.ob = (part->bb_ob ? part->bb_ob : RE_GetCamera(re)); + bb.split_offset = part->bb_split_offset; + bb.totnum = totpart+totchild; + bb.uv_split = part->bb_uv_split; + } + +/* 2.5 setup matrices */ + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); /* need to be that way, for imat texture */ + transpose_m3_m4(nmat, ob->imat); + + if (psys->flag & PSYS_USE_IMAT) { + /* psys->imat is the original emitter's inverse matrix, ob->obmat is the duplicated object's matrix */ + mul_m4_m4m4(duplimat, ob->obmat, psys->imat); + use_duplimat = true; + } + +/* 2.6 setup strand rendering */ + if (part->ren_as == PART_DRAW_PATH && psys->pathcache) { + path_nbr = step_nbr; + + if (path_nbr) { + if (!ELEM(ma->material_type, MA_TYPE_HALO, MA_TYPE_WIRE)) { + sd.orco = get_object_orco(re, psys); + if (!sd.orco) { + sd.orco = MEM_mallocN(3*sizeof(float)*(totpart+totchild), "particle orcos"); + set_object_orco(re, psys, sd.orco); + } + } + } + + if (part->draw & PART_DRAW_REN_ADAPT) { + sd.adapt = 1; + sd.adapt_pix = (float)part->adapt_pix; + sd.adapt_angle = cosf(DEG2RADF((float)part->adapt_angle)); + } + + if (part->draw & PART_DRAW_REN_STRAND) { + strandbuf= RE_addStrandBuffer(obr, (totpart+totchild)*(path_nbr+1)); + strandbuf->ma= ma; + strandbuf->lay= ob->lay; + copy_m4_m4(strandbuf->winmat, re->winmat); + strandbuf->winx= 
re->winx; + strandbuf->winy= re->winy; + strandbuf->maxdepth= 2; + strandbuf->adaptcos= cosf(DEG2RADF((float)part->adapt_angle)); + strandbuf->overrideuv= sd.override_uv; + strandbuf->minwidth= ma->strand_min; + + if (ma->strand_widthfade == 0.0f) + strandbuf->widthfade= -1.0f; + else if (ma->strand_widthfade >= 1.0f) + strandbuf->widthfade= 2.0f - ma->strand_widthfade; + else + strandbuf->widthfade= 1.0f/MAX2(ma->strand_widthfade, 1e-5f); + + if (part->flag & PART_HAIR_BSPLINE) + strandbuf->flag |= R_STRAND_BSPLINE; + if (ma->mode & MA_STR_B_UNITS) + strandbuf->flag |= R_STRAND_B_UNITS; + + svert= strandbuf->vert; + + if (re->r.mode & R_SPEED) + do_surfacecache = true; + else if ((re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && (re->wrld.ao_gather_method == WO_AOGATHER_APPROX)) + if (ma->amb != 0.0f) + do_surfacecache = true; + + totface= psmd->dm_final->getNumTessFaces(psmd->dm_final); + index_mf_to_mpoly = psmd->dm_final->getTessFaceDataArray(psmd->dm_final, CD_ORIGINDEX); + index_mp_to_orig = psmd->dm_final->getPolyDataArray(psmd->dm_final, CD_ORIGINDEX); + if (index_mf_to_mpoly == NULL) { + index_mp_to_orig = NULL; + } + for (a=0; a<totface; a++) + strandbuf->totbound = max_ii(strandbuf->totbound, (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a): a); + + strandbuf->totbound++; + strandbuf->bound= MEM_callocN(sizeof(StrandBound)*strandbuf->totbound, "StrandBound"); + sbound= strandbuf->bound; + sbound->start= sbound->end= 0; + } + } + + if (sd.orco == NULL) { + sd.orco = MEM_mallocN(3 * sizeof(float), "particle orco"); + orco1 = 1; + } + + if (path_nbr == 0) + psys->lattice_deform_data = psys_create_lattice_deform_data(&sim); + +/* 3. 
start creating renderable things */ + for (a=0, pa=pars; a<totpart+totchild; a++, pa++, seed++) { + random = BLI_rng_get_float(rng); + /* setup per particle individual stuff */ + if (a<totpart) { + if (pa->flag & PARS_UNEXIST) continue; + + pa_time=(cfra-pa->time)/pa->lifetime; + pa_birthtime = pa->time; + pa_dietime = pa->dietime; + + hasize = ma->hasize; + + /* XXX 'tpsys' is always NULL, this code won't run! */ + /* get orco */ + if (tpsys && part->phystype == PART_PHYS_NO) { + tpa = tpsys->particles + pa->num; + psys_particle_on_emitter( + psmd, + tpart->from, tpa->num, pa->num_dmcache, tpa->fuv, + tpa->foffset, co, nor, NULL, NULL, sd.orco, NULL); + } + else { + psys_particle_on_emitter( + psmd, + part->from, pa->num, pa->num_dmcache, + pa->fuv, pa->foffset, co, nor, NULL, NULL, sd.orco, NULL); + } + + /* get uvco & mcol */ + num= pa->num_dmcache; + + if (num == DMCACHE_NOTFOUND) + if (pa->num < psmd->dm_final->getNumTessFaces(psmd->dm_final)) + num= pa->num; + + get_particle_uvco_mcol(part->from, psmd->dm_final, pa->fuv, num, &sd); + + pa_size = pa->size; + + r_tilt = 2.0f*(psys_frand(psys, a) - 0.5f); + r_length = psys_frand(psys, a+1); + + if (path_nbr) { + cache = psys->pathcache[a]; + max_k = (int)cache->segments; + } + + if (totchild && (part->draw&PART_DRAW_PARENT)==0) continue; + } + else { + ChildParticle *cpa= psys->child+a-totpart; + + if (path_nbr) { + cache = psys->childcache[a-totpart]; + + if (cache->segments < 0) + continue; + + max_k = (int)cache->segments; + } + + pa_time = psys_get_child_time(psys, cpa, cfra, &pa_birthtime, &pa_dietime); + pa_size = psys_get_child_size(psys, cpa, cfra, &pa_time); + + r_tilt = 2.0f*(psys_frand(psys, a + 21) - 0.5f); + r_length = psys_frand(psys, a + 22); + + num = cpa->num; + + /* get orco */ + if (part->childtype == PART_CHILD_FACES) { + psys_particle_on_emitter( + psmd, + PART_FROM_FACE, cpa->num, DMCACHE_ISCHILD, + cpa->fuv, cpa->foffset, co, nor, NULL, NULL, sd.orco, NULL); + } + else { + ParticleData 
*par = psys->particles + cpa->parent; + psys_particle_on_emitter( + psmd, + part->from, par->num, DMCACHE_ISCHILD, par->fuv, + par->foffset, co, nor, NULL, NULL, sd.orco, NULL); + } + + /* get uvco & mcol */ + if (part->childtype==PART_CHILD_FACES) { + get_particle_uvco_mcol(PART_FROM_FACE, psmd->dm_final, cpa->fuv, cpa->num, &sd); + } + else { + ParticleData *parent = psys->particles + cpa->parent; + num = parent->num_dmcache; + + if (num == DMCACHE_NOTFOUND) + if (parent->num < psmd->dm_final->getNumTessFaces(psmd->dm_final)) + num = parent->num; + + get_particle_uvco_mcol(part->from, psmd->dm_final, parent->fuv, num, &sd); + } + + do_simplify = psys_render_simplify_params(psys, cpa, simplify); + + if (strandbuf) { + int orignum = (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, cpa->num) : cpa->num; + + if ((orignum > sbound - strandbuf->bound) && + (orignum < strandbuf->totbound)) + { + sbound = &strandbuf->bound[orignum]; + sbound->start = sbound->end = obr->totstrand; + } + } + } + + /* TEXCO_PARTICLE */ + pa_co[0] = pa_time; + pa_co[1] = 0.f; + pa_co[2] = 0.f; + + /* surface normal shading setup */ + if (ma->mode_l & MA_STR_SURFDIFF) { + mul_m3_v3(nmat, nor); + sd.surfnor= nor; + } + else + sd.surfnor= NULL; + + /* strand render setup */ + if (strandbuf) { + strand= RE_findOrAddStrand(obr, obr->totstrand++); + strand->buffer= strandbuf; + strand->vert= svert; + copy_v3_v3(strand->orco, sd.orco); + + if (do_simplify) { + float *ssimplify= RE_strandren_get_simplify(obr, strand, 1); + ssimplify[0]= simplify[0]; + ssimplify[1]= simplify[1]; + } + + if (sd.surfnor) { + float *snor= RE_strandren_get_surfnor(obr, strand, 1); + copy_v3_v3(snor, sd.surfnor); + } + + if (do_surfacecache && num >= 0) { + int *facenum= RE_strandren_get_face(obr, strand, 1); + *facenum= num; + } + + if (sd.uvco) { + for (i=0; i<sd.totuv; i++) { + if (i != sd.override_uv) { + float *uv= RE_strandren_get_uv(obr, strand, i, NULL, 1); + + uv[0]= 
sd.uvco[2*i]; + uv[1]= sd.uvco[2*i+1]; + } + } + } + if (sd.mcol) { + for (i=0; i<sd.totcol; i++) { + MCol *mc= RE_strandren_get_mcol(obr, strand, i, NULL, 1); + *mc = sd.mcol[i]; + } + } + + sbound->end++; + } + + /* strandco computation setup */ + if (path_nbr) { + strandlen= 0.0f; + curlen= 0.0f; + for (k=1; k<=path_nbr; k++) + if (k<=max_k) + strandlen += len_v3v3((cache+k-1)->co, (cache+k)->co); + } + + if (path_nbr) { + /* render strands */ + for (k=0; k<=path_nbr; k++) { + float time; + + if (k<=max_k) { + copy_v3_v3(state.co, (cache+k)->co); + copy_v3_v3(state.vel, (cache+k)->vel); + } + else + continue; + + if (k > 0) + curlen += len_v3v3((cache+k-1)->co, (cache+k)->co); + time= curlen/strandlen; + + copy_v3_v3(loc, state.co); + mul_m4_v3(re->viewmat, loc); + + if (strandbuf) { + copy_v3_v3(svert->co, loc); + svert->strandco= -1.0f + 2.0f*time; + svert++; + strand->totvert++; + } + else { + sd.size = hasize; + + if (k==1) { + sd.first = 1; + sd.time = 0.0f; + sub_v3_v3v3(loc0, loc1, loc); + add_v3_v3v3(loc0, loc1, loc0); + + particle_curve(re, obr, psmd->dm_final, ma, &sd, loc1, loc0, seed, pa_co); + } + + sd.first = 0; + sd.time = time; + + if (k) + particle_curve(re, obr, psmd->dm_final, ma, &sd, loc, loc1, seed, pa_co); + + copy_v3_v3(loc1, loc); + } + } + + } + else { + /* render normal particles */ + if (part->trail_count > 1) { + float length = part->path_end * (1.0f - part->randlength * r_length); + int trail_count = part->trail_count * (1.0f - part->randlength * r_length); + float ct = (part->draw & PART_ABS_PATH_TIME) ? cfra : pa_time; + float dt = length / (trail_count ? 
(float)trail_count : 1.0f); + + /* make sure we have pointcache in memory before getting particle on path */ + psys_make_temp_pointcache(ob, psys); + + for (i=0; i < trail_count; i++, ct -= dt) { + if (part->draw & PART_ABS_PATH_TIME) { + if (ct < pa_birthtime || ct > pa_dietime) + continue; + } + else if (ct < 0.0f || ct > 1.0f) + continue; + + state.time = (part->draw & PART_ABS_PATH_TIME) ? -ct : ct; + psys_get_particle_on_path(&sim, a, &state, 1); + + if (psys->parent) + mul_m4_v3(psys->parent->obmat, state.co); + + if (use_duplimat) + mul_m4_v4(duplimat, state.co); /* NOTE(review): the single-state branch of this function applies duplimat with mul_m4_v3(), and state.co is filled with copy_v3_v3() in the path loop, so it looks like a 3-component vector. Confirm that the 4-component multiply is intended here. */ + + if (part->ren_as == PART_DRAW_BB) { + bb.random = random; + bb.offset[0] = part->bb_offset[0]; + bb.offset[1] = part->bb_offset[1]; + bb.size[0] = part->bb_size[0] * pa_size; + if (part->bb_align==PART_BB_VEL) { + float pa_vel = len_v3(state.vel); + float head = part->bb_vel_head*pa_vel; + float tail = part->bb_vel_tail*pa_vel; + bb.size[1] = part->bb_size[1]*pa_size + head + tail; + /* use offset to adjust the particle center. this is relative to size, so need to divide! */ + if (bb.size[1] > 0.0f) + bb.offset[1] += (head-tail) / bb.size[1]; + } + else + bb.size[1] = part->bb_size[1] * pa_size; + bb.tilt = part->bb_tilt * (1.0f - part->bb_rand_tilt * r_tilt); + bb.time = ct; + bb.num = a; + } + + pa_co[0] = (part->draw & PART_ABS_PATH_TIME) ? 
(ct-pa_birthtime)/(pa_dietime-pa_birthtime) : ct; + pa_co[1] = (float)i/(float)(trail_count-1); /* NOTE(review): when the randomized trail_count is 1 this evaluates 0/0. Confirm whether that case can reach this line and what pa_co[1] should be then. */ + + particle_normal_ren(part->ren_as, part, re, obr, psmd->dm_final, ma, &sd, &bb, &state, seed, hasize, pa_co); + } + } + else { + state.time=cfra; + if (psys_get_particle_state(&sim, a, &state, 0)==0) + continue; + + if (psys->parent) + mul_m4_v3(psys->parent->obmat, state.co); + + if (use_duplimat) + mul_m4_v3(duplimat, state.co); + + if (part->ren_as == PART_DRAW_BB) { + bb.random = random; + bb.offset[0] = part->bb_offset[0]; + bb.offset[1] = part->bb_offset[1]; + bb.size[0] = part->bb_size[0] * pa_size; + if (part->bb_align==PART_BB_VEL) { + float pa_vel = len_v3(state.vel); + float head = part->bb_vel_head*pa_vel; + float tail = part->bb_vel_tail*pa_vel; + bb.size[1] = part->bb_size[1]*pa_size + head + tail; + /* use offset to adjust the particle center. this is relative to size, so need to divide! */ + if (bb.size[1] > 0.0f) + bb.offset[1] += (head-tail) / bb.size[1]; + } + else + bb.size[1] = part->bb_size[1] * pa_size; + bb.tilt = part->bb_tilt * (1.0f - part->bb_rand_tilt * r_tilt); + bb.time = pa_time; + bb.num = a; + bb.lifetime = pa_dietime-pa_birthtime; + } + + particle_normal_ren(part->ren_as, part, re, obr, psmd->dm_final, ma, &sd, &bb, &state, seed, hasize, pa_co); + } + } + + if (orco1==0) + sd.orco+=3; + + if (re->test_break(re->tbh)) + break; + } + + if (do_surfacecache) + strandbuf->surface= cache_strand_surface(re, obr, psmd->dm_final, mat, timeoffset); + +/* 4. 
clean up */ +#if 0 /* XXX old animation system */ + if (ma) do_mat_ipo(re->scene, ma); +#endif /* XXX old animation system */ + + if (orco1) + MEM_freeN(sd.orco); + + if (sd.uvco) + MEM_freeN(sd.uvco); + + if (sd.mcol) + MEM_freeN(sd.mcol); + + if (states) + MEM_freeN(states); + + BLI_rng_free(rng); + + psys->flag &= ~PSYS_DRAWING; + + if (psys->lattice_deform_data) { + end_latt_deform(psys->lattice_deform_data); + psys->lattice_deform_data = NULL; + } + + if (path_nbr && (ma->mode_l & MA_TANGENT_STR)==0) + calc_vertexnormals(re, obr, 1, 0, 0); + + return 1; +} + +/* ------------------------------------------------------------------------- */ +/* Halo's */ +/* ------------------------------------------------------------------------- */ + /* Create one halo per vertex, placed in view space with re->viewmat * ob->obmat. With MA_HALOPUNO the size is multiplied by the fourth power of the dot product between the transformed normal and the normalized vertex position, and is zero when that dot product is >= 0. RE_inithalo() is given orco when available, otherwise the untransformed vertex position; the seed is incremented per vertex. */ +static void make_render_halos(Render *re, ObjectRen *obr, Mesh *UNUSED(me), int totvert, MVert *mvert, Material *ma, float *orco) +{ + Object *ob= obr->ob; + HaloRen *har; + float xn, yn, zn, nor[3], view[3]; + float vec[3], hasize, mat[4][4], imat[3][3]; + int a, ok, seed= ma->seed1; + + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + copy_m3_m4(imat, ob->imat); + + re->flag |= R_HALO; + + for (a=0; a<totvert; a++, mvert++) { + ok= 1; + + if (ok) { + hasize= ma->hasize; + + copy_v3_v3(vec, mvert->co); + mul_m4_v3(mat, vec); + + if (ma->mode & MA_HALOPUNO) { + xn= mvert->no[0]; + yn= mvert->no[1]; + zn= mvert->no[2]; + + /* transpose ! 
*/ + nor[0]= imat[0][0]*xn+imat[0][1]*yn+imat[0][2]*zn; + nor[1]= imat[1][0]*xn+imat[1][1]*yn+imat[1][2]*zn; + nor[2]= imat[2][0]*xn+imat[2][1]*yn+imat[2][2]*zn; + normalize_v3(nor); + + copy_v3_v3(view, vec); + normalize_v3(view); + + zn = dot_v3v3(nor, view); + if (zn>=0.0f) hasize= 0.0f; + else hasize*= zn*zn*zn*zn; + } + + if (orco) har= RE_inithalo(re, obr, ma, vec, NULL, orco, hasize, 0.0, seed); + else har= RE_inithalo(re, obr, ma, vec, NULL, mvert->co, hasize, 0.0, seed); + if (har) har->lay= ob->lay; + } + if (orco) orco+= 3; + seed++; + } +} + /* qsort() comparator for HaloRen pointers: orders by zs, largest value first. */ +static int verghalo(const void *a1, const void *a2) +{ + const HaloRen *har1= *(const HaloRen**)a1; + const HaloRen *har2= *(const HaloRen**)a2; + + if (har1->zs < har2->zs) return 1; + else if (har1->zs > har2->zs) return -1; + return 0; +} + /* Gather the halos of all objects in re->objecttable into the newly allocated re->sortedhalos array (re->tothalo entries) and sort the first totsort entries with verghalo(). Halos are read from obr->bloha in blocks of 256. Does nothing when re->tothalo is 0. */ +static void sort_halos(Render *re, int totsort) +{ + ObjectRen *obr; + HaloRen *har= NULL, **haso; + int a; + + if (re->tothalo==0) return; + + re->sortedhalos= MEM_callocN(sizeof(HaloRen*)*re->tothalo, "sorthalos"); + haso= re->sortedhalos; + + for (obr=re->objecttable.first; obr; obr=obr->next) { + for (a=0; a<obr->tothalo; a++) { + if ((a & 255)==0) har= obr->bloha[a>>8]; + else har++; + + *(haso++)= har; + } + } + + qsort(re->sortedhalos, totsort, sizeof(HaloRen*), verghalo); +} + +/* ------------------------------------------------------------------------- */ +/* Displacement Mapping */ +/* ------------------------------------------------------------------------- */ + +static short test_for_displace(Render *re, Object *ob) +{ + /* return 1 when this object uses displacement textures. 
*/ + Material *ma; + int i; + + for (i=1; i<=ob->totcol; i++) { + ma=give_render_material(re, ob, i); + /* ma->mapto is ORed total of all mapto channels */ + if (ma && (ma->mapto & MAP_DISPLACE)) return 1; + } + return 0; +} + /* Displace a single render vertex. Fills the texture coordinates requested by the material's texco flags into shi, runs do_material_tex(), then adds shi->displace scaled per axis by 'scale' to vr->co. The vertex is flagged as done (vr->flag |= 1) and the squared length of the unscaled displacement is stored in vr->accum. */ +static void displace_render_vert(Render *re, ObjectRen *obr, ShadeInput *shi, VertRen *vr, int vindex, float *scale) +{ + MTFace *tface; + short texco= shi->mat->texco; + float sample=0, displace[3]; + char *name; + int i; + + /* shi->co is current render coord, just make sure at least some vector is here */ + copy_v3_v3(shi->co, vr->co); + /* vertex normal is used for textures type 'col' and 'var' */ + copy_v3_v3(shi->vn, vr->n); + + if (texco & TEXCO_UV) { + shi->totuv= 0; + shi->actuv= obr->actmtface; + + for (i=0; (tface=RE_vlakren_get_tface(obr, shi->vlr, i, &name, 0)); i++) { + ShadeInputUV *suv= &shi->uv[i]; + + /* shi.uv needs scale correction from tface uv */ + suv->uv[0]= 2*tface->uv[vindex][0]-1.0f; + suv->uv[1]= 2*tface->uv[vindex][1]-1.0f; + suv->uv[2]= 0.0f; + suv->name= name; + shi->totuv++; + } + } + + /* set all rendercoords, 'texco' is an ORed value for all textures needed */ + if ((texco & TEXCO_ORCO) && (vr->orco)) { + copy_v3_v3(shi->lo, vr->orco); + } + if (texco & TEXCO_GLOB) { + copy_v3_v3(shi->gl, shi->co); + mul_m4_v3(re->viewinv, shi->gl); + } + if (texco & TEXCO_NORM) { + copy_v3_v3(shi->orn, shi->vn); + } + if (texco & TEXCO_REFL) { + /* not (yet?) 
*/ + } + if (texco & TEXCO_STRESS) { + const float *s= RE_vertren_get_stress(obr, vr, 0); + + if (s) { + shi->stress= *s; + if (shi->stress<1.0f) shi->stress-= 1.0f; + else shi->stress= (shi->stress-1.0f)/shi->stress; + } + else + shi->stress= 0.0f; + } + + shi->displace[0]= shi->displace[1]= shi->displace[2]= 0.0; + + do_material_tex(shi, re); + + //printf("no=%f, %f, %f\nbefore co=%f, %f, %f\n", vr->n[0], vr->n[1], vr->n[2], + //vr->co[0], vr->co[1], vr->co[2]); + + displace[0]= shi->displace[0] * scale[0]; + displace[1]= shi->displace[1] * scale[1]; + displace[2]= shi->displace[2] * scale[2]; + + /* 0.5 could become button once? */ + vr->co[0] += displace[0]; + vr->co[1] += displace[1]; + vr->co[2] += displace[2]; + + //printf("after co=%f, %f, %f\n", vr->co[0], vr->co[1], vr->co[2]); + + /* we just don't do this vertex again, bad luck for other face using same vertex with + * different material... */ + vr->flag |= 1; + + /* Pass sample back so displace_face can decide which way to split the quad */ + sample = shi->displace[0]*shi->displace[0]; + sample += shi->displace[1]*shi->displace[1]; + sample += shi->displace[2]*shi->displace[2]; + + vr->accum=sample; + /* Should be sqrt(sample), but I'm only looking for "bigger". Save the cycles. */ + return; +} + /* Displace the vertices of one face that are not flagged yet, pick the quad split direction (R_DIVIDE_24) by comparing the vr->accum values across both diagonals, and recompute the face normal. */ +static void displace_render_face(Render *re, ObjectRen *obr, VlakRen *vlr, float *scale) +{ + ShadeInput shi; + + /* Warning, This is not that nice, and possibly a bit slow, + * however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */ + memset(&shi, 0, sizeof(ShadeInput)); + /* end warning! 
- Campbell */ + + /* set up shadeinput struct for multitex() */ + + /* memset above means we don't need this */ + /*shi.osatex= 0;*/ /* signal not to use dx[] and dy[] texture AA vectors */ + + shi.obr= obr; + shi.vlr= vlr; /* current render face */ + shi.mat= vlr->mat; /* current input material */ + shi.thread= 0; + + /* TODO, assign these, displacement with new bumpmap is skipped without - campbell */ +#if 0 + /* order is not known ? */ + shi.v1= vlr->v1; + shi.v2= vlr->v2; + shi.v3= vlr->v3; +#endif + + /* Displace the verts, flag is set when done */ + if (!vlr->v1->flag) + displace_render_vert(re, obr, &shi, vlr->v1, 0, scale); + + if (!vlr->v2->flag) + displace_render_vert(re, obr, &shi, vlr->v2, 1, scale); + + if (!vlr->v3->flag) + displace_render_vert(re, obr, &shi, vlr->v3, 2, scale); + + if (vlr->v4) { + if (!vlr->v4->flag) + displace_render_vert(re, obr, &shi, vlr->v4, 3, scale); + + /* closest in displace value. This will help smooth edges. */ + if (fabsf(vlr->v1->accum - vlr->v3->accum) > fabsf(vlr->v2->accum - vlr->v4->accum)) vlr->flag |= R_DIVIDE_24; + else vlr->flag &= ~R_DIVIDE_24; + } + + /* Recalculate the face normal - if flipped before, flip now */ + if (vlr->v4) { + normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + } + else { + normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co); + } +} + /* Displace all faces of obr. The scale passed down is the product of size * dscale of the object and all of its parents. Vertex flags are cleared first and vertex normals are recalculated at the end. */ +static void displace(Render *re, ObjectRen *obr) +{ + VertRen *vr; + VlakRen *vlr; +// float min[3]={1e30, 1e30, 1e30}, max[3]={-1e30, -1e30, -1e30}; + float scale[3]={1.0f, 1.0f, 1.0f}, temp[3];//, xn + int i; //, texflag=0; + Object *obt; + + /* Object Size with parenting */ + obt=obr->ob; + while (obt) { + mul_v3_v3v3(temp, obt->size, obt->dscale); + scale[0]*=temp[0]; scale[1]*=temp[1]; scale[2]*=temp[2]; + obt=obt->parent; + } + + /* Clear all flags */ + for (i=0; i<obr->totvert; i++) { + vr= RE_findOrAddVert(obr, i); + vr->flag= 0; + } + + for (i=0; i<obr->totvlak; i++) { + vlr=RE_findOrAddVlak(obr, i); + 
displace_render_face(re, obr, vlr, scale); + } + + /* Recalc vertex normals */ + calc_vertexnormals(re, obr, 1, 0, 0); +} + +/* ------------------------------------------------------------------------- */ +/* Metaball */ +/* ------------------------------------------------------------------------- */ + +static void init_render_mball(Render *re, ObjectRen *obr) +{ + Object *ob= obr->ob; + DispList *dl; + VertRen *ver; + VlakRen *vlr, *vlr1; + Material *ma; + float *data, *nors, *orco=NULL, mat[4][4], imat[3][3], xn, yn, zn; + int a, need_orco, vlakindex, *index, negative_scale; + ListBase dispbase= {NULL, NULL}; + + if (ob!=BKE_mball_basis_find(re->eval_ctx, re->scene, ob)) + return; + + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); + copy_m3_m4(imat, ob->imat); + negative_scale = is_negative_m4(mat); + + ma= give_render_material(re, ob, 1); + + need_orco= 0; + if (ma->texco & TEXCO_ORCO) { + need_orco= 1; + } + + BKE_displist_make_mball_forRender(re->eval_ctx, re->scene, ob, &dispbase); + dl= dispbase.first; + if (dl == NULL) return; + + data= dl->verts; + nors= dl->nors; + if (need_orco) { + orco= get_object_orco(re, ob); + + if (!orco) { + /* orco hasn't been found in cache - create new one and add to cache */ + orco= BKE_mball_make_orco(ob, &dispbase); + set_object_orco(re, ob, orco); + } + } + + for (a=0; a<dl->nr; a++, data+=3, nors+=3) { + + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, data); + mul_m4_v3(mat, ver->co); + + /* render normals are inverted */ + xn= -nors[0]; + yn= -nors[1]; + zn= -nors[2]; + + /* transpose ! 
*/ + ver->n[0]= imat[0][0]*xn+imat[0][1]*yn+imat[0][2]*zn; + ver->n[1]= imat[1][0]*xn+imat[1][1]*yn+imat[1][2]*zn; + ver->n[2]= imat[2][0]*xn+imat[2][1]*yn+imat[2][2]*zn; + normalize_v3(ver->n); + //if (ob->transflag & OB_NEG_SCALE) negate_v3(ver->n); + + if (need_orco) { + ver->orco= orco; + orco+=3; + } + } + + index= dl->index; + for (a=0; a<dl->parts; a++, index+=4) { + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, index[0]); + vlr->v2= RE_findOrAddVert(obr, index[1]); + vlr->v3= RE_findOrAddVert(obr, index[2]); + vlr->v4 = NULL; + + if (negative_scale) + normal_tri_v3(vlr->n, vlr->v1->co, vlr->v2->co, vlr->v3->co); + else + normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co); + + vlr->mat= ma; + vlr->flag= ME_SMOOTH; + vlr->ec= 0; + + /* mball -too bad- always has triangles, because quads can be non-planar */ + if (index[3] && index[3]!=index[2]) { + vlr1= RE_findOrAddVlak(obr, obr->totvlak++); + vlakindex= vlr1->index; + *vlr1= *vlr; + vlr1->index= vlakindex; + vlr1->v2= vlr1->v3; + vlr1->v3= RE_findOrAddVert(obr, index[3]); + if (negative_scale) + normal_tri_v3(vlr1->n, vlr1->v1->co, vlr1->v2->co, vlr1->v3->co); + else + normal_tri_v3(vlr1->n, vlr1->v3->co, vlr1->v2->co, vlr1->v1->co); + } + } + + /* enforce display lists remade */ + BKE_displist_free(&dispbase); +} + +/* ------------------------------------------------------------------------- */ +/* Surfaces and Curves */ +/* ------------------------------------------------------------------------- */ + +/* returns amount of vertices added for orco */ +static int dl_surf_to_renderdata(ObjectRen *obr, DispList *dl, Material **matar, float *orco, float mat[4][4]) +{ + VertRen *v1, *v2, *v3, *v4, *ver; + VlakRen *vlr, *vlr1, *vlr2, *vlr3; + float *data, n1[3]; + int u, v, orcoret= 0; + int p1, p2, p3, p4, a; + int sizeu, nsizeu, sizev, nsizev; + int startvert, startvlak; + + startvert= obr->totvert; + nsizeu = sizeu = dl->parts; nsizev = sizev = dl->nr; + + data= 
dl->verts; + for (u = 0; u < sizeu; u++) { + v1 = RE_findOrAddVert(obr, obr->totvert++); /* save this for possible V wrapping */ + copy_v3_v3(v1->co, data); data += 3; + if (orco) { + v1->orco= orco; orco+= 3; orcoret++; + } + mul_m4_v3(mat, v1->co); + + for (v = 1; v < sizev; v++) { + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, data); data += 3; + if (orco) { + ver->orco= orco; orco+= 3; orcoret++; + } + mul_m4_v3(mat, ver->co); + } + /* if V-cyclic, add extra vertices at end of the row */ + if (dl->flag & DL_CYCL_U) { + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, v1->co); + if (orco) { + ver->orco= orco; orco+=3; orcoret++; //orcobase + 3*(u*sizev + 0); + } + } + } + + /* Done before next loop to get corner vert */ + if (dl->flag & DL_CYCL_U) nsizev++; + if (dl->flag & DL_CYCL_V) nsizeu++; + + /* if U cyclic, add extra row at end of column */ + if (dl->flag & DL_CYCL_V) { + for (v = 0; v < nsizev; v++) { + v1= RE_findOrAddVert(obr, startvert + v); + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, v1->co); + if (orco) { + ver->orco= orco; orco+=3; orcoret++; //ver->orco= orcobase + 3*(0*sizev + v); + } + } + } + + sizeu = nsizeu; + sizev = nsizev; + + startvlak= obr->totvlak; + + for (u = 0; u < sizeu - 1; u++) { + p1 = startvert + u * sizev; /* walk through face list */ + p2 = p1 + 1; + p3 = p2 + sizev; + p4 = p3 - 1; + + for (v = 0; v < sizev - 1; v++) { + v1= RE_findOrAddVert(obr, p1); + v2= RE_findOrAddVert(obr, p2); + v3= RE_findOrAddVert(obr, p3); + v4= RE_findOrAddVert(obr, p4); + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= v1; vlr->v2= v2; vlr->v3= v3; vlr->v4= v4; + + normal_quad_v3(n1, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + + copy_v3_v3(vlr->n, n1); + + vlr->mat= matar[ dl->col]; + vlr->ec= ME_V1V2+ME_V2V3; + vlr->flag= dl->rt; + + add_v3_v3(v1->n, n1); + add_v3_v3(v2->n, n1); + add_v3_v3(v3->n, n1); + add_v3_v3(v4->n, n1); + + p1++; p2++; p3++; p4++; + } + } + /* 
fix normals for U resp. V cyclic faces */ + sizeu--; sizev--; /* dec size for face array */ + if (dl->flag & DL_CYCL_V) { + + for (v = 0; v < sizev; v++) { + /* optimize! :*/ + vlr= RE_findOrAddVlak(obr, UVTOINDEX(sizeu - 1, v)); + vlr1= RE_findOrAddVlak(obr, UVTOINDEX(0, v)); + add_v3_v3(vlr1->v1->n, vlr->n); + add_v3_v3(vlr1->v2->n, vlr->n); + add_v3_v3(vlr->v3->n, vlr1->n); + add_v3_v3(vlr->v4->n, vlr1->n); + } + } + if (dl->flag & DL_CYCL_U) { + + for (u = 0; u < sizeu; u++) { + /* optimize! :*/ + vlr= RE_findOrAddVlak(obr, UVTOINDEX(u, 0)); + vlr1= RE_findOrAddVlak(obr, UVTOINDEX(u, sizev-1)); + add_v3_v3(vlr1->v2->n, vlr->n); + add_v3_v3(vlr1->v3->n, vlr->n); + add_v3_v3(vlr->v1->n, vlr1->n); + add_v3_v3(vlr->v4->n, vlr1->n); + } + } + + /* last vertex is an extra case: + * + * ^ ()----()----()----() + * | | | || | + * u | |(0,n)||(0,0)| + * | | || | + * ()====()====[]====() + * | | || | + * | |(m,n)||(m,0)| + * | | || | + * ()----()----()----() + * v -> + * + * vertex [] is no longer shared, therefore distribute + * normals of the surrounding faces to all of the duplicates of [] + */ + + if ((dl->flag & DL_CYCL_V) && (dl->flag & DL_CYCL_U)) { + vlr= RE_findOrAddVlak(obr, UVTOINDEX(sizeu - 1, sizev - 1)); /* (m, n) */ + vlr1= RE_findOrAddVlak(obr, UVTOINDEX(0, 0)); /* (0, 0) */ + add_v3_v3v3(n1, vlr->n, vlr1->n); + vlr2= RE_findOrAddVlak(obr, UVTOINDEX(0, sizev-1)); /* (0, n) */ + add_v3_v3(n1, vlr2->n); + vlr3= RE_findOrAddVlak(obr, UVTOINDEX(sizeu-1, 0)); /* (m, 0) */ + add_v3_v3(n1, vlr3->n); + copy_v3_v3(vlr->v3->n, n1); + copy_v3_v3(vlr1->v1->n, n1); + copy_v3_v3(vlr2->v2->n, n1); + copy_v3_v3(vlr3->v4->n, n1); + } + for (a = startvert; a < obr->totvert; a++) { + ver= RE_findOrAddVert(obr, a); + normalize_v3(ver->n); + } + + + return orcoret; +} + +static void init_render_dm(DerivedMesh *dm, Render *re, ObjectRen *obr, + int timeoffset, float *orco, float mat[4][4]) +{ + Object *ob= obr->ob; + int a, end, totvert, vertofs; + short mat_iter; + VertRen 
*ver; + VlakRen *vlr; + MVert *mvert = NULL; + MFace *mface; + Material *ma; +#ifdef WITH_FREESTYLE + const int *index_mf_to_mpoly = NULL; + const int *index_mp_to_orig = NULL; + FreestyleFace *ffa = NULL; +#endif + /* Curve *cu= ELEM(ob->type, OB_FONT, OB_CURVE) ? ob->data : NULL; */ + + mvert= dm->getVertArray(dm); + totvert= dm->getNumVerts(dm); + + for (a=0; a<totvert; a++, mvert++) { + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, mvert->co); + mul_m4_v3(mat, ver->co); + + if (orco) { + ver->orco= orco; + orco+=3; + } + } + + if (!timeoffset) { + /* store customdata names, because DerivedMesh is freed */ + RE_set_customdata_names(obr, &dm->faceData); + + /* still to do for keys: the correct local texture coordinate */ + + /* faces in order of color blocks */ + vertofs= obr->totvert - totvert; + for (mat_iter= 0; (mat_iter < ob->totcol || (mat_iter==0 && ob->totcol==0)); mat_iter++) { + + ma= give_render_material(re, ob, mat_iter+1); + end= dm->getNumTessFaces(dm); + mface= dm->getTessFaceArray(dm); + +#ifdef WITH_FREESTYLE + if (ob->type == OB_MESH) { + Mesh *me= ob->data; + index_mf_to_mpoly= dm->getTessFaceDataArray(dm, CD_ORIGINDEX); + index_mp_to_orig= dm->getPolyDataArray(dm, CD_ORIGINDEX); + ffa= CustomData_get_layer(&me->pdata, CD_FREESTYLE_FACE); + } +#endif + + for (a=0; a<end; a++, mface++) { + int v1, v2, v3, v4, flag; + + if (mface->mat_nr == mat_iter) { + float len; + + v1= mface->v1; + v2= mface->v2; + v3= mface->v3; + v4= mface->v4; + flag= mface->flag & ME_SMOOTH; + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, vertofs+v1); + vlr->v2= RE_findOrAddVert(obr, vertofs+v2); + vlr->v3= RE_findOrAddVert(obr, vertofs+v3); + if (v4) vlr->v4= RE_findOrAddVert(obr, vertofs+v4); + else vlr->v4 = NULL; + + /* render normals are inverted in render */ + if (vlr->v4) + len= normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + else + len= normal_tri_v3(vlr->n, vlr->v3->co, 
vlr->v2->co, vlr->v1->co); + + vlr->mat= ma; + vlr->flag= flag; + vlr->ec= 0; /* mesh edges rendered separately */ +#ifdef WITH_FREESTYLE + if (ffa) { + int index = (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a) : a; + vlr->freestyle_face_mark= (ffa[index].flag & FREESTYLE_FACE_MARK) ? 1 : 0; + } + else { + vlr->freestyle_face_mark= 0; + } +#endif + + if (len==0) obr->totvlak--; + else { + CustomDataLayer *layer; + MTFace *mtface, *mtf; + MCol *mcol, *mc; + int index, mtfn= 0, mcn= 0; + char *name; + + for (index=0; index<dm->faceData.totlayer; index++) { + layer= &dm->faceData.layers[index]; + name= layer->name; + + if (layer->type == CD_MTFACE && mtfn < MAX_MTFACE) { + mtf= RE_vlakren_get_tface(obr, vlr, mtfn++, &name, 1); + mtface= (MTFace*)layer->data; + *mtf= mtface[a]; + } + else if (layer->type == CD_MCOL && mcn < MAX_MCOL) { + mc= RE_vlakren_get_mcol(obr, vlr, mcn++, &name, 1); + mcol= (MCol*)layer->data; + memcpy(mc, &mcol[a*4], sizeof(MCol)*4); + } + } + } + } + } + } + + /* Normals */ + calc_vertexnormals(re, obr, 1, 0, 0); + } + +} + +static void init_render_surf(Render *re, ObjectRen *obr, int timeoffset) +{ + Object *ob= obr->ob; + Nurb *nu = NULL; + Curve *cu; + ListBase displist= {NULL, NULL}; + DispList *dl; + Material **matar; + float *orco=NULL, mat[4][4]; + int a, totmat; + bool need_orco = false; + DerivedMesh *dm= NULL; + + cu= ob->data; + nu= cu->nurb.first; + if (nu == NULL) return; + + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); + + /* material array */ + totmat= ob->totcol+1; + matar= MEM_callocN(sizeof(Material*)*totmat, "init_render_surf matar"); + + for (a=0; a<totmat; a++) { + matar[a]= give_render_material(re, ob, a+1); + + if (matar[a] && matar[a]->texco & TEXCO_ORCO) + need_orco= 1; + } + + if (ob->parent && (ob->parent->type==OB_LATTICE)) need_orco= 1; + + BKE_displist_make_surf(re->scene, ob, &displist, &dm, 1, 0, 1); + + if (dm) { + if (need_orco) { + orco = 
get_object_orco(re, ob); + if (!orco) { + orco= BKE_displist_make_orco(re->scene, ob, dm, true, true); + if (orco) { + set_object_orco(re, ob, orco); + } + } + } + + init_render_dm(dm, re, obr, timeoffset, orco, mat); + dm->release(dm); + } + else { + if (need_orco) { + orco = get_object_orco(re, ob); + if (!orco) { + orco = BKE_curve_surf_make_orco(ob); + set_object_orco(re, ob, orco); + } + } + + /* walk along displaylist and create rendervertices/-faces */ + for (dl=displist.first; dl; dl=dl->next) { + /* watch out: u ^= y, v ^= x !! */ + if (dl->type==DL_SURF) + orco+= 3*dl_surf_to_renderdata(obr, dl, matar, orco, mat); + } + } + + BKE_displist_free(&displist); + + MEM_freeN(matar); +} + +static void init_render_curve(Render *re, ObjectRen *obr, int timeoffset) +{ + Object *ob= obr->ob; + Curve *cu; + VertRen *ver; + VlakRen *vlr; + DispList *dl; + DerivedMesh *dm = NULL; + ListBase disp={NULL, NULL}; + Material **matar; + float *data, *fp, *orco=NULL; + float n[3], mat[4][4], nmat[4][4]; + int nr, startvert, a, b, negative_scale; + bool need_orco = false; + int totmat; + + cu= ob->data; + if (ob->type==OB_FONT && cu->str==NULL) return; + else if (ob->type==OB_CURVE && cu->nurb.first==NULL) return; + + BKE_displist_make_curveTypes_forRender(re->scene, ob, &disp, &dm, false, true); + dl= disp.first; + if (dl==NULL) return; + + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); + negative_scale = is_negative_m4(mat); + + /* local object -> world space transform for normals */ + transpose_m4_m4(nmat, mat); + invert_m4(nmat); + + /* material array */ + totmat= ob->totcol+1; + matar= MEM_callocN(sizeof(Material*)*totmat, "init_render_surf matar"); + + for (a=0; a<totmat; a++) { + matar[a]= give_render_material(re, ob, a+1); + + if (matar[a] && matar[a]->texco & TEXCO_ORCO) + need_orco= 1; + } + + if (dm) { + if (need_orco) { + orco = get_object_orco(re, ob); + if (!orco) { + orco = BKE_displist_make_orco(re->scene, ob, dm, true, true); + if 
(orco) { + set_object_orco(re, ob, orco); + } + } + } + + init_render_dm(dm, re, obr, timeoffset, orco, mat); + dm->release(dm); + } + else { + if (need_orco) { + orco = get_object_orco(re, ob); + if (!orco) { + orco = BKE_curve_make_orco(re->scene, ob, NULL); + set_object_orco(re, ob, orco); + } + } + + while (dl) { + if (dl->col > ob->totcol) { + /* pass */ + } + else if (dl->type==DL_INDEX3) { + const int *index; + + startvert= obr->totvert; + data= dl->verts; + + for (a=0; a<dl->nr; a++, data+=3) { + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, data); + + mul_m4_v3(mat, ver->co); + + if (orco) { + ver->orco = orco; + orco += 3; + } + } + + if (timeoffset==0) { + float tmp[3]; + const int startvlak= obr->totvlak; + + zero_v3(n); + index= dl->index; + for (a=0; a<dl->parts; a++, index+=3) { + int v1 = index[0], v2 = index[2], v3 = index[1]; + float *co1 = &dl->verts[v1 * 3], + *co2 = &dl->verts[v2 * 3], + *co3 = &dl->verts[v3 * 3]; + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, startvert + v1); + vlr->v2= RE_findOrAddVert(obr, startvert + v2); + vlr->v3= RE_findOrAddVert(obr, startvert + v3); + vlr->v4= NULL; + + /* to prevent float accuracy issues, we calculate normal in local object space (not world) */ + if (normal_tri_v3(tmp, co1, co2, co3) > FLT_EPSILON) { + if (negative_scale == false) { + add_v3_v3(n, tmp); + } + else { + sub_v3_v3(n, tmp); + } + } + + vlr->mat= matar[ dl->col ]; + vlr->flag= 0; + vlr->ec= 0; + } + + /* transform normal to world space */ + mul_m4_v3(nmat, n); + normalize_v3(n); + + /* vertex normals */ + for (a= startvlak; a<obr->totvlak; a++) { + vlr= RE_findOrAddVlak(obr, a); + + copy_v3_v3(vlr->n, n); + add_v3_v3(vlr->v1->n, vlr->n); + add_v3_v3(vlr->v3->n, vlr->n); + add_v3_v3(vlr->v2->n, vlr->n); + } + for (a=startvert; a<obr->totvert; a++) { + ver= RE_findOrAddVert(obr, a); + normalize_v3(ver->n); + } + } + } + else if (dl->type==DL_SURF) { + + /* cyclic U means an extruded full 
circular curve, we skip bevel splitting then */ + if (dl->flag & DL_CYCL_U) { + orco+= 3*dl_surf_to_renderdata(obr, dl, matar, orco, mat); + } + else { + int p1, p2, p3, p4; + + fp= dl->verts; + startvert= obr->totvert; + nr= dl->nr*dl->parts; + + while (nr--) { + ver= RE_findOrAddVert(obr, obr->totvert++); + + copy_v3_v3(ver->co, fp); + mul_m4_v3(mat, ver->co); + fp+= 3; + + if (orco) { + ver->orco = orco; + orco += 3; + } + } + + if (dl->flag & DL_CYCL_V && orco) { + fp = dl->verts; + nr = dl->nr; + while (nr--) { + ver = RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, fp); + mul_m4_v3(mat, ver->co); + ver->orco = orco; + fp += 3; + orco += 3; + } + } + + if (dl->bevel_split || timeoffset == 0) { + const int startvlak= obr->totvlak; + + for (a=0; a<dl->parts; a++) { + + if (BKE_displist_surfindex_get(dl, a, &b, &p1, &p2, &p3, &p4)==0) + break; + + p1+= startvert; + p2+= startvert; + p3+= startvert; + p4+= startvert; + + if (dl->flag & DL_CYCL_V && orco && a == dl->parts - 1) { + p3 = p1 + dl->nr; + p4 = p2 + dl->nr; + } + + for (; b<dl->nr; b++) { + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + /* important 1 offset in order is kept [#24913] */ + vlr->v1= RE_findOrAddVert(obr, p2); + vlr->v2= RE_findOrAddVert(obr, p1); + vlr->v3= RE_findOrAddVert(obr, p3); + vlr->v4= RE_findOrAddVert(obr, p4); + vlr->ec= ME_V2V3+ME_V3V4; + if (a==0) vlr->ec+= ME_V1V2; + + vlr->flag= dl->rt; + + normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + vlr->mat= matar[ dl->col ]; + + p4= p3; + p3++; + p2= p1; + p1++; + } + } + + if (dl->bevel_split) { + for (a = 0; a < dl->parts - 1 + !!(dl->flag & DL_CYCL_V); a++) { + if (BLI_BITMAP_TEST(dl->bevel_split, a)) { + split_v_renderfaces( + obr, startvlak, startvert, dl->parts, dl->nr, a, + /* intentionally swap (v, u) --> (u, v) */ + dl->flag & DL_CYCL_V, dl->flag & DL_CYCL_U); + } + } + } + + /* vertex normals */ + for (a= startvlak; a<obr->totvlak; a++) { + vlr= RE_findOrAddVlak(obr, a); + + 
add_v3_v3(vlr->v1->n, vlr->n); + add_v3_v3(vlr->v3->n, vlr->n); + add_v3_v3(vlr->v2->n, vlr->n); + add_v3_v3(vlr->v4->n, vlr->n); + } + for (a=startvert; a<obr->totvert; a++) { + ver= RE_findOrAddVert(obr, a); + normalize_v3(ver->n); + } + } + } + } + + dl= dl->next; + } + } + + BKE_displist_free(&disp); + + MEM_freeN(matar); +} + +/* ------------------------------------------------------------------------- */ +/* Mesh */ +/* ------------------------------------------------------------------------- */ + +struct edgesort { + unsigned int v1, v2; + int f; + unsigned int i1, i2; +}; + +/* edges have to be added with lowest index first for sorting */ +static void to_edgesort(struct edgesort *ed, + unsigned int i1, unsigned int i2, + unsigned int v1, unsigned int v2, int f) +{ + if (v1 > v2) { + SWAP(unsigned int, v1, v2); + SWAP(unsigned int, i1, i2); + } + + ed->v1= v1; + ed->v2= v2; + ed->i1= i1; + ed->i2= i2; + ed->f = f; +} + +static int vergedgesort(const void *v1, const void *v2) +{ + const struct edgesort *x1=v1, *x2=v2; + + if ( x1->v1 > x2->v1) return 1; + else if ( x1->v1 < x2->v1) return -1; + else if ( x1->v2 > x2->v2) return 1; + else if ( x1->v2 < x2->v2) return -1; + + return 0; +} + +static struct edgesort *make_mesh_edge_lookup(DerivedMesh *dm, int *totedgesort) +{ + MFace *mf, *mface; + MTFace *tface=NULL; + struct edgesort *edsort, *ed; + unsigned int *mcol=NULL; + int a, totedge=0, totface; + + mface= dm->getTessFaceArray(dm); + totface= dm->getNumTessFaces(dm); + tface= dm->getTessFaceDataArray(dm, CD_MTFACE); + mcol= dm->getTessFaceDataArray(dm, CD_MCOL); + + if (mcol==NULL && tface==NULL) return NULL; + + /* make sorted table with edges and face indices in it */ + for (a= totface, mf= mface; a>0; a--, mf++) { + totedge += mf->v4 ? 
4 : 3; + } + + if (totedge==0) + return NULL; + + ed= edsort= MEM_callocN(totedge*sizeof(struct edgesort), "edgesort"); + + for (a=0, mf=mface; a<totface; a++, mf++) { + to_edgesort(ed++, 0, 1, mf->v1, mf->v2, a); + to_edgesort(ed++, 1, 2, mf->v2, mf->v3, a); + if (mf->v4) { + to_edgesort(ed++, 2, 3, mf->v3, mf->v4, a); + to_edgesort(ed++, 3, 0, mf->v4, mf->v1, a); + } + else { + to_edgesort(ed++, 2, 3, mf->v3, mf->v1, a); + } + } + + qsort(edsort, totedge, sizeof(struct edgesort), vergedgesort); + + *totedgesort= totedge; + + return edsort; +} + +static void use_mesh_edge_lookup(ObjectRen *obr, DerivedMesh *dm, MEdge *medge, VlakRen *vlr, struct edgesort *edgetable, int totedge) +{ + struct edgesort ed, *edp; + CustomDataLayer *layer; + MTFace *mtface, *mtf; + MCol *mcol, *mc; + int index, mtfn, mcn; + char *name; + + if (medge->v1 < medge->v2) { + ed.v1= medge->v1; + ed.v2= medge->v2; + } + else { + ed.v1= medge->v2; + ed.v2= medge->v1; + } + + edp= bsearch(&ed, edgetable, totedge, sizeof(struct edgesort), vergedgesort); + + /* since edges have different index ordering, we have to duplicate mcol and tface */ + if (edp) { + mtfn= mcn= 0; + + for (index=0; index<dm->faceData.totlayer; index++) { + layer= &dm->faceData.layers[index]; + name= layer->name; + + if (layer->type == CD_MTFACE && mtfn < MAX_MTFACE) { + mtface= &((MTFace*)layer->data)[edp->f]; + mtf= RE_vlakren_get_tface(obr, vlr, mtfn++, &name, 1); + + *mtf= *mtface; + + memcpy(mtf->uv[0], mtface->uv[edp->i1], sizeof(float)*2); + memcpy(mtf->uv[1], mtface->uv[edp->i2], sizeof(float)*2); + memcpy(mtf->uv[2], mtface->uv[1], sizeof(float)*2); + memcpy(mtf->uv[3], mtface->uv[1], sizeof(float)*2); + } + else if (layer->type == CD_MCOL && mcn < MAX_MCOL) { + mcol= &((MCol*)layer->data)[edp->f*4]; + mc= RE_vlakren_get_mcol(obr, vlr, mcn++, &name, 1); + + mc[0]= mcol[edp->i1]; + mc[1]= mc[2]= mc[3]= mcol[edp->i2]; + } + } + } +} + +static void free_camera_inside_volumes(Render *re) +{ + 
BLI_freelistN(&re->render_volumes_inside); +} + +static void init_camera_inside_volumes(Render *re) +{ + ObjectInstanceRen *obi; + VolumeOb *vo; + /* coordinates are all in camera space, so camera coordinate is zero. we also + * add an offset for the clip start, however note that with clip start it's + * actually impossible to do a single 'inside' test, since there will not be + * a single point where all camera rays start from, though for small clip start + * they will be close together. */ + float co[3] = {0.f, 0.f, -re->clipsta}; + + for (vo= re->volumes.first; vo; vo= vo->next) { + for (obi= re->instancetable.first; obi; obi= obi->next) { + if (obi->obr == vo->obr) { + if (point_inside_volume_objectinstance(re, obi, co)) { + MatInside *mi; + + mi = MEM_mallocN(sizeof(MatInside), "camera inside material"); + mi->ma = vo->ma; + mi->obi = obi; + + BLI_addtail(&(re->render_volumes_inside), mi); + } + } + } + } + + +#if 0 /* debug */ + { + MatInside *m; + for (m = re->render_volumes_inside.first; m; m = m->next) { + printf("matinside: ma: %s\n", m->ma->id.name + 2); + } + } +#endif +} + +static void add_volume(Render *re, ObjectRen *obr, Material *ma) +{ + struct VolumeOb *vo; + + vo = MEM_mallocN(sizeof(VolumeOb), "volume object"); + + vo->ma = ma; + vo->obr = obr; + + BLI_addtail(&re->volumes, vo); +} + +#ifdef WITH_FREESTYLE +static EdgeHash *make_freestyle_edge_mark_hash(DerivedMesh *dm) +{ + EdgeHash *edge_hash= NULL; + FreestyleEdge *fed; + MEdge *medge; + int totedge, a; + + medge = dm->getEdgeArray(dm); + totedge = dm->getNumEdges(dm); + fed = dm->getEdgeDataArray(dm, CD_FREESTYLE_EDGE); + if (fed) { + edge_hash = BLI_edgehash_new(__func__); + for (a = 0; a < totedge; a++) { + if (fed[a].flag & FREESTYLE_EDGE_MARK) + BLI_edgehash_insert(edge_hash, medge[a].v1, medge[a].v2, medge+a); + } + } + return edge_hash; +} + +static bool has_freestyle_edge_mark(EdgeHash *edge_hash, int v1, int v2) +{ + MEdge *medge= BLI_edgehash_lookup(edge_hash, v1, v2); + return 
(!medge) ? 0 : 1; +} +#endif + +static void init_render_mesh(Render *re, ObjectRen *obr, int timeoffset) +{ + Object *ob= obr->ob; + Mesh *me; + MVert *mvert = NULL; + MFace *mface; + VlakRen *vlr; //, *vlr1; + VertRen *ver; + Material *ma; + DerivedMesh *dm; + CustomDataMask mask; + float xn, yn, zn, imat[3][3], mat[4][4]; //nor[3], + float *orco = NULL; + short (*loop_nors)[4][3] = NULL; + bool need_orco = false, need_stress = false, need_tangent = false, need_origindex = false; + bool need_nmap_tangent_concrete = false; + int a, a1, ok, vertofs; + int end, totvert = 0; + bool do_autosmooth = false, do_displace = false; + bool use_original_normals = false; + int recalc_normals = 0; /* false by default */ + int negative_scale; +#ifdef WITH_FREESTYLE + FreestyleFace *ffa; +#endif + + me= ob->data; + + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); + copy_m3_m4(imat, ob->imat); + negative_scale= is_negative_m4(mat); + + need_orco= 0; + for (a=1; a<=ob->totcol; a++) { + ma= give_render_material(re, ob, a); + if (ma) { + if (ma->texco & (TEXCO_ORCO|TEXCO_STRESS)) + need_orco= 1; + if (ma->texco & TEXCO_STRESS) + need_stress= 1; + /* normalmaps, test if tangents needed, separated from shading */ + if (ma->mode_l & MA_TANGENT_V) { + need_tangent= 1; + if (me->mtpoly==NULL) + need_orco= 1; + } + if (ma->mode_l & MA_NORMAP_TANG) { + if (me->mtpoly==NULL) { + need_orco= 1; + } + need_tangent= 1; + } + if (ma->mode2_l & MA_TANGENT_CONCRETE) { + need_nmap_tangent_concrete = true; + } + } + } + + if (re->flag & R_NEED_TANGENT) { + /* exception for tangent space baking */ + if (me->mtpoly==NULL) { + need_orco= 1; + } + need_tangent= 1; + } + + /* check autosmooth and displacement, we then have to skip only-verts optimize + * Note: not sure what we want to give higher priority, currently do_displace + * takes precedence over do_autosmooth. 
+ */ + do_displace = test_for_displace(re, ob); + do_autosmooth = ((me->flag & ME_AUTOSMOOTH) != 0) && !do_displace; + if (do_autosmooth || do_displace) + timeoffset = 0; + + /* origindex currently used when using autosmooth, or baking to vertex colors. */ + need_origindex = (do_autosmooth || ((re->flag & R_BAKING) && (re->r.bake_flag & R_BAKE_VCOL))); + + mask = CD_MASK_RENDER_INTERNAL; + if (!timeoffset) + if (need_orco) + mask |= CD_MASK_ORCO; + +#ifdef WITH_FREESTYLE + mask |= CD_MASK_ORIGINDEX | CD_MASK_FREESTYLE_EDGE | CD_MASK_FREESTYLE_FACE; +#endif + + if (re->r.scemode & R_VIEWPORT_PREVIEW) + dm= mesh_create_derived_view(re->scene, ob, mask); + else + dm= mesh_create_derived_render(re->scene, ob, mask); + if (dm==NULL) return; /* in case duplicated object fails? */ + + mvert= dm->getVertArray(dm); + totvert= dm->getNumVerts(dm); + + if (totvert == 0) { + dm->release(dm); + return; + } + + if (mask & CD_MASK_ORCO) { + orco = get_object_orco(re, ob); + if (!orco) { + orco= dm->getVertDataArray(dm, CD_ORCO); + if (orco) { + orco= MEM_dupallocN(orco); + set_object_orco(re, ob, orco); + } + } + } + + /* attempt to autsmooth on original mesh, only without subsurf */ + if (do_autosmooth && me->totvert==totvert && me->totface==dm->getNumTessFaces(dm)) + use_original_normals= true; + + ma= give_render_material(re, ob, 1); + + + if (ma->material_type == MA_TYPE_HALO) { + make_render_halos(re, obr, me, totvert, mvert, ma, orco); + } + else { + const int *index_vert_orig = NULL; + const int *index_mf_to_mpoly = NULL; + const int *index_mp_to_orig = NULL; + if (need_origindex) { + index_vert_orig = dm->getVertDataArray(dm, CD_ORIGINDEX); + /* double lookup for faces -> polys */ +#ifdef WITH_FREESTYLE + index_mf_to_mpoly = dm->getTessFaceDataArray(dm, CD_ORIGINDEX); + index_mp_to_orig = dm->getPolyDataArray(dm, CD_ORIGINDEX); +#endif + } + + for (a=0; a<totvert; a++, mvert++) { + ver= RE_findOrAddVert(obr, obr->totvert++); + copy_v3_v3(ver->co, mvert->co); + if 
(do_autosmooth == false) { /* autosmooth on original unrotated data to prevent differences between frames */ + normal_short_to_float_v3(ver->n, mvert->no); + mul_m4_v3(mat, ver->co); + mul_transposed_m3_v3(imat, ver->n); + normalize_v3(ver->n); + negate_v3(ver->n); + } + + if (orco) { + ver->orco= orco; + orco+=3; + } + + if (need_origindex) { + int *origindex; + origindex = RE_vertren_get_origindex(obr, ver, 1); + + /* Use orig index array if it's available (e.g. in the presence + * of modifiers). */ + if (index_vert_orig) + *origindex = index_vert_orig[a]; + else + *origindex = a; + } + } + + if (!timeoffset) { + short (*lnp)[4][3] = NULL; +#ifdef WITH_FREESTYLE + EdgeHash *edge_hash; + + /* create a hash table of Freestyle edge marks */ + edge_hash = make_freestyle_edge_mark_hash(dm); +#endif + + /* store customdata names, because DerivedMesh is freed */ + RE_set_customdata_names(obr, &dm->faceData); + + /* add tangent layers if we need */ + if ((ma->nmap_tangent_names_count && need_nmap_tangent_concrete) || need_tangent) { + dm->calcLoopTangents( + dm, need_tangent, + (const char (*)[MAX_NAME])ma->nmap_tangent_names, ma->nmap_tangent_names_count); + obr->tangent_mask = dm->tangent_mask; + DM_generate_tangent_tessface_data(dm, need_nmap_tangent_concrete || need_tangent); + } + + /* still to do for keys: the correct local texture coordinate */ + + /* faces in order of color blocks */ + vertofs= obr->totvert - totvert; + for (a1=0; (a1<ob->totcol || (a1==0 && ob->totcol==0)); a1++) { + + ma= give_render_material(re, ob, a1+1); + + /* test for 100% transparent */ + ok = 1; + if ((ma->alpha == 0.0f) && + (ma->spectra == 0.0f) && + /* No need to test filter here, it's only active with MA_RAYTRANSP and we check against it below. */ + /* (ma->filter == 0.0f) && */ + (ma->mode & MA_TRANSP) && + (ma->mode & (MA_RAYTRANSP | MA_RAYMIRROR)) == 0) + { + ok = 0; + /* texture on transparency? 
*/ + for (a=0; a<MAX_MTEX; a++) { + if (ma->mtex[a] && ma->mtex[a]->tex) { + if (ma->mtex[a]->mapto & MAP_ALPHA) ok= 1; + } + } + } + + /* if wire material, and we got edges, don't do the faces */ + if (ma->material_type == MA_TYPE_WIRE) { + end= dm->getNumEdges(dm); + if (end) ok= 0; + } + + if (ok) { + end= dm->getNumTessFaces(dm); + mface= dm->getTessFaceArray(dm); + if (!loop_nors && do_autosmooth && + (dm->getTessFaceDataArray(dm, CD_TESSLOOPNORMAL) != NULL)) + { + lnp = loop_nors = MEM_mallocN(sizeof(*loop_nors) * end, __func__); + } +#ifdef WITH_FREESTYLE + index_mf_to_mpoly= dm->getTessFaceDataArray(dm, CD_ORIGINDEX); + index_mp_to_orig= dm->getPolyDataArray(dm, CD_ORIGINDEX); + ffa= CustomData_get_layer(&me->pdata, CD_FREESTYLE_FACE); +#endif + + for (a=0; a<end; a++, mface++) { + int v1, v2, v3, v4, flag; + + if ( mface->mat_nr==a1 ) { + float len; + bool reverse_verts = (negative_scale != 0 && do_autosmooth == false); + int rev_tab[] = {reverse_verts==0 ? 0 : 2, 1, reverse_verts==0 ? 2 : 0, 3}; + v1= reverse_verts==0 ? mface->v1 : mface->v3; + v2= mface->v2; + v3= reverse_verts==0 ? mface->v3 : mface->v1; + v4= mface->v4; + flag = do_autosmooth ? 
ME_SMOOTH : mface->flag & ME_SMOOTH; + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, vertofs+v1); + vlr->v2= RE_findOrAddVert(obr, vertofs+v2); + vlr->v3= RE_findOrAddVert(obr, vertofs+v3); + if (v4) vlr->v4 = RE_findOrAddVert(obr, vertofs+v4); + else vlr->v4 = NULL; + +#ifdef WITH_FREESTYLE + /* Freestyle edge/face marks */ + if (edge_hash) { + int edge_mark = 0; + + if (has_freestyle_edge_mark(edge_hash, v1, v2)) edge_mark |= R_EDGE_V1V2; + if (has_freestyle_edge_mark(edge_hash, v2, v3)) edge_mark |= R_EDGE_V2V3; + if (!v4) { + if (has_freestyle_edge_mark(edge_hash, v3, v1)) edge_mark |= R_EDGE_V3V1; + } + else { + if (has_freestyle_edge_mark(edge_hash, v3, v4)) edge_mark |= R_EDGE_V3V4; + if (has_freestyle_edge_mark(edge_hash, v4, v1)) edge_mark |= R_EDGE_V4V1; + } + vlr->freestyle_edge_mark= edge_mark; + } + if (ffa) { + int index = (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a) : a; + vlr->freestyle_face_mark= (ffa[index].flag & FREESTYLE_FACE_MARK) ? 1 : 0; + } + else { + vlr->freestyle_face_mark= 0; + } +#endif + + /* render normals are inverted in render */ + if (use_original_normals) { + MFace *mf= me->mface+a; + MVert *mv= me->mvert; + + if (vlr->v4) + len= normal_quad_v3(vlr->n, mv[mf->v4].co, mv[mf->v3].co, mv[mf->v2].co, mv[mf->v1].co); + else + len= normal_tri_v3(vlr->n, mv[mf->v3].co, mv[mf->v2].co, mv[mf->v1].co); + } + else { + if (vlr->v4) + len= normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co); + else + len= normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co); + } + + vlr->mat= ma; + vlr->flag= flag; + vlr->ec= 0; /* mesh edges rendered separately */ + + if (len==0) obr->totvlak--; + else { + CustomDataLayer *layer; + MTFace *mtface, *mtf; + MCol *mcol, *mc; + int index, mtfn= 0, mcn= 0, mln = 0, vindex; + char *name; + int nr_verts = v4!=0 ? 
4 : 3; + + for (index=0; index<dm->faceData.totlayer; index++) { + layer= &dm->faceData.layers[index]; + name= layer->name; + + if (layer->type == CD_MTFACE && mtfn < MAX_MTFACE) { + int t; + mtf= RE_vlakren_get_tface(obr, vlr, mtfn++, &name, 1); + mtface= (MTFace*)layer->data; + *mtf = mtface[a]; /* copy face info */ + for (vindex=0; vindex<nr_verts; vindex++) + for (t=0; t<2; t++) + mtf->uv[vindex][t]=mtface[a].uv[rev_tab[vindex]][t]; + } + else if (layer->type == CD_MCOL && mcn < MAX_MCOL) { + mc= RE_vlakren_get_mcol(obr, vlr, mcn++, &name, 1); + mcol= (MCol*)layer->data; + for (vindex=0; vindex<nr_verts; vindex++) + mc[vindex]=mcol[a*4+rev_tab[vindex]]; + } + else if (layer->type == CD_TANGENT) { + if (need_nmap_tangent_concrete || need_tangent) { + int uv_start = CustomData_get_layer_index(&dm->faceData, CD_MTFACE); + int uv_index = CustomData_get_named_layer_index(&dm->faceData, CD_MTFACE, layer->name); + + /* if there are no UVs, orco tangents are in first slot */ + int n = (uv_start >= 0 && uv_index >= 0) ? uv_index - uv_start : 0; + + const float *tangent = (const float *) layer->data; + float *ftang = RE_vlakren_get_nmap_tangent(obr, vlr, n, true); + + for (vindex=0; vindex<nr_verts; vindex++) { + copy_v4_v4(ftang+vindex*4, tangent+a*16+rev_tab[vindex]*4); + mul_mat3_m4_v3(mat, ftang+vindex*4); + normalize_v3(ftang+vindex*4); + } + } + } + else if (layer->type == CD_TESSLOOPNORMAL && mln < 1) { + if (loop_nors) { + const short (*lnors)[4][3] = (const short (*)[4][3])layer->data; + for (vindex = 0; vindex < 4; vindex++) { + //print_v3("lnors[a][rev_tab[vindex]]", lnors[a][rev_tab[vindex]]); + copy_v3_v3_short((short *)lnp[0][vindex], lnors[a][rev_tab[vindex]]); + /* If we copy loop normals, we are doing autosmooth, so we are still + * in object space, no need to multiply with mat! + */ + } + lnp++; + } + mln++; + } + } + + if (need_origindex) { + /* Find original index of mpoly for this tessface. 
Options: + * - Modified mesh; two-step look up from tessface -> modified mpoly -> original mpoly + * - OR Tesselated mesh; look up from tessface -> mpoly + * - OR Failsafe; tessface == mpoly. Could probably assert(false) in this case? */ + int *origindex; + origindex = RE_vlakren_get_origindex(obr, vlr, 1); + if (index_mf_to_mpoly && index_mp_to_orig) + *origindex = DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a); + else if (index_mf_to_mpoly) + *origindex = index_mf_to_mpoly[a]; + else + *origindex = a; + } + } + } + } + } + } + +#ifdef WITH_FREESTYLE + /* release the hash table of Freestyle edge marks */ + if (edge_hash) + BLI_edgehash_free(edge_hash, NULL); +#endif + + /* exception... we do edges for wire mode. potential conflict when faces exist... */ + end= dm->getNumEdges(dm); + mvert= dm->getVertArray(dm); + ma= give_render_material(re, ob, 1); + if (end && (ma->material_type == MA_TYPE_WIRE)) { + MEdge *medge; + struct edgesort *edgetable; + int totedge= 0; + recalc_normals= 1; + + medge= dm->getEdgeArray(dm); + + /* we want edges to have UV and vcol too... */ + edgetable= make_mesh_edge_lookup(dm, &totedge); + + for (a1=0; a1<end; a1++, medge++) { + if (medge->flag&ME_EDGERENDER) { + MVert *v0 = &mvert[medge->v1]; + MVert *v1 = &mvert[medge->v2]; + + vlr= RE_findOrAddVlak(obr, obr->totvlak++); + vlr->v1= RE_findOrAddVert(obr, vertofs+medge->v1); + vlr->v2= RE_findOrAddVert(obr, vertofs+medge->v2); + vlr->v3= vlr->v2; + vlr->v4= NULL; + + if (edgetable) + use_mesh_edge_lookup(obr, dm, medge, vlr, edgetable, totedge); + + xn= -(v0->no[0]+v1->no[0]); + yn= -(v0->no[1]+v1->no[1]); + zn= -(v0->no[2]+v1->no[2]); + /* transpose ! 
*/ + vlr->n[0]= imat[0][0]*xn+imat[0][1]*yn+imat[0][2]*zn; + vlr->n[1]= imat[1][0]*xn+imat[1][1]*yn+imat[1][2]*zn; + vlr->n[2]= imat[2][0]*xn+imat[2][1]*yn+imat[2][2]*zn; + normalize_v3(vlr->n); + + vlr->mat= ma; + vlr->flag= 0; + vlr->ec= ME_V1V2; + } + } + if (edgetable) + MEM_freeN(edgetable); + } + } + } + + if (!timeoffset) { + if (need_stress) + calc_edge_stress(re, obr, me); + + if (do_displace) { + calc_vertexnormals(re, obr, 1, 0, 0); + displace(re, obr); + recalc_normals = 0; /* Already computed by displace! */ + } + else if (do_autosmooth) { + recalc_normals = (loop_nors == NULL); /* Should never happen, but better be safe than sorry. */ + autosmooth(re, obr, mat, loop_nors); + } + + if (recalc_normals!=0 || need_tangent!=0) + calc_vertexnormals(re, obr, recalc_normals, need_tangent, need_nmap_tangent_concrete); + } + + MEM_SAFE_FREE(loop_nors); + + dm->release(dm); +} + +/* ------------------------------------------------------------------------- */ +/* Lamps and Shadowbuffers */ +/* ------------------------------------------------------------------------- */ + +static void initshadowbuf(Render *re, LampRen *lar, float mat[4][4]) +{ + struct ShadBuf *shb; + float viewinv[4][4]; + + /* if (la->spsi<16) return; */ + + /* memory alloc */ + shb= (struct ShadBuf *)MEM_callocN(sizeof(struct ShadBuf), "initshadbuf"); + lar->shb= shb; + + if (shb==NULL) return; + + VECCOPY(shb->co, lar->co); /* int copy */ + + /* percentage render: keep track of min and max */ + shb->size= (lar->bufsize*re->r.size)/100; + + if (shb->size<512) shb->size= 512; + else if (shb->size > lar->bufsize) shb->size= lar->bufsize; + + shb->size &= ~15; /* make sure its multiples of 16 */ + + shb->samp= lar->samp; + shb->soft= lar->soft; + shb->shadhalostep= lar->shadhalostep; + + normalize_m4(mat); + invert_m4_m4(shb->winmat, mat); /* winmat is temp */ + + /* matrix: combination of inverse view and lampmat */ + /* calculate again: the ortho-render has no correct viewinv */ + 
invert_m4_m4(viewinv, re->viewmat); + mul_m4_m4m4(shb->viewmat, shb->winmat, viewinv); + + /* projection */ + shb->d= lar->clipsta; + shb->clipend= lar->clipend; + + /* bias is percentage, made 2x larger because of correction for angle of incidence */ + /* when a ray is closer to parallel of a face, bias value is increased during render */ + shb->bias= (0.02f*lar->bias)*0x7FFFFFFF; + + /* halfway method (average of first and 2nd z) reduces bias issues */ + if (ELEM(lar->buftype, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) + shb->bias= 0.1f*shb->bias; + + shb->compressthresh= lar->compressthresh; +} + +void area_lamp_vectors(LampRen *lar) +{ + float xsize= 0.5f*lar->area_size, ysize= 0.5f*lar->area_sizey, multifac; + + /* make it smaller, so area light can be multisampled */ + multifac= 1.0f/sqrtf((float)lar->ray_totsamp); + xsize *= multifac; + ysize *= multifac; + + /* corner vectors */ + lar->area[0][0]= lar->co[0] - xsize*lar->mat[0][0] - ysize*lar->mat[1][0]; + lar->area[0][1]= lar->co[1] - xsize*lar->mat[0][1] - ysize*lar->mat[1][1]; + lar->area[0][2]= lar->co[2] - xsize*lar->mat[0][2] - ysize*lar->mat[1][2]; + + /* corner vectors */ + lar->area[1][0]= lar->co[0] - xsize*lar->mat[0][0] + ysize*lar->mat[1][0]; + lar->area[1][1]= lar->co[1] - xsize*lar->mat[0][1] + ysize*lar->mat[1][1]; + lar->area[1][2]= lar->co[2] - xsize*lar->mat[0][2] + ysize*lar->mat[1][2]; + + /* corner vectors */ + lar->area[2][0]= lar->co[0] + xsize*lar->mat[0][0] + ysize*lar->mat[1][0]; + lar->area[2][1]= lar->co[1] + xsize*lar->mat[0][1] + ysize*lar->mat[1][1]; + lar->area[2][2]= lar->co[2] + xsize*lar->mat[0][2] + ysize*lar->mat[1][2]; + + /* corner vectors */ + lar->area[3][0]= lar->co[0] + xsize*lar->mat[0][0] - ysize*lar->mat[1][0]; + lar->area[3][1]= lar->co[1] + xsize*lar->mat[0][1] - ysize*lar->mat[1][1]; + lar->area[3][2]= lar->co[2] + xsize*lar->mat[0][2] - ysize*lar->mat[1][2]; + /* only for correction button size, matrix size works on energy */ + lar->areasize= 
lar->dist*lar->dist/(4.0f*xsize*ysize); +} + +/* If lar takes more lamp data, the decoupling will be better. */ +static GroupObject *add_render_lamp(Render *re, Object *ob) +{ + Lamp *la= ob->data; + LampRen *lar; + GroupObject *go; + float mat[4][4], angle, xn, yn; + float vec[3]; + int c; + + /* previewrender sets this to zero... prevent accidents */ + if (la==NULL) return NULL; + + /* prevent only shadow from rendering light */ + if (la->mode & LA_ONLYSHADOW) + if ((re->r.mode & R_SHADOW)==0) + return NULL; + + re->totlamp++; + + /* groups is used to unify support for lightgroups, this is the global lightgroup */ + go= MEM_callocN(sizeof(GroupObject), "groupobject"); + BLI_addtail(&re->lights, go); + go->ob= ob; + /* lamprens are in own list, for freeing */ + lar= (LampRen *)MEM_callocN(sizeof(LampRen), "lampren"); + BLI_addtail(&re->lampren, lar); + go->lampren= lar; + + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); + + copy_m4_m4(lar->lampmat, ob->obmat); + copy_m3_m4(lar->mat, mat); + copy_m3_m4(lar->imat, ob->imat); + + lar->bufsize = la->bufsize; + lar->samp = la->samp; + lar->buffers= la->buffers; + if (lar->buffers==0) lar->buffers= 1; + lar->buftype= la->buftype; + lar->filtertype= la->filtertype; + lar->soft = la->soft; + lar->shadhalostep = la->shadhalostep; + lar->clipsta = la->clipsta; + lar->clipend = la->clipend; + + lar->bias = la->bias; + lar->compressthresh = la->compressthresh; + + lar->type= la->type; + lar->mode= la->mode; + + lar->energy= la->energy; + if (la->mode & LA_NEG) lar->energy= -lar->energy; + + lar->vec[0]= -mat[2][0]; + lar->vec[1]= -mat[2][1]; + lar->vec[2]= -mat[2][2]; + normalize_v3(lar->vec); + lar->co[0]= mat[3][0]; + lar->co[1]= mat[3][1]; + lar->co[2]= mat[3][2]; + lar->dist= la->dist; + lar->haint= la->haint; + lar->distkw= lar->dist*lar->dist; + lar->r= lar->energy*la->r; + lar->g= lar->energy*la->g; + lar->b= lar->energy*la->b; + lar->shdwr= la->shdwr; + lar->shdwg= la->shdwg; + lar->shdwb= 
la->shdwb; + lar->k= la->k; + + /* area */ + lar->ray_samp= la->ray_samp; + lar->ray_sampy= la->ray_sampy; + lar->ray_sampz= la->ray_sampz; + + lar->area_size= la->area_size; + lar->area_sizey= la->area_sizey; + lar->area_sizez= la->area_sizez; + + lar->area_shape= la->area_shape; + + /* Annoying, lamp UI does this, but the UI might not have been used? - add here too. + * make sure this matches buttons_shading.c's logic */ + if (ELEM(la->type, LA_AREA, LA_SPOT, LA_SUN, LA_LOCAL) && (la->mode & LA_SHAD_RAY)) + if (ELEM(la->type, LA_SPOT, LA_SUN, LA_LOCAL)) + if (la->ray_samp_method == LA_SAMP_CONSTANT) la->ray_samp_method = LA_SAMP_HALTON; + + lar->ray_samp_method= la->ray_samp_method; + lar->ray_samp_type= la->ray_samp_type; + + lar->adapt_thresh= la->adapt_thresh; + lar->sunsky = NULL; + + if ( ELEM(lar->type, LA_SPOT, LA_LOCAL)) { + lar->ray_totsamp= lar->ray_samp*lar->ray_samp; + lar->area_shape = LA_AREA_SQUARE; + lar->area_sizey= lar->area_size; + } + else if (lar->type==LA_AREA) { + switch (lar->area_shape) { + case LA_AREA_SQUARE: + lar->ray_totsamp= lar->ray_samp*lar->ray_samp; + lar->ray_sampy= lar->ray_samp; + lar->area_sizey= lar->area_size; + break; + case LA_AREA_RECT: + lar->ray_totsamp= lar->ray_samp*lar->ray_sampy; + break; + case LA_AREA_CUBE: + lar->ray_totsamp= lar->ray_samp*lar->ray_samp*lar->ray_samp; + lar->ray_sampy= lar->ray_samp; + lar->ray_sampz= lar->ray_samp; + lar->area_sizey= lar->area_size; + lar->area_sizez= lar->area_size; + break; + case LA_AREA_BOX: + lar->ray_totsamp= lar->ray_samp*lar->ray_sampy*lar->ray_sampz; + break; + } + + area_lamp_vectors(lar); + init_jitter_plane(lar); /* subsamples */ + } + else if (lar->type==LA_SUN) { + lar->ray_totsamp= lar->ray_samp*lar->ray_samp; + lar->area_shape = LA_AREA_SQUARE; + lar->area_sizey= lar->area_size; + + if ((la->sun_effect_type & LA_SUN_EFFECT_SKY) || + (la->sun_effect_type & LA_SUN_EFFECT_AP)) + { + lar->sunsky = (struct SunSky*)MEM_callocN(sizeof(struct SunSky), "sunskyren"); + 
lar->sunsky->effect_type = la->sun_effect_type; + + copy_v3_v3(vec, ob->obmat[2]); + normalize_v3(vec); + + InitSunSky( + lar->sunsky, la->atm_turbidity, vec, la->horizon_brightness, + la->spread, la->sun_brightness, la->sun_size, la->backscattered_light, + la->skyblendfac, la->skyblendtype, la->sky_exposure, la->sky_colorspace); + InitAtmosphere( + lar->sunsky, la->sun_intensity, 1.0, 1.0, la->atm_inscattering_factor, la->atm_extinction_factor, + la->atm_distance_factor); + } + } + else lar->ray_totsamp= 0; + + lar->spotsi= la->spotsize; + if (lar->mode & LA_HALO) { + if (lar->spotsi > DEG2RADF(170.0f)) lar->spotsi = DEG2RADF(170.0f); + } + lar->spotsi= cosf(lar->spotsi * 0.5f); + lar->spotbl= (1.0f-lar->spotsi)*la->spotblend; + + memcpy(lar->mtex, la->mtex, MAX_MTEX*sizeof(void *)); + + lar->lay = ob->lay & 0xFFFFFF; /* higher 8 bits are localview layers */ + + lar->falloff_type = la->falloff_type; + lar->ld1= la->att1; + lar->ld2= la->att2; + lar->coeff_const= la->coeff_const; + lar->coeff_lin= la->coeff_lin; + lar->coeff_quad= la->coeff_quad; + lar->curfalloff = curvemapping_copy(la->curfalloff); + + if (lar->curfalloff) { + /* so threads don't conflict on init */ + curvemapping_initialize(lar->curfalloff); + } + + if (lar->type==LA_SPOT) { + + normalize_v3(lar->imat[0]); + normalize_v3(lar->imat[1]); + normalize_v3(lar->imat[2]); + + xn = saacos(lar->spotsi); + xn = sinf(xn) / cosf(xn); + lar->spottexfac= 1.0f/(xn); + + if (lar->mode & LA_ONLYSHADOW) { + if ((lar->mode & (LA_SHAD_BUF|LA_SHAD_RAY))==0) lar->mode -= LA_ONLYSHADOW; + } + + } + + /* set flag for spothalo en initvars */ + if ((la->type == LA_SPOT) && (la->mode & LA_HALO) && + (!(la->mode & LA_SHAD_BUF) || la->buftype != LA_SHADBUF_DEEP)) + { + if (la->haint>0.0f) { + re->flag |= R_LAMPHALO; + + /* camera position (0, 0, 0) rotate around lamp */ + lar->sh_invcampos[0]= -lar->co[0]; + lar->sh_invcampos[1]= -lar->co[1]; + lar->sh_invcampos[2]= -lar->co[2]; + mul_m3_v3(lar->imat, lar->sh_invcampos); + 
+ /* z factor, for a normalized volume */ + angle= saacos(lar->spotsi); + xn= lar->spotsi; + yn = sinf(angle); + lar->sh_zfac= yn/xn; + /* pre-scale */ + lar->sh_invcampos[2]*= lar->sh_zfac; + + /* halfway shadow buffer doesn't work for volumetric effects */ + if (ELEM(lar->buftype, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) + lar->buftype = LA_SHADBUF_REGULAR; + + } + } + else if (la->type==LA_HEMI) { + lar->mode &= ~(LA_SHAD_RAY|LA_SHAD_BUF); + } + + for (c=0; c<MAX_MTEX; c++) { + if (la->mtex[c] && la->mtex[c]->tex) { + if (la->mtex[c]->mapto & LAMAP_COL) + lar->mode |= LA_TEXTURE; + if (la->mtex[c]->mapto & LAMAP_SHAD) + lar->mode |= LA_SHAD_TEX; + + if (G.is_rendering) { + if (re->osa) { + if (la->mtex[c]->tex->type==TEX_IMAGE) lar->mode |= LA_OSATEX; + } + } + } + } + + /* old code checked for internal render (aka not yafray) */ + { + /* to make sure we can check ray shadow easily in the render code */ + if (lar->mode & LA_SHAD_RAY) { + if ( (re->r.mode & R_RAYTRACE)==0) + lar->mode &= ~LA_SHAD_RAY; + } + + + if (re->r.mode & R_SHADOW) { + + if (la->type==LA_AREA && (lar->mode & LA_SHAD_RAY) && (lar->ray_samp_method == LA_SAMP_CONSTANT)) { + init_jitter_plane(lar); + } + else if (la->type==LA_SPOT && (lar->mode & LA_SHAD_BUF) ) { + /* Per lamp, one shadow buffer is made. 
*/ + lar->bufflag= la->bufflag; + copy_m4_m4(mat, ob->obmat); + initshadowbuf(re, lar, mat); /* mat is altered */ + } + + + /* this is the way used all over to check for shadow */ + if (lar->shb || (lar->mode & LA_SHAD_RAY)) { + LampShadowSample *ls; + LampShadowSubSample *lss; + int a, b; + + memset(re->shadowsamplenr, 0, sizeof(re->shadowsamplenr)); + + lar->shadsamp= MEM_mallocN(re->r.threads*sizeof(LampShadowSample), "lamp shadow sample"); + ls= lar->shadsamp; + + /* shadfacs actually mean light, let's put them to 1 to prevent unitialized accidents */ + for (a=0; a<re->r.threads; a++, ls++) { + lss= ls->s; + for (b=0; b<re->r.osa; b++, lss++) { + lss->samplenr= -1; /* used to detect whether we store or read */ + lss->shadfac[0]= 1.0f; + lss->shadfac[1]= 1.0f; + lss->shadfac[2]= 1.0f; + lss->shadfac[3]= 1.0f; + } + } + } + } + } + + return go; +} + +static bool is_object_restricted(Render *re, Object *ob) +{ + if (re->r.scemode & R_VIEWPORT_PREVIEW) + return (ob->restrictflag & OB_RESTRICT_VIEW) != 0; + else + return (ob->restrictflag & OB_RESTRICT_RENDER) != 0; +} + +static bool is_object_hidden(Render *re, Object *ob) +{ + if (is_object_restricted(re, ob)) + return true; + + if (re->r.scemode & R_VIEWPORT_PREVIEW) { + /* Mesh deform cages and so on mess up the preview. To avoid the problem, + * viewport doesn't show mesh object if its draw type is bounding box or wireframe. + * Unless it's an active smoke domain! + */ + ModifierData *md = NULL; + + if ((md = modifiers_findByType(ob, eModifierType_Smoke)) && + (modifier_isEnabled(re->scene, md, eModifierMode_Realtime))) + { + return false; + } + return ELEM(ob->dt, OB_BOUNDBOX, OB_WIRE); + } + else { + return false; + } +} + +/* layflag: allows material group to ignore layerflag */ +static void add_lightgroup(Render *re, Group *group, int exclusive) +{ + GroupObject *go, *gol; + + group->id.tag &= ~LIB_TAG_DOIT; + + /* it's a bit too many loops in loops... 
but will survive */ + /* note that 'exclusive' will remove it from the global list */ + for (go= group->gobject.first; go; go= go->next) { + go->lampren= NULL; + + if (is_object_hidden(re, go->ob)) + continue; + + if (go->ob->lay & re->lay) { + if (go->ob && go->ob->type==OB_LAMP) { + for (gol= re->lights.first; gol; gol= gol->next) { + if (gol->ob==go->ob) { + go->lampren= gol->lampren; + break; + } + } + if (go->lampren==NULL) + gol= add_render_lamp(re, go->ob); + if (gol && exclusive) { + BLI_remlink(&re->lights, gol); + MEM_freeN(gol); + } + } + } + } +} + +static void set_material_lightgroups(Render *re) +{ + Group *group; + Material *ma; + + /* not for preview render */ + if (re->scene->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW)) + return; + + for (group= re->main->group.first; group; group=group->id.next) + group->id.tag |= LIB_TAG_DOIT; + + /* it's a bit too many loops in loops... but will survive */ + /* hola! materials not in use...? */ + for (ma= re->main->mat.first; ma; ma=ma->id.next) { + if (ma->group && (ma->group->id.tag & LIB_TAG_DOIT)) + add_lightgroup(re, ma->group, ma->mode & MA_GROUP_NOLAY); + } +} + +static void set_renderlayer_lightgroups(Render *re, Scene *sce) +{ + SceneRenderLayer *srl; + + for (srl= sce->r.layers.first; srl; srl= srl->next) { + if (srl->light_override) + add_lightgroup(re, srl->light_override, 0); + } +} + +/* ------------------------------------------------------------------------- */ +/* World */ +/* ------------------------------------------------------------------------- */ + +void init_render_world(Render *re) +{ + void *wrld_prev[2] = { + re->wrld.aotables, + re->wrld.aosphere, + }; + + int a; + + if (re->scene && re->scene->world) { + re->wrld = *(re->scene->world); + + copy_v3_v3(re->grvec, re->viewmat[2]); + normalize_v3(re->grvec); + copy_m3_m4(re->imat, re->viewinv); + + for (a=0; a<MAX_MTEX; a++) + if (re->wrld.mtex[a] && re->wrld.mtex[a]->tex) re->wrld.skytype |= WO_SKYTEX; + + /* AO samples should be 
OSA minimum */ + if (re->osa) + while (re->wrld.aosamp*re->wrld.aosamp < re->osa) + re->wrld.aosamp++; + if (!(re->r.mode & R_RAYTRACE) && (re->wrld.ao_gather_method == WO_AOGATHER_RAYTRACE)) + re->wrld.mode &= ~(WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT); + } + else { + memset(&re->wrld, 0, sizeof(World)); + re->wrld.exp= 0.0f; + re->wrld.range= 1.0f; + + /* for mist pass */ + re->wrld.miststa= re->clipsta; + re->wrld.mistdist= re->clipend-re->clipsta; + re->wrld.misi= 1.0f; + } + + re->wrld.linfac= 1.0f + powf((2.0f*re->wrld.exp + 0.5f), -10); + re->wrld.logfac= logf((re->wrld.linfac-1.0f)/re->wrld.linfac) / re->wrld.range; + + /* restore runtime vars, needed for viewport rendering [#36005] */ + re->wrld.aotables = wrld_prev[0]; + re->wrld.aosphere = wrld_prev[1]; +} + + + +/* ------------------------------------------------------------------------- */ +/* Object Finalization */ +/* ------------------------------------------------------------------------- */ + +/* prevent phong interpolation for giving ray shadow errors (terminator problem) */ +static void set_phong_threshold(ObjectRen *obr) +{ +// VertRen *ver; + VlakRen *vlr; + float thresh= 0.0, dot; + int tot=0, i; + + /* Added check for 'pointy' situations, only dotproducts of 0.9 and larger + * are taken into account. 
This threshold is meant to work on smooth geometry, not + * for extreme cases (ton) */ + + for (i=0; i<obr->totvlak; i++) { + vlr= RE_findOrAddVlak(obr, i); + if ((vlr->flag & R_SMOOTH) && (vlr->flag & R_STRAND)==0) { + dot= dot_v3v3(vlr->n, vlr->v1->n); + dot= ABS(dot); + if (dot>0.9f) { + thresh+= dot; tot++; + } + dot= dot_v3v3(vlr->n, vlr->v2->n); + dot= ABS(dot); + if (dot>0.9f) { + thresh+= dot; tot++; + } + + dot= dot_v3v3(vlr->n, vlr->v3->n); + dot= ABS(dot); + if (dot>0.9f) { + thresh+= dot; tot++; + } + + if (vlr->v4) { + dot= dot_v3v3(vlr->n, vlr->v4->n); + dot= ABS(dot); + if (dot>0.9f) { + thresh+= dot; tot++; + } + } + } + } + + if (tot) { + thresh/= (float)tot; + obr->ob->smoothresh= cosf(0.5f*(float)M_PI-saacos(thresh)); + } +} + +/* per face check if all samples should be taken. + * if raytrace or multisample, do always for raytraced material, or when material full_osa set */ +static void set_fullsample_trace_flag(Render *re, ObjectRen *obr) +{ + VlakRen *vlr; + int a, trace, mode, osa; + + osa= re->osa; + trace= re->r.mode & R_RAYTRACE; + + for (a=obr->totvlak-1; a>=0; a--) { + vlr= RE_findOrAddVlak(obr, a); + mode= vlr->mat->mode; + + if (trace && (mode & MA_TRACEBLE)) + vlr->flag |= R_TRACEBLE; + + if (osa) { + if (mode & MA_FULL_OSA) { + vlr->flag |= R_FULL_OSA; + } + else if (trace) { + if (mode & MA_SHLESS) { + /* pass */ + } + else if (vlr->mat->material_type == MA_TYPE_VOLUME) { + /* pass */ + } + else if ((mode & MA_RAYMIRROR) || ((mode & MA_TRANSP) && (mode & MA_RAYTRANSP))) { + /* for blurry reflect/refract, better to take more samples + * inside the raytrace than as OSA samples */ + if ((vlr->mat->gloss_mir == 1.0f) && (vlr->mat->gloss_tra == 1.0f)) + vlr->flag |= R_FULL_OSA; + } + } + } + } +} + +/* split quads for predictable baking + * dir 1 == (0, 1, 2) (0, 2, 3), 2 == (1, 3, 0) (1, 2, 3) + */ +static void split_quads(ObjectRen *obr, int dir) +{ + VlakRen *vlr, *vlr1; + int a; + + for (a=obr->totvlak-1; a>=0; a--) { + vlr= 
RE_findOrAddVlak(obr, a); + + /* test if rendering as a quad or triangle, skip wire */ + if ((vlr->flag & R_STRAND)==0 && (vlr->mat->material_type != MA_TYPE_WIRE)) { + + if (vlr->v4) { + + vlr1= RE_vlakren_copy(obr, vlr); + vlr1->flag |= R_FACE_SPLIT; + + if ( dir==2 ) vlr->flag |= R_DIVIDE_24; + else vlr->flag &= ~R_DIVIDE_24; + + /* new vertex pointers */ + if (vlr->flag & R_DIVIDE_24) { + vlr1->v1= vlr->v2; + vlr1->v2= vlr->v3; + vlr1->v3= vlr->v4; + + vlr->v3 = vlr->v4; + + vlr1->flag |= R_DIVIDE_24; + } + else { + vlr1->v1= vlr->v1; + vlr1->v2= vlr->v3; + vlr1->v3= vlr->v4; + + vlr1->flag &= ~R_DIVIDE_24; + } + vlr->v4 = vlr1->v4 = NULL; + +#ifdef WITH_FREESTYLE + /* Freestyle edge marks */ + if (vlr->flag & R_DIVIDE_24) { + vlr1->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? R_EDGE_V1V2 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0); + vlr->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0); + } + else { + vlr1->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0); + vlr->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? 
R_EDGE_V2V3 : 0); + } +#endif + + /* new normals */ + normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co); + normal_tri_v3(vlr1->n, vlr1->v3->co, vlr1->v2->co, vlr1->v1->co); + } + /* clear the flag when not divided */ + else vlr->flag &= ~R_DIVIDE_24; + } + } +} + +static void check_non_flat_quads(ObjectRen *obr) +{ + VlakRen *vlr, *vlr1; + VertRen *v1, *v2, *v3, *v4; + float nor[3], xn, flen; + int a; + + for (a=obr->totvlak-1; a>=0; a--) { + vlr= RE_findOrAddVlak(obr, a); + + /* test if rendering as a quad or triangle, skip wire */ + if (vlr->v4 && (vlr->flag & R_STRAND)==0 && (vlr->mat->material_type != MA_TYPE_WIRE)) { + + /* check if quad is actually triangle */ + v1= vlr->v1; + v2= vlr->v2; + v3= vlr->v3; + v4= vlr->v4; + sub_v3_v3v3(nor, v1->co, v2->co); + if ( ABS(nor[0])<FLT_EPSILON10 && ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) { + vlr->v1= v2; + vlr->v2= v3; + vlr->v3= v4; + vlr->v4= NULL; + vlr->flag |= (R_DIVIDE_24 | R_FACE_SPLIT); + } + else { + sub_v3_v3v3(nor, v2->co, v3->co); + if ( ABS(nor[0])<FLT_EPSILON10 && ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) { + vlr->v2= v3; + vlr->v3= v4; + vlr->v4= NULL; + vlr->flag |= R_FACE_SPLIT; + } + else { + sub_v3_v3v3(nor, v3->co, v4->co); + if ( ABS(nor[0])<FLT_EPSILON10 && ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) { + vlr->v4= NULL; + } + else { + sub_v3_v3v3(nor, v4->co, v1->co); + if ( ABS(nor[0])<FLT_EPSILON10 && ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) { + vlr->v4= NULL; + } + } + } + } + + if (vlr->v4) { + + /* Face is divided along edge with the least gradient */ + /* Flagged with R_DIVIDE_24 if divide is from vert 2 to 4 */ + /* 4---3 4---3 */ + /* |\ 1| or |1 /| */ + /* |0\ | |/ 0| */ + /* 1---2 1---2 0 = orig face, 1 = new face */ + + /* render normals are inverted in render! 
we calculate normal of single tria here */ + flen= normal_tri_v3(nor, vlr->v4->co, vlr->v3->co, vlr->v1->co); + if (flen==0.0f) normal_tri_v3(nor, vlr->v4->co, vlr->v2->co, vlr->v1->co); + + xn = dot_v3v3(nor, vlr->n); + + if (ABS(xn) < 0.999995f ) { /* checked on noisy fractal grid */ + + float d1, d2; + + vlr1= RE_vlakren_copy(obr, vlr); + vlr1->flag |= R_FACE_SPLIT; + + /* split direction based on vnorms */ + normal_tri_v3(nor, vlr->v1->co, vlr->v2->co, vlr->v3->co); + d1 = dot_v3v3(nor, vlr->v1->n); + + normal_tri_v3(nor, vlr->v2->co, vlr->v3->co, vlr->v4->co); + d2 = dot_v3v3(nor, vlr->v2->n); + + if (fabsf(d1) < fabsf(d2) ) vlr->flag |= R_DIVIDE_24; + else vlr->flag &= ~R_DIVIDE_24; + + /* new vertex pointers */ + if (vlr->flag & R_DIVIDE_24) { + vlr1->v1= vlr->v2; + vlr1->v2= vlr->v3; + vlr1->v3= vlr->v4; + + vlr->v3 = vlr->v4; + + vlr1->flag |= R_DIVIDE_24; + } + else { + vlr1->v1= vlr->v1; + vlr1->v2= vlr->v3; + vlr1->v3= vlr->v4; + + vlr1->flag &= ~R_DIVIDE_24; + } + vlr->v4 = vlr1->v4 = NULL; + + /* new normals */ + normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co); + normal_tri_v3(vlr1->n, vlr1->v3->co, vlr1->v2->co, vlr1->v1->co); + +#ifdef WITH_FREESTYLE + /* Freestyle edge marks */ + if (vlr->flag & R_DIVIDE_24) { + vlr1->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? R_EDGE_V1V2 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0); + vlr->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0); + } + else { + vlr1->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0); + vlr->freestyle_edge_mark= + ((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) | + ((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? 
R_EDGE_V2V3 : 0); + } +#endif + } + /* clear the flag when not divided */ + else vlr->flag &= ~R_DIVIDE_24; + } + } + } +} + +static void finalize_render_object(Render *re, ObjectRen *obr, int timeoffset) +{ + Object *ob= obr->ob; + VertRen *ver= NULL; + StrandRen *strand= NULL; + StrandBound *sbound= NULL; + float min[3], max[3], smin[3], smax[3]; + int a, b; + + if (obr->totvert || obr->totvlak || obr->tothalo || obr->totstrand) { + /* the exception below is because displace code now is in init_render_mesh call, + * I will look at means to have autosmooth enabled for all object types + * and have it as general postprocess, like displace */ + if (ob->type!=OB_MESH && test_for_displace(re, ob)) + displace(re, obr); + + if (!timeoffset) { + /* phong normal interpolation can cause error in tracing + * (terminator problem) */ + ob->smoothresh= 0.0; + if ((re->r.mode & R_RAYTRACE) && (re->r.mode & R_SHADOW)) + set_phong_threshold(obr); + + if (re->flag & R_BAKING && re->r.bake_quad_split != 0) { + /* Baking lets us define a quad split order */ + split_quads(obr, re->r.bake_quad_split); + } + else if (BKE_object_is_animated(re->scene, ob)) + split_quads(obr, 1); + else { + if ((re->r.mode & R_SIMPLIFY && re->r.simplify_flag & R_SIMPLE_NO_TRIANGULATE) == 0) + check_non_flat_quads(obr); + } + + set_fullsample_trace_flag(re, obr); + + /* compute bounding boxes for clipping */ + INIT_MINMAX(min, max); + for (a=0; a<obr->totvert; a++) { + if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert; + else ver++; + + minmax_v3v3_v3(min, max, ver->co); + } + + if (obr->strandbuf) { + float width; + + /* compute average bounding box of strandpoint itself (width) */ + if (obr->strandbuf->flag & R_STRAND_B_UNITS) + obr->strandbuf->maxwidth = max_ff(obr->strandbuf->ma->strand_sta, obr->strandbuf->ma->strand_end); + else + obr->strandbuf->maxwidth= 0.0f; + + width= obr->strandbuf->maxwidth; + sbound= obr->strandbuf->bound; + for (b=0; b<obr->strandbuf->totbound; b++, sbound++) { + + 
INIT_MINMAX(smin, smax); + + for (a=sbound->start; a<sbound->end; a++) { + strand= RE_findOrAddStrand(obr, a); + strand_minmax(strand, smin, smax, width); + } + + copy_v3_v3(sbound->boundbox[0], smin); + copy_v3_v3(sbound->boundbox[1], smax); + + minmax_v3v3_v3(min, max, smin); + minmax_v3v3_v3(min, max, smax); + } + } + + copy_v3_v3(obr->boundbox[0], min); + copy_v3_v3(obr->boundbox[1], max); + } + } +} + +/* ------------------------------------------------------------------------- */ +/* Database */ +/* ------------------------------------------------------------------------- */ + +static int render_object_type(short type) +{ + return OB_TYPE_SUPPORT_MATERIAL(type); +} + +static void find_dupli_instances(Render *re, ObjectRen *obr, DupliObject *dob) +{ + ObjectInstanceRen *obi; + float imat[4][4], obmat[4][4], obimat[4][4], nmat[3][3]; + int first = 1; + + mul_m4_m4m4(obmat, re->viewmat, obr->obmat); + invert_m4_m4(imat, obmat); + + /* for objects instanced by dupliverts/faces/particles, we go over the + * list of instances to find ones that instance obr, and setup their + * matrices and obr pointer */ + for (obi=re->instancetable.last; obi; obi=obi->prev) { + if (!obi->obr && obi->ob == obr->ob && obi->psysindex == obr->psysindex) { + obi->obr= obr; + + /* compute difference between object matrix and + * object matrix with dupli transform, in viewspace */ + copy_m4_m4(obimat, obi->mat); + mul_m4_m4m4(obi->mat, obimat, imat); + + copy_m3_m4(nmat, obi->mat); + invert_m3_m3(obi->nmat, nmat); + transpose_m3(obi->nmat); + + if (dob) { + copy_v3_v3(obi->dupliorco, dob->orco); + obi->dupliuv[0]= dob->uv[0]; + obi->dupliuv[1]= dob->uv[1]; + } + + if (!first) { + re->totvert += obr->totvert; + re->totvlak += obr->totvlak; + re->tothalo += obr->tothalo; + re->totstrand += obr->totstrand; + } + else + first= 0; + } + } +} + +static void assign_dupligroup_dupli(Render *re, ObjectInstanceRen *obi, ObjectRen *obr, DupliObject *dob) +{ + float imat[4][4], obmat[4][4], 
obimat[4][4], nmat[3][3]; + + mul_m4_m4m4(obmat, re->viewmat, obr->obmat); + invert_m4_m4(imat, obmat); + + obi->obr= obr; + + /* compute difference between object matrix and + * object matrix with dupli transform, in viewspace */ + copy_m4_m4(obimat, obi->mat); + mul_m4_m4m4(obi->mat, obimat, imat); + + copy_m3_m4(nmat, obi->mat); + invert_m3_m3(obi->nmat, nmat); + transpose_m3(obi->nmat); + + if (dob) { + copy_v3_v3(obi->dupliorco, dob->orco); + obi->dupliuv[0]= dob->uv[0]; + obi->dupliuv[1]= dob->uv[1]; + } + + re->totvert += obr->totvert; + re->totvlak += obr->totvlak; + re->tothalo += obr->tothalo; + re->totstrand += obr->totstrand; +} + +static ObjectRen *find_dupligroup_dupli(Render *re, Object *ob, int psysindex) +{ + ObjectRen *obr; + + /* if the object is itself instanced, we don't want to create an instance + * for it */ + if (ob->transflag & OB_RENDER_DUPLI) + return NULL; + + /* try to find an object that was already created so we can reuse it + * and save memory */ + for (obr=re->objecttable.first; obr; obr=obr->next) + if (obr->ob == ob && obr->psysindex == psysindex && (obr->flag & R_INSTANCEABLE)) + return obr; + + return NULL; +} + +static void set_dupli_tex_mat(Render *re, ObjectInstanceRen *obi, DupliObject *dob, float omat[4][4]) +{ + /* For duplis we need to have a matrix that transform the coordinate back + * to it's original position, without the dupli transforms. 
We also check + * the matrix is actually needed, to save memory on lots of dupliverts for + * example */ + static Object *lastob= NULL; + static int needtexmat= 0; + + /* init */ + if (!re) { + lastob= NULL; + needtexmat= 0; + return; + } + + /* check if we actually need it */ + if (lastob != dob->ob) { + Material ***material; + short a, *totmaterial; + + lastob= dob->ob; + needtexmat= 0; + + totmaterial= give_totcolp(dob->ob); + material= give_matarar(dob->ob); + + if (totmaterial && material) + for (a= 0; a<*totmaterial; a++) + if ((*material)[a] && (*material)[a]->texco & TEXCO_OBJECT) + needtexmat= 1; + } + + if (needtexmat) { + float imat[4][4]; + + obi->duplitexmat= BLI_memarena_alloc(re->memArena, sizeof(float)*4*4); + invert_m4_m4(imat, dob->mat); + mul_m4_series(obi->duplitexmat, re->viewmat, omat, imat, re->viewinv); + } + + copy_v3_v3(obi->dupliorco, dob->orco); + copy_v2_v2(obi->dupliuv, dob->uv); +} + +static void init_render_object_data(Render *re, ObjectRen *obr, int timeoffset) +{ + Object *ob= obr->ob; + ParticleSystem *psys; + int i; + + if (obr->psysindex) { + if ((!obr->prev || obr->prev->ob != ob || (obr->prev->flag & R_INSTANCEABLE)==0) && ob->type==OB_MESH) { + /* the emitter mesh wasn't rendered so the modifier stack wasn't + * evaluated with render settings */ + DerivedMesh *dm; + const CustomDataMask mask = CD_MASK_RENDER_INTERNAL; + + if (re->r.scemode & R_VIEWPORT_PREVIEW) + dm = mesh_create_derived_view(re->scene, ob, mask); + else + dm = mesh_create_derived_render(re->scene, ob, mask); + dm->release(dm); + } + + for (psys=ob->particlesystem.first, i=0; i<obr->psysindex-1; i++) + psys= psys->next; + + render_new_particle_system(re, obr, psys, timeoffset); + } + else { + if (ELEM(ob->type, OB_FONT, OB_CURVE)) + init_render_curve(re, obr, timeoffset); + else if (ob->type==OB_SURF) + init_render_surf(re, obr, timeoffset); + else if (ob->type==OB_MESH) + init_render_mesh(re, obr, timeoffset); + else if (ob->type==OB_MBALL) + 
init_render_mball(re, obr); + } + + finalize_render_object(re, obr, timeoffset); + + re->totvert += obr->totvert; + re->totvlak += obr->totvlak; + re->tothalo += obr->tothalo; + re->totstrand += obr->totstrand; +} + +static void add_render_object(Render *re, Object *ob, Object *par, DupliObject *dob, float omat[4][4], int timeoffset) +{ + ObjectRen *obr; + ObjectInstanceRen *obi; + ParticleSystem *psys; + int show_emitter, allow_render= 1, index, psysindex, i; + + index= (dob)? dob->persistent_id[0]: 0; + + /* It seems that we may generate psys->renderdata recursively in some nasty intricated cases of + * several levels of bupliobject (see T51524). + * For now, basic rule is, do not restore psys if it was already in 'render state'. + * Another, more robust solution could be to add some reference counting to that renderdata... */ + bool psys_has_renderdata = false; + + /* the emitter has to be processed first (render levels of modifiers) */ + /* so here we only check if the emitter should be rendered */ + if (ob->particlesystem.first) { + show_emitter= 0; + for (psys=ob->particlesystem.first; psys; psys=psys->next) { + show_emitter += psys->part->draw & PART_DRAW_EMITTER; + if (!(re->r.scemode & R_VIEWPORT_PREVIEW)) { + psys_has_renderdata |= (psys->renderdata != NULL); + psys_render_set(ob, psys, re->viewmat, re->winmat, re->winx, re->winy, timeoffset); + } + } + + /* if no psys has "show emitter" selected don't render emitter */ + if (show_emitter == 0) + allow_render= 0; + } + + /* one render object for the data itself */ + if (allow_render) { + obr= RE_addRenderObject(re, ob, par, index, 0, ob->lay); + if ((dob && !dob->animated) || (ob->transflag & OB_RENDER_DUPLI)) { + obr->flag |= R_INSTANCEABLE; + copy_m4_m4(obr->obmat, ob->obmat); + } + init_render_object_data(re, obr, timeoffset); + + /* only add instance for objects that have not been used for dupli */ + if (!(ob->transflag & OB_RENDER_DUPLI)) { + obi = RE_addRenderInstance(re, obr, ob, par, index, 0, 
NULL, ob->lay, dob); + if (dob) set_dupli_tex_mat(re, obi, dob, omat); + } + else + find_dupli_instances(re, obr, dob); + + for (i=1; i<=ob->totcol; i++) { + Material* ma = give_render_material(re, ob, i); + if (ma && ma->material_type == MA_TYPE_VOLUME) + add_volume(re, obr, ma); + } + } + + /* and one render object per particle system */ + if (ob->particlesystem.first) { + psysindex= 1; + for (psys=ob->particlesystem.first; psys; psys=psys->next, psysindex++) { + if (!psys_check_enabled(ob, psys, G.is_rendering)) + continue; + + obr= RE_addRenderObject(re, ob, par, index, psysindex, ob->lay); + if ((dob && !dob->animated) || (ob->transflag & OB_RENDER_DUPLI)) { + obr->flag |= R_INSTANCEABLE; + copy_m4_m4(obr->obmat, ob->obmat); + } + if (dob) + psys->flag |= PSYS_USE_IMAT; + init_render_object_data(re, obr, timeoffset); + if (!(re->r.scemode & R_VIEWPORT_PREVIEW) && !psys_has_renderdata) { + psys_render_restore(ob, psys); + } + psys->flag &= ~PSYS_USE_IMAT; + + /* only add instance for objects that have not been used for dupli */ + if (!(ob->transflag & OB_RENDER_DUPLI)) { + obi = RE_addRenderInstance(re, obr, ob, par, index, psysindex, NULL, ob->lay, dob); + if (dob) set_dupli_tex_mat(re, obi, dob, omat); + } + else + find_dupli_instances(re, obr, dob); + } + } +} + +/* par = pointer to duplicator parent, needed for object lookup table */ +/* index = when duplicater copies same object (particle), the counter */ +static void init_render_object(Render *re, Object *ob, Object *par, DupliObject *dob, float omat[4][4], int timeoffset) +{ + static double lasttime= 0.0; + double time; + float mat[4][4]; + + if (ob->type==OB_LAMP) + add_render_lamp(re, ob); + else if (render_object_type(ob->type)) + add_render_object(re, ob, par, dob, omat, timeoffset); + else { + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat, mat); + } + + time= PIL_check_seconds_timer(); + if (time - lasttime > 1.0) { + lasttime= time; + /* clumsy copying still */ + re->i.totvert= 
re->totvert; + re->i.totface= re->totvlak; + re->i.totstrand= re->totstrand; + re->i.tothalo= re->tothalo; + re->i.totlamp= re->totlamp; + re->stats_draw(re->sdh, &re->i); + } + + ob->flag |= OB_DONE; +} + +void RE_Database_Free(Render *re) +{ + LampRen *lar; + + /* will crash if we try to free empty database */ + if (!re->i.convertdone) + return; + + /* statistics for debugging render memory usage */ + if ((G.debug & G_DEBUG) && (G.is_rendering)) { + if ((re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))==0) { + BKE_image_print_memlist(); + MEM_printmemlist_stats(); + } + } + + /* FREE */ + + for (lar= re->lampren.first; lar; lar= lar->next) { + freeshadowbuf(lar); + if (lar->jitter) MEM_freeN(lar->jitter); + if (lar->shadsamp) MEM_freeN(lar->shadsamp); + if (lar->sunsky) MEM_freeN(lar->sunsky); + curvemapping_free(lar->curfalloff); + } + + free_volume_precache(re); + + BLI_freelistN(&re->lampren); + BLI_freelistN(&re->lights); + + free_renderdata_tables(re); + + /* free orco */ + free_mesh_orco_hash(re); + + if (re->main) { + end_render_materials(re->main); + end_render_textures(re); + free_pointdensities(re); + } + + free_camera_inside_volumes(re); + + if (re->wrld.aosphere) { + MEM_freeN(re->wrld.aosphere); + re->wrld.aosphere= NULL; + if (re->scene && re->scene->world) + re->scene->world->aosphere= NULL; + } + if (re->wrld.aotables) { + MEM_freeN(re->wrld.aotables); + re->wrld.aotables= NULL; + if (re->scene && re->scene->world) + re->scene->world->aotables= NULL; + } + if (re->r.mode & R_RAYTRACE) + free_render_qmcsampler(re); + + if (re->r.mode & R_RAYTRACE) freeraytree(re); + + free_sss(re); + free_occ(re); + free_strand_surface(re); + + re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0; + re->i.convertdone = false; + + re->bakebuf= NULL; + + if (re->scene) + if (re->scene->r.scemode & R_FREE_IMAGE) + if ((re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))==0) + BKE_image_free_all_textures(); + + if (re->memArena) { + 
BLI_memarena_free(re->memArena); + re->memArena = NULL; + } +} + +static int allow_render_object(Render *re, Object *ob, int nolamps, int onlyselected, Object *actob) +{ + if (is_object_hidden(re, ob)) + return 0; + + /* Only handle dupli-hiding here if there is no particle systems. Else, let those handle show/noshow. */ + if (!ob->particlesystem.first) { + if ((ob->transflag & OB_DUPLI) && !(ob->transflag & OB_DUPLIFRAMES)) { + return 0; + } + } + + /* don't add non-basic meta objects, ends up having renderobjects with no geometry */ + if (ob->type == OB_MBALL && ob!=BKE_mball_basis_find(re->eval_ctx, re->scene, ob)) + return 0; + + if (nolamps && (ob->type==OB_LAMP)) + return 0; + + if (onlyselected && (ob!=actob && !(ob->flag & SELECT))) + return 0; + + return 1; +} + +static int allow_render_dupli_instance(Render *UNUSED(re), DupliObject *dob, Object *obd) +{ + ParticleSystem *psys; + Material *ma; + short a, *totmaterial; + + /* don't allow objects with halos. we need to have + * all halo's to sort them globally in advance */ + totmaterial= give_totcolp(obd); + + if (totmaterial) { + for (a= 0; a<*totmaterial; a++) { + ma= give_current_material(obd, a + 1); + if (ma && (ma->material_type == MA_TYPE_HALO)) + return 0; + } + } + + for (psys=obd->particlesystem.first; psys; psys=psys->next) + if (!ELEM(psys->part->ren_as, PART_DRAW_BB, PART_DRAW_LINE, PART_DRAW_PATH, PART_DRAW_OB, PART_DRAW_GR)) + return 0; + + /* don't allow lamp, animated duplis, or radio render */ + return (render_object_type(obd->type) && + (!(dob->type == OB_DUPLIGROUP) || !dob->animated)); +} + +static void dupli_render_particle_set(Render *re, Object *ob, int timeoffset, int level, int enable) +{ + /* ugly function, but we need to set particle systems to their render + * settings before calling object_duplilist, to get render level duplis */ + Group *group; + GroupObject *go; + ParticleSystem *psys; + DerivedMesh *dm; + + if (re->r.scemode & R_VIEWPORT_PREVIEW) + return; + + if (level >= 
MAX_DUPLI_RECUR) + return; + + if (ob->transflag & OB_DUPLIPARTS) { + for (psys=ob->particlesystem.first; psys; psys=psys->next) { + if (ELEM(psys->part->ren_as, PART_DRAW_OB, PART_DRAW_GR)) { + if (enable) + psys_render_set(ob, psys, re->viewmat, re->winmat, re->winx, re->winy, timeoffset); + else + psys_render_restore(ob, psys); + } + } + + if (enable) { + /* this is to make sure we get render level duplis in groups: + * the derivedmesh must be created before init_render_mesh, + * since object_duplilist does dupliparticles before that */ + dm = mesh_create_derived_render(re->scene, ob, CD_MASK_RENDER_INTERNAL); + dm->release(dm); + + for (psys=ob->particlesystem.first; psys; psys=psys->next) + psys_get_modifier(ob, psys)->flag &= ~eParticleSystemFlag_psys_updated; + } + } + + if (ob->dup_group==NULL) return; + group= ob->dup_group; + + for (go= group->gobject.first; go; go= go->next) + dupli_render_particle_set(re, go->ob, timeoffset, level+1, enable); +} + +static int get_vector_renderlayers(Scene *sce) +{ + SceneRenderLayer *srl; + unsigned int lay= 0; + + for (srl= sce->r.layers.first; srl; srl= srl->next) + if (srl->passflag & SCE_PASS_VECTOR) + lay |= srl->lay; + + return lay; +} + +static void add_group_render_dupli_obs(Render *re, Group *group, int nolamps, int onlyselected, Object *actob, int timeoffset, int level) +{ + GroupObject *go; + Object *ob; + + /* simple preventing of too deep nested groups */ + if (level>MAX_DUPLI_RECUR) return; + + /* recursively go into dupligroups to find objects with OB_RENDER_DUPLI + * that were not created yet */ + for (go= group->gobject.first; go; go= go->next) { + ob= go->ob; + + if (ob->flag & OB_DONE) { + if (ob->transflag & OB_RENDER_DUPLI) { + if (allow_render_object(re, ob, nolamps, onlyselected, actob)) { + init_render_object(re, ob, NULL, NULL, NULL, timeoffset); + ob->transflag &= ~OB_RENDER_DUPLI; + + if (ob->dup_group) + add_group_render_dupli_obs(re, ob->dup_group, nolamps, onlyselected, actob, timeoffset, 
level+1); + } + } + } + } +} + +static void database_init_objects(Render *re, unsigned int renderlay, int nolamps, int onlyselected, Object *actob, int timeoffset) +{ + Base *base; + Object *ob; + Group *group; + ObjectInstanceRen *obi; + Scene *sce_iter; + int lay, vectorlay; + + /* for duplis we need the Object texture mapping to work as if + * untransformed, set_dupli_tex_mat sets the matrix to allow that + * NULL is just for init */ + set_dupli_tex_mat(NULL, NULL, NULL, NULL); + + /* loop over all objects rather then using SETLOOPER because we may + * reference an mtex-mapped object which isn't rendered or is an + * empty in a dupli group. We could scan all render material/lamp/world + * mtex's for mapto objects but its easier just to set the + * 'imat' / 'imat_ren' on all and unlikely to be a performance hit + * See bug: [#28744] - campbell */ + for (ob= re->main->object.first; ob; ob= ob->id.next) { + float mat[4][4]; + + /* imat objects has to be done here, since displace can have texture using Object map-input */ + mul_m4_m4m4(mat, re->viewmat, ob->obmat); + invert_m4_m4(ob->imat_ren, mat); + copy_m4_m4(ob->imat, ob->imat_ren); + /* each object should only be rendered once */ + ob->flag &= ~OB_DONE; + ob->transflag &= ~OB_RENDER_DUPLI; + } + + for (SETLOOPER(re->scene, sce_iter, base)) { + ob= base->object; + + /* in the prev/next pass for making speed vectors, avoid creating + * objects that are not on a renderlayer with a vector pass, can + * save a lot of time in complex scenes */ + vectorlay= get_vector_renderlayers(re->scene); + lay= (timeoffset)? 
renderlay & vectorlay: renderlay; + + /* if the object has been restricted from rendering in the outliner, ignore it */ + if (is_object_restricted(re, ob)) continue; + + /* OB_DONE means the object itself got duplicated, so was already converted */ + if (ob->flag & OB_DONE) { + /* OB_RENDER_DUPLI means instances for it were already created, now + * it still needs to create the ObjectRen containing the data */ + if (ob->transflag & OB_RENDER_DUPLI) { + if (allow_render_object(re, ob, nolamps, onlyselected, actob)) { + init_render_object(re, ob, NULL, NULL, NULL, timeoffset); + ob->transflag &= ~OB_RENDER_DUPLI; + } + } + } + else if ((base->lay & lay) || (ob->type==OB_LAMP && (base->lay & re->lay)) ) { + if ((ob->transflag & OB_DUPLI) && (ob->type!=OB_MBALL)) { + DupliObject *dob; + ListBase *duplilist; + DupliApplyData *duplilist_apply_data = NULL; + int i; + + /* create list of duplis generated by this object, particle + * system need to have render settings set for dupli particles */ + dupli_render_particle_set(re, ob, timeoffset, 0, 1); + duplilist = object_duplilist(re->eval_ctx, re->scene, ob); + duplilist_apply_data = duplilist_apply(ob, NULL, duplilist); + /* postpone 'dupli_render_particle_set', since RE_addRenderInstance reads + * index values from 'dob->persistent_id[0]', referencing 'psys->child' which + * may be smaller once the particle system is restored, see: T45563. */ + + for (dob= duplilist->first, i = 0; dob; dob= dob->next, ++i) { + DupliExtraData *dob_extra = &duplilist_apply_data->extra[i]; + Object *obd= dob->ob; + + copy_m4_m4(obd->obmat, dob->mat); + + /* group duplis need to set ob matrices correct, for deform. 
so no_draw is part handled */ + if (!(obd->transflag & OB_RENDER_DUPLI) && dob->no_draw) + continue; + + if (is_object_hidden(re, obd)) + continue; + + if (obd->type==OB_MBALL) + continue; + + if (!allow_render_object(re, obd, nolamps, onlyselected, actob)) + continue; + + if (allow_render_dupli_instance(re, dob, obd)) { + ParticleSystem *psys; + ObjectRen *obr = NULL; + int psysindex; + float mat[4][4]; + + obi=NULL; + + /* instances instead of the actual object are added in two cases, either + * this is a duplivert/face/particle, or it is a non-animated object in + * a dupligroup that has already been created before */ + if (dob->type != OB_DUPLIGROUP || (obr=find_dupligroup_dupli(re, obd, 0))) { + mul_m4_m4m4(mat, re->viewmat, dob->mat); + /* ob = particle system, use that layer */ + obi = RE_addRenderInstance(re, NULL, obd, ob, dob->persistent_id[0], 0, mat, ob->lay, dob); + + /* fill in instance variables for texturing */ + set_dupli_tex_mat(re, obi, dob, dob_extra->obmat); + if (dob->type != OB_DUPLIGROUP) { + copy_v3_v3(obi->dupliorco, dob->orco); + obi->dupliuv[0]= dob->uv[0]; + obi->dupliuv[1]= dob->uv[1]; + } + else { + /* for the second case, setup instance to point to the already + * created object, and possibly setup instances if this object + * itself was duplicated. for the first case find_dupli_instances + * will be called later. 
*/ + assign_dupligroup_dupli(re, obi, obr, dob); + if (obd->transflag & OB_RENDER_DUPLI) + find_dupli_instances(re, obr, dob); + } + } + + /* same logic for particles, each particle system has it's own object, so + * need to go over them separately */ + psysindex= 1; + for (psys=obd->particlesystem.first; psys; psys=psys->next) { + if (dob->type != OB_DUPLIGROUP || (obr=find_dupligroup_dupli(re, obd, psysindex))) { + if (obi == NULL) + mul_m4_m4m4(mat, re->viewmat, dob->mat); + obi = RE_addRenderInstance(re, NULL, obd, ob, dob->persistent_id[0], psysindex++, mat, obd->lay, dob); + + set_dupli_tex_mat(re, obi, dob, dob_extra->obmat); + if (dob->type != OB_DUPLIGROUP) { + copy_v3_v3(obi->dupliorco, dob->orco); + obi->dupliuv[0]= dob->uv[0]; + obi->dupliuv[1]= dob->uv[1]; + } + else { + assign_dupligroup_dupli(re, obi, obr, dob); + if (obd->transflag & OB_RENDER_DUPLI) + find_dupli_instances(re, obr, dob); + } + } + } + + if (obi==NULL) + /* can't instance, just create the object */ + init_render_object(re, obd, ob, dob, dob_extra->obmat, timeoffset); + + if (dob->type != OB_DUPLIGROUP) { + obd->flag |= OB_DONE; + obd->transflag |= OB_RENDER_DUPLI; + } + } + else + init_render_object(re, obd, ob, dob, dob_extra->obmat, timeoffset); + + if (re->test_break(re->tbh)) break; + } + + /* restore particle system */ + dupli_render_particle_set(re, ob, timeoffset, 0, false); + + if (duplilist_apply_data) { + duplilist_restore(duplilist, duplilist_apply_data); + duplilist_free_apply_data(duplilist_apply_data); + } + free_object_duplilist(duplilist); + + if (allow_render_object(re, ob, nolamps, onlyselected, actob)) + init_render_object(re, ob, NULL, NULL, NULL, timeoffset); + } + else if (allow_render_object(re, ob, nolamps, onlyselected, actob)) + init_render_object(re, ob, NULL, NULL, NULL, timeoffset); + } + + if (re->test_break(re->tbh)) break; + } + + /* objects in groups with OB_RENDER_DUPLI set still need to be created, + * since they may not be part of the scene */ + 
for (group= re->main->group.first; group; group=group->id.next) + add_group_render_dupli_obs(re, group, nolamps, onlyselected, actob, timeoffset, 0); + + if (!re->test_break(re->tbh)) + RE_makeRenderInstances(re); +} + +/* used to be 'rotate scene' */ +void RE_Database_FromScene(Render *re, Main *bmain, Scene *scene, unsigned int lay, int use_camera_view) +{ + Scene *sce; + Object *camera; + float mat[4][4]; + float amb[3]; + + re->main= bmain; + re->scene= scene; + re->lay= lay; + + if (re->r.scemode & R_VIEWPORT_PREVIEW) + re->scene_color_manage = BKE_scene_check_color_management_enabled(scene); + + /* scene needs to be set to get camera */ + camera= RE_GetCamera(re); + + /* per second, per object, stats print this */ + re->i.infostr= "Preparing Scene data"; + re->i.cfra= scene->r.cfra; + BLI_strncpy(re->i.scene_name, scene->id.name + 2, sizeof(re->i.scene_name)); + + /* XXX add test if dbase was filled already? */ + + re->memArena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "render db arena"); + re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0; + re->lights.first= re->lights.last= NULL; + re->lampren.first= re->lampren.last= NULL; + + re->i.partsdone = false; /* signal now in use for previewrender */ + + /* in localview, lamps are using normal layers, objects only local bits */ + if (re->lay & 0xFF000000) + lay &= 0xFF000000; + + /* applies changes fully */ + if ((re->r.scemode & (R_NO_FRAME_UPDATE|R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))==0) { + BKE_scene_update_for_newframe(re->eval_ctx, re->main, re->scene, lay); + render_update_anim_renderdata(re, &re->scene->r); + } + + /* if no camera, viewmat should have been set! */ + if (use_camera_view && camera) { + /* called before but need to call again in case of lens animation from the + * above call to BKE_scene_update_for_newframe, fixes bug. [#22702]. 
+ * following calls don't depend on 'RE_SetCamera' */ + RE_SetCamera(re, camera); + RE_GetCameraModelMatrix(re, camera, mat); + invert_m4(mat); + RE_SetView(re, mat); + + /* force correct matrix for scaled cameras */ + DAG_id_tag_update_ex(re->main, &camera->id, OB_RECALC_OB); + } + + /* store for incremental render, viewmat rotates dbase */ + copy_m4_m4(re->viewmat_orig, re->viewmat); + + init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */ + if (re->r.mode & R_RAYTRACE) { + init_render_qmcsampler(re); + + if (re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) + if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT) + init_ao_sphere(re, &re->wrld); + } + + /* still bad... doing all */ + init_render_textures(re); + copy_v3_v3(amb, &re->wrld.ambr); + init_render_materials(re->main, re->r.mode, amb, (re->r.scemode & R_BUTS_PREVIEW) == 0); + set_node_shader_lamp_loop(shade_material_loop); + + /* MAKE RENDER DATA */ + database_init_objects(re, lay, 0, 0, NULL, 0); + + if (!re->test_break(re->tbh)) { + set_material_lightgroups(re); + for (sce= re->scene; sce; sce= sce->set) + set_renderlayer_lightgroups(re, sce); + + /* for now some clumsy copying still */ + re->i.totvert= re->totvert; + re->i.totface= re->totvlak; + re->i.totstrand= re->totstrand; + re->i.tothalo= re->tothalo; + re->i.totlamp= re->totlamp; + re->stats_draw(re->sdh, &re->i); + } +} + +void RE_Database_Preprocess(Render *re) +{ + if (!re->test_break(re->tbh)) { + int tothalo; + + tothalo= re->tothalo; + sort_halos(re, tothalo); + + init_camera_inside_volumes(re); + + re->i.infostr = IFACE_("Creating Shadowbuffers"); + re->stats_draw(re->sdh, &re->i); + + /* SHADOW BUFFER */ + threaded_makeshadowbufs(re); + + /* old code checked for internal render (aka not yafray) */ + { + /* raytree */ + if (!re->test_break(re->tbh)) { + if (re->r.mode & R_RAYTRACE) { + makeraytree(re); + } + } + /* ENVIRONMENT MAPS */ + if (!re->test_break(re->tbh)) + make_envmaps(re); + + 
/* point density texture */ + if (!re->test_break(re->tbh)) + make_pointdensities(re); + /* voxel data texture */ + if (!re->test_break(re->tbh)) + make_voxeldata(re); + } + + if (!re->test_break(re->tbh)) + project_renderdata(re, projectverto, (re->r.mode & R_PANORAMA) != 0, 0, 1); + + /* Occlusion */ + if ((re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && !re->test_break(re->tbh)) + if (re->wrld.ao_gather_method == WO_AOGATHER_APPROX) + if (re->r.mode & R_SHADOW) + make_occ_tree(re); + + /* SSS */ + if ((re->r.mode & R_SSS) && !re->test_break(re->tbh)) + make_sss_tree(re); + + if (!re->test_break(re->tbh)) + if (re->r.mode & R_RAYTRACE) + volume_precache(re); + } + + re->i.convertdone = true; + + if (re->test_break(re->tbh)) + RE_Database_Free(re); + + re->i.infostr = NULL; + re->stats_draw(re->sdh, &re->i); +} + +/* exported call to recalculate hoco for vertices, when winmat changed */ +void RE_DataBase_ApplyWindow(Render *re) +{ + project_renderdata(re, projectverto, 0, 0, 0); +} + +/* exported call to rotate render data again, when viewmat changed */ +void RE_DataBase_IncrementalView(Render *re, float viewmat[4][4], int restore) +{ + float oldviewinv[4][4], tmat[4][4]; + + invert_m4_m4(oldviewinv, re->viewmat_orig); + + /* we have to correct for the already rotated vertexcoords */ + mul_m4_m4m4(tmat, viewmat, oldviewinv); + + copy_m4_m4(re->viewmat, viewmat); + invert_m4_m4(re->viewinv, re->viewmat); + + init_camera_inside_volumes(re); + + env_rotate_scene(re, tmat, !restore); + + /* SSS points distribution depends on view */ + if ((re->r.mode & R_SSS) && !re->test_break(re->tbh)) + make_sss_tree(re); +} + + +void RE_DataBase_GetView(Render *re, float mat[4][4]) +{ + copy_m4_m4(mat, re->viewmat); +} + +/* ------------------------------------------------------------------------- */ +/* Speed Vectors */ +/* ------------------------------------------------------------------------- */ + +static void database_fromscene_vectors(Render *re, Scene 
*scene, unsigned int lay, int timeoffset) +{ + Object *camera= RE_GetCamera(re); + float mat[4][4]; + + re->scene= scene; + re->lay= lay; + + /* XXX add test if dbase was filled already? */ + + re->memArena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vector render db arena"); + re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0; + re->i.totface=re->i.totvert=re->i.totstrand=re->i.totlamp=re->i.tothalo= 0; + re->lights.first= re->lights.last= NULL; + + /* in localview, lamps are using normal layers, objects only local bits */ + if (re->lay & 0xFF000000) + lay &= 0xFF000000; + + /* applies changes fully */ + scene->r.cfra += timeoffset; + BKE_scene_update_for_newframe(re->eval_ctx, re->main, re->scene, lay); + + /* if no camera, viewmat should have been set! */ + if (camera) { + RE_GetCameraModelMatrix(re, camera, mat); + normalize_m4(mat); + invert_m4(mat); + RE_SetView(re, mat); + } + + /* MAKE RENDER DATA */ + database_init_objects(re, lay, 0, 0, NULL, timeoffset); + + if (!re->test_break(re->tbh)) + project_renderdata(re, projectverto, (re->r.mode & R_PANORAMA) != 0, 0, 1); + + /* do this in end, particles for example need cfra */ + scene->r.cfra -= timeoffset; +} + +/* choose to use static, to prevent giving too many args to this call */ +static void speedvector_project(Render *re, float zco[2], const float co[3], const float ho[4]) +{ + static float pixelphix=0.0f, pixelphiy=0.0f, zmulx=0.0f, zmuly=0.0f; + static int pano= 0; + float div; + + /* initialize */ + if (re) { + pano= re->r.mode & R_PANORAMA; + + /* precalculate amount of radians 1 pixel rotates */ + if (pano) { + /* size of 1 pixel mapped to viewplane coords */ + float psize; + + psize = BLI_rctf_size_x(&re->viewplane) / (float)re->winx; + /* x angle of a pixel */ + pixelphix = atan(psize / re->clipsta); + + psize = BLI_rctf_size_y(&re->viewplane) / (float)re->winy; + /* y angle of a pixel */ + pixelphiy = atan(psize / re->clipsta); + } + zmulx= re->winx/2; + zmuly= re->winy/2; + + 
return; + } + + /* now map hocos to screenspace, uses very primitive clip still */ + if (ho[3]<0.1f) div= 10.0f; + else div= 1.0f/ho[3]; + + /* use cylinder projection */ + if (pano) { + float vec[3], ang; + /* angle between (0, 0, -1) and (co) */ + copy_v3_v3(vec, co); + + ang= saacos(-vec[2]/sqrtf(vec[0]*vec[0] + vec[2]*vec[2])); + if (vec[0]<0.0f) ang= -ang; + zco[0]= ang/pixelphix + zmulx; + + ang= 0.5f*(float)M_PI - saacos(vec[1] / len_v3(vec)); + zco[1]= ang/pixelphiy + zmuly; + + } + else { + zco[0]= zmulx*(1.0f+ho[0]*div); + zco[1]= zmuly*(1.0f+ho[1]*div); + } +} + +static void calculate_speedvector(const float vectors[2], int step, float winsq, float winroot, const float co[3], const float ho[4], float speed[4]) +{ + float zco[2], len; + + speedvector_project(NULL, zco, co, ho); + + zco[0]= vectors[0] - zco[0]; + zco[1]= vectors[1] - zco[1]; + + /* enable nice masks for hardly moving stuff or float inaccuracy */ + if (zco[0]<0.1f && zco[0]>-0.1f && zco[1]<0.1f && zco[1]>-0.1f ) { + zco[0]= 0.0f; + zco[1]= 0.0f; + } + + /* maximize speed for image width, otherwise it never looks good */ + len= zco[0]*zco[0] + zco[1]*zco[1]; + if (len > winsq) { + len= winroot/sqrtf(len); + zco[0]*= len; + zco[1]*= len; + } + + /* note; in main vecblur loop speedvec is negated again */ + if (step) { + speed[2]= -zco[0]; + speed[3]= -zco[1]; + } + else { + speed[0]= zco[0]; + speed[1]= zco[1]; + } +} + +static float *calculate_strandsurface_speedvectors(Render *re, ObjectInstanceRen *obi, StrandSurface *mesh) +{ + if (mesh->co && mesh->prevco && mesh->nextco) { + float winsq= (float)re->winx*(float)re->winy; /* int's can wrap on large images */ + float winroot= sqrtf(winsq); + float (*winspeed)[4]; + float ho[4], prevho[4], nextho[4], winmat[4][4], vec[2]; + int a; + + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(winmat, re->winmat, obi->mat); + else + copy_m4_m4(winmat, re->winmat); + + winspeed= MEM_callocN(sizeof(float)*4*mesh->totvert, "StrandSurfWin"); + + for (a=0; 
a<mesh->totvert; a++) { + projectvert(mesh->co[a], winmat, ho); + + projectvert(mesh->prevco[a], winmat, prevho); + speedvector_project(NULL, vec, mesh->prevco[a], prevho); + calculate_speedvector(vec, 0, winsq, winroot, mesh->co[a], ho, winspeed[a]); + + projectvert(mesh->nextco[a], winmat, nextho); + speedvector_project(NULL, vec, mesh->nextco[a], nextho); + calculate_speedvector(vec, 1, winsq, winroot, mesh->co[a], ho, winspeed[a]); + } + + return (float *)winspeed; + } + + return NULL; +} + +static void calculate_speedvectors(Render *re, ObjectInstanceRen *obi, float *vectors, int step) +{ + ObjectRen *obr= obi->obr; + VertRen *ver= NULL; + StrandRen *strand= NULL; + StrandBuffer *strandbuf; + StrandSurface *mesh= NULL; + float *speed, (*winspeed)[4]=NULL, ho[4], winmat[4][4]; + float *co1, *co2, *co3, *co4, w[4]; + float winsq = (float)re->winx * (float)re->winy, winroot = sqrtf(winsq); /* int's can wrap on large images */ + int a, *face, *index; + + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(winmat, re->winmat, obi->mat); + else + copy_m4_m4(winmat, re->winmat); + + if (obr->vertnodes) { + for (a=0; a<obr->totvert; a++, vectors+=2) { + if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert; + else ver++; + + speed= RE_vertren_get_winspeed(obi, ver, 1); + projectvert(ver->co, winmat, ho); + calculate_speedvector(vectors, step, winsq, winroot, ver->co, ho, speed); + } + } + + if (obr->strandnodes) { + strandbuf= obr->strandbuf; + mesh= (strandbuf)? 
strandbuf->surface: NULL; + + /* compute speed vectors at surface vertices */ + if (mesh) + winspeed= (float(*)[4])calculate_strandsurface_speedvectors(re, obi, mesh); + + if (winspeed) { + for (a=0; a<obr->totstrand; a++, vectors+=2) { + if ((a & 255)==0) strand= obr->strandnodes[a>>8].strand; + else strand++; + + index= RE_strandren_get_face(obr, strand, 0); + if (index && *index < mesh->totface) { + speed= RE_strandren_get_winspeed(obi, strand, 1); + + /* interpolate speed vectors from strand surface */ + face= mesh->face[*index]; + + co1 = mesh->co[face[0]]; + co2 = mesh->co[face[1]]; + co3 = mesh->co[face[2]]; + + if (face[3]) { + co4 = mesh->co[face[3]]; + interp_weights_quad_v3(w, co1, co2, co3, co4, strand->vert->co); + } + else { + interp_weights_tri_v3(w, co1, co2, co3, strand->vert->co); + } + + zero_v4(speed); + madd_v4_v4fl(speed, winspeed[face[0]], w[0]); + madd_v4_v4fl(speed, winspeed[face[1]], w[1]); + madd_v4_v4fl(speed, winspeed[face[2]], w[2]); + if (face[3]) + madd_v4_v4fl(speed, winspeed[face[3]], w[3]); + } + } + + MEM_freeN(winspeed); + } + } +} + +static int load_fluidsimspeedvectors(Render *re, ObjectInstanceRen *obi, float *vectors, int step) +{ + ObjectRen *obr= obi->obr; + Object *fsob= obr->ob; + VertRen *ver= NULL; + float *speed, div, zco[2], avgvel[4] = {0.0, 0.0, 0.0, 0.0}; + float zmulx= re->winx/2, zmuly= re->winy/2, len; + float winsq = (float)re->winx * (float)re->winy, winroot= sqrtf(winsq); /* int's can wrap on large images */ + int a, j; + float hoco[4], ho[4], fsvec[4], camco[4]; + float mat[4][4], winmat[4][4]; + float imat[4][4]; + FluidsimModifierData *fluidmd = (FluidsimModifierData *)modifiers_findByType(fsob, eModifierType_Fluidsim); + FluidsimSettings *fss; + FluidVertexVelocity *velarray = NULL; + + /* only one step needed */ + if (step) return 1; + + if (fluidmd) + fss = fluidmd->fss; + else + return 0; + + copy_m4_m4(mat, re->viewmat); + invert_m4_m4(imat, mat); + + /* set first vertex OK */ + if 
(!fss->meshVelocities) return 0; + + if ( obr->totvert != fss->totvert) { + //fprintf(stderr, "load_fluidsimspeedvectors - modified fluidsim mesh, not using speed vectors (%d,%d)...\n", obr->totvert, fsob->fluidsimSettings->meshSurface->totvert); // DEBUG + return 0; + } + + velarray = fss->meshVelocities; + + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(winmat, re->winmat, obi->mat); + else + copy_m4_m4(winmat, re->winmat); + + /* (bad) HACK calculate average velocity */ + /* better solution would be fixing getVelocityAt() in intern/elbeem/intern/solver_util.cpp + * so that also small drops/little water volumes return a velocity != 0. + * But I had no luck in fixing that function - DG */ + for (a=0; a<obr->totvert; a++) { + for (j=0;j<3;j++) avgvel[j] += velarray[a].vel[j]; + + } + for (j=0;j<3;j++) avgvel[j] /= (float)(obr->totvert); + + + for (a=0; a<obr->totvert; a++, vectors+=2) { + if ((a & 255)==0) + ver= obr->vertnodes[a>>8].vert; + else + ver++; + + /* get fluid velocity */ + fsvec[3] = 0.0f; + //fsvec[0] = fsvec[1] = fsvec[2] = fsvec[3] = 0.0; fsvec[2] = 2.0f; // NT fixed test + for (j=0;j<3;j++) fsvec[j] = velarray[a].vel[j]; + + /* (bad) HACK insert average velocity if none is there (see previous comment) */ + if ((fsvec[0] == 0.0f) && (fsvec[1] == 0.0f) && (fsvec[2] == 0.0f)) { + fsvec[0] = avgvel[0]; + fsvec[1] = avgvel[1]; + fsvec[2] = avgvel[2]; + } + + /* transform (=rotate) to cam space */ + camco[0] = dot_v3v3(imat[0], fsvec); + camco[1] = dot_v3v3(imat[1], fsvec); + camco[2] = dot_v3v3(imat[2], fsvec); + + /* get homogeneous coordinates */ + projectvert(camco, winmat, hoco); + projectvert(ver->co, winmat, ho); + + /* now map hocos to screenspace, uses very primitive clip still */ + /* use ho[3] of original vertex, xy component of vel. 
direction */ + if (ho[3]<0.1f) div= 10.0f; + else div= 1.0f/ho[3]; + zco[0]= zmulx*hoco[0]*div; + zco[1]= zmuly*hoco[1]*div; + + /* maximize speed as usual */ + len= zco[0]*zco[0] + zco[1]*zco[1]; + if (len > winsq) { + len= winroot/sqrtf(len); + zco[0]*= len; zco[1]*= len; + } + + speed= RE_vertren_get_winspeed(obi, ver, 1); + /* set both to the same value */ + speed[0]= speed[2]= zco[0]; + speed[1]= speed[3]= zco[1]; + //if (a < 20) fprintf(stderr,"speed %d %f,%f | camco %f,%f,%f | hoco %f,%f,%f,%f\n", a, speed[0], speed[1], camco[0],camco[1], camco[2], hoco[0],hoco[1], hoco[2],hoco[3]); // NT DEBUG + } + + return 1; +} + +/* makes copy per object of all vectors */ +/* result should be that we can free entire database */ +static void copy_dbase_object_vectors(Render *re, ListBase *lb) +{ + ObjectInstanceRen *obi, *obilb; + ObjectRen *obr; + VertRen *ver= NULL; + float *vec, ho[4], winmat[4][4]; + int a, totvector; + + for (obi= re->instancetable.first; obi; obi= obi->next) { + obr= obi->obr; + + obilb= MEM_mallocN(sizeof(ObjectInstanceRen), "ObInstanceVector"); + memcpy(obilb, obi, sizeof(ObjectInstanceRen)); + BLI_addtail(lb, obilb); + + obilb->totvector= totvector= obr->totvert; + + if (totvector > 0) { + vec= obilb->vectors= MEM_mallocN(2*sizeof(float)*totvector, "vector array"); + + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(winmat, re->winmat, obi->mat); + else + copy_m4_m4(winmat, re->winmat); + + for (a=0; a<obr->totvert; a++, vec+=2) { + if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert; + else ver++; + + projectvert(ver->co, winmat, ho); + speedvector_project(NULL, vec, ver->co, ho); + } + } + } +} + +static void free_dbase_object_vectors(ListBase *lb) +{ + ObjectInstanceRen *obi; + + for (obi= lb->first; obi; obi= obi->next) + if (obi->vectors) + MEM_freeN(obi->vectors); + BLI_freelistN(lb); +} + +void RE_Database_FromScene_Vectors(Render *re, Main *bmain, Scene *sce, unsigned int lay) +{ + ObjectInstanceRen *obi, *oldobi; + StrandSurface *mesh; + 
ListBase *table; + ListBase oldtable= {NULL, NULL}, newtable= {NULL, NULL}; + ListBase strandsurface; + int step; + + re->i.infostr = IFACE_("Calculating previous frame vectors"); + re->r.mode |= R_SPEED; + + speedvector_project(re, NULL, NULL, NULL); /* initializes projection code */ + + /* creates entire dbase */ + database_fromscene_vectors(re, sce, lay, -1); + + /* copy away vertex info */ + copy_dbase_object_vectors(re, &oldtable); + + /* free dbase and make the future one */ + strandsurface= re->strandsurface; + memset(&re->strandsurface, 0, sizeof(ListBase)); + re->i.convertdone = true; + RE_Database_Free(re); + re->strandsurface= strandsurface; + + if (!re->test_break(re->tbh)) { + /* creates entire dbase */ + re->i.infostr = IFACE_("Calculating next frame vectors"); + + database_fromscene_vectors(re, sce, lay, +1); + } + /* copy away vertex info */ + copy_dbase_object_vectors(re, &newtable); + + /* free dbase and make the real one */ + strandsurface= re->strandsurface; + memset(&re->strandsurface, 0, sizeof(ListBase)); + re->i.convertdone = true; + RE_Database_Free(re); + re->strandsurface= strandsurface; + + if (!re->test_break(re->tbh)) { + RE_Database_FromScene(re, bmain, sce, lay, 1); + RE_Database_Preprocess(re); + } + + if (!re->test_break(re->tbh)) { + int vectorlay= get_vector_renderlayers(re->scene); + + for (step= 0; step<2; step++) { + + if (step) + table= &newtable; + else + table= &oldtable; + + oldobi= table->first; + for (obi= re->instancetable.first; obi && oldobi; obi= obi->next) { + int ok= 1; + FluidsimModifierData *fluidmd; + + if (!(obi->lay & vectorlay)) + continue; + + obi->totvector= obi->obr->totvert; + + /* find matching object in old table */ + if (oldobi->ob!=obi->ob || oldobi->par!=obi->par || oldobi->index!=obi->index || oldobi->psysindex!=obi->psysindex) { + ok= 0; + for (oldobi= table->first; oldobi; oldobi= oldobi->next) + if (oldobi->ob==obi->ob && oldobi->par==obi->par && oldobi->index==obi->index && 
oldobi->psysindex==obi->psysindex) + break; + if (oldobi==NULL) + oldobi= table->first; + else + ok= 1; + } + if (ok==0) { + printf("speed table: missing object %s\n", obi->ob->id.name + 2); + continue; + } + + /* NT check for fluidsim special treatment */ + fluidmd = (FluidsimModifierData *)modifiers_findByType(obi->ob, eModifierType_Fluidsim); + if (fluidmd && fluidmd->fss && (fluidmd->fss->type & OB_FLUIDSIM_DOMAIN)) { + /* use preloaded per vertex simulation data, only does calculation for step=1 */ + /* NOTE/FIXME - velocities and meshes loaded unnecessarily often during the database_fromscene_vectors calls... */ + load_fluidsimspeedvectors(re, obi, oldobi->vectors, step); + } + else { + /* check if both have same amounts of vertices */ + if (obi->totvector==oldobi->totvector) + calculate_speedvectors(re, obi, oldobi->vectors, step); + else + printf("Warning: object %s has different amount of vertices or strands on other frame\n", obi->ob->id.name + 2); + } /* not fluidsim */ + + oldobi= oldobi->next; + } + } + } + + free_dbase_object_vectors(&oldtable); + free_dbase_object_vectors(&newtable); + + for (mesh=re->strandsurface.first; mesh; mesh=mesh->next) { + if (mesh->prevco) { + MEM_freeN(mesh->prevco); + mesh->prevco= NULL; + } + if (mesh->nextco) { + MEM_freeN(mesh->nextco); + mesh->nextco= NULL; + } + } + + re->i.infostr = NULL; + re->stats_draw(re->sdh, &re->i); +} + + +/* ------------------------------------------------------------------------- */ +/* Baking */ +/* ------------------------------------------------------------------------- */ + +/* setup for shaded view or bake, so only lamps and materials are initialized */ +/* type: + * RE_BAKE_LIGHT: for shaded view, only add lamps + * RE_BAKE_ALL: for baking, all lamps and objects + * RE_BAKE_NORMALS:for baking, no lamps and only selected objects + * RE_BAKE_AO: for baking, no lamps, but all objects + * RE_BAKE_TEXTURE:for baking, no lamps, only selected objects + * RE_BAKE_VERTEX_COLORS:for baking, no 
lamps, only selected objects + * RE_BAKE_DISPLACEMENT:for baking, no lamps, only selected objects + * RE_BAKE_DERIVATIVE:for baking, no lamps, only selected objects + * RE_BAKE_SHADOW: for baking, only shadows, but all objects + */ +void RE_Database_Baking(Render *re, Main *bmain, Scene *scene, unsigned int lay, const int type, Object *actob) +{ + Object *camera; + float mat[4][4]; + float amb[3]; + const short onlyselected= !ELEM(type, RE_BAKE_LIGHT, RE_BAKE_ALL, RE_BAKE_SHADOW, RE_BAKE_AO, RE_BAKE_VERTEX_COLORS); + const short nolamps= ELEM(type, RE_BAKE_NORMALS, RE_BAKE_TEXTURE, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE, RE_BAKE_VERTEX_COLORS); + + re->main= bmain; + re->scene= scene; + re->lay= lay; + + /* renderdata setup and exceptions */ + render_copy_renderdata(&re->r, &scene->r); + + RE_init_threadcount(re); + + re->flag |= R_BAKING; + re->excludeob= actob; + if (actob) + re->flag |= R_BAKE_TRACE; + + if (type==RE_BAKE_NORMALS && re->r.bake_normal_space==R_BAKE_SPACE_TANGENT) + re->flag |= R_NEED_TANGENT; + + if (type==RE_BAKE_VERTEX_COLORS) + re->flag |= R_NEED_VCOL; + + if (!actob && ELEM(type, RE_BAKE_LIGHT, RE_BAKE_NORMALS, RE_BAKE_TEXTURE, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE, RE_BAKE_VERTEX_COLORS)) { + re->r.mode &= ~R_SHADOW; + re->r.mode &= ~R_RAYTRACE; + } + + if (!actob && (type==RE_BAKE_SHADOW)) { + re->r.mode |= R_SHADOW; + } + + /* setup render stuff */ + re->memArena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "bake db arena"); + + re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0; + re->lights.first= re->lights.last= NULL; + re->lampren.first= re->lampren.last= NULL; + + /* in localview, lamps are using normal layers, objects only local bits */ + if (re->lay & 0xFF000000) + lay &= 0xFF000000; + + camera= RE_GetCamera(re); + + /* if no camera, set unit */ + if (camera) { + normalize_m4_m4(mat, camera->obmat); + invert_m4(mat); + RE_SetView(re, mat); + } + else { + unit_m4(mat); + RE_SetView(re, mat); + } + 
copy_m3_m4(re->imat, re->viewinv); + + /* TODO: deep shadow maps + baking + strands */ + /* strands use the window matrix and view size, there is to correct + * window matrix but at least avoids malloc and crash loop [#27807] */ + unit_m4(re->winmat); + re->winx= re->winy= 256; + /* done setting dummy values */ + + init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */ + if (re->r.mode & R_RAYTRACE) { + init_render_qmcsampler(re); + + if (re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) + if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT) + init_ao_sphere(re, &re->wrld); + } + + /* still bad... doing all */ + init_render_textures(re); + + copy_v3_v3(amb, &re->wrld.ambr); + init_render_materials(re->main, re->r.mode, amb, true); + + set_node_shader_lamp_loop(shade_material_loop); + + /* MAKE RENDER DATA */ + database_init_objects(re, lay, nolamps, onlyselected, actob, 0); + + set_material_lightgroups(re); + + /* SHADOW BUFFER */ + if (type!=RE_BAKE_LIGHT) + if (re->r.mode & R_SHADOW) + threaded_makeshadowbufs(re); + + /* raytree */ + if (!re->test_break(re->tbh)) + if (re->r.mode & R_RAYTRACE) + makeraytree(re); + + /* point density texture */ + if (!re->test_break(re->tbh)) + make_pointdensities(re); + + /* voxel data texture */ + if (!re->test_break(re->tbh)) + make_voxeldata(re); + + /* occlusion */ + if ((re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && !re->test_break(re->tbh)) + if (re->wrld.ao_gather_method == WO_AOGATHER_APPROX) + if (re->r.mode & R_SHADOW) + make_occ_tree(re); + + re->i.convertdone = true; +} diff --git a/source/blender/render/intern/source/envmap.c b/source/blender/render/intern/source/envmap.c new file mode 100644 index 00000000000..85a6af92a28 --- /dev/null +++ b/source/blender/render/intern/source/envmap.c @@ -0,0 +1,822 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the 
GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * Contributors: 2004/2005/2006 Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/envmap.c + * \ingroup render + */ + +#include <math.h> +#include <string.h> + +/* external modules: */ + +#include "BLI_math.h" +#include "BLI_blenlib.h" +#include "BLI_threads.h" +#include "BLI_utildefines.h" + +#include "BLT_translation.h" + +#include "IMB_imbuf_types.h" +#include "IMB_imbuf.h" /* for rectcpy */ + +#include "DNA_group_types.h" +#include "DNA_image_types.h" +#include "DNA_lamp_types.h" +#include "DNA_object_types.h" +#include "DNA_scene_types.h" +#include "DNA_texture_types.h" + +#include "BKE_main.h" +#include "BKE_image.h" /* BKE_imbuf_write */ +#include "BKE_texture.h" +#include "BKE_scene.h" + +/* this module */ +#include "render_types.h" +#include "envmap.h" +#include "renderdatabase.h" +#include "renderpipeline.h" +#include "texture.h" +#include "zbuf.h" +#include "render_result.h" + +/* ------------------------------------------------------------------------- */ + +static void envmap_split_ima(EnvMap *env, ImBuf *ibuf) +{ + int dx, part; + + /* after lock we test cube[1], if set the other thread has done it fine */ + BLI_thread_lock(LOCK_IMAGE); + if (env->cube[1] == NULL) { 
+ + BKE_texture_envmap_free_data(env); + + dx = ibuf->y; + dx /= 2; + if (3 * dx == ibuf->x) { + env->type = ENV_CUBE; + env->ok = ENV_OSA; + } + else if (ibuf->x == ibuf->y) { + env->type = ENV_PLANE; + env->ok = ENV_OSA; + } + else { + printf("Incorrect envmap size\n"); + env->ok = 0; + env->ima->ok = 0; + } + + if (env->ok) { + if (env->type == ENV_CUBE) { + for (part = 0; part < 6; part++) { + env->cube[part] = IMB_allocImBuf(dx, dx, 24, IB_rect | IB_rectfloat); + } + IMB_float_from_rect(ibuf); + + IMB_rectcpy(env->cube[0], ibuf, + 0, 0, 0, 0, dx, dx); + IMB_rectcpy(env->cube[1], ibuf, + 0, 0, dx, 0, dx, dx); + IMB_rectcpy(env->cube[2], ibuf, + 0, 0, 2 * dx, 0, dx, dx); + IMB_rectcpy(env->cube[3], ibuf, + 0, 0, 0, dx, dx, dx); + IMB_rectcpy(env->cube[4], ibuf, + 0, 0, dx, dx, dx, dx); + IMB_rectcpy(env->cube[5], ibuf, + 0, 0, 2 * dx, dx, dx, dx); + + } + else { /* ENV_PLANE */ + env->cube[1] = IMB_dupImBuf(ibuf); + IMB_float_from_rect(env->cube[1]); + } + } + } + BLI_thread_unlock(LOCK_IMAGE); +} + +/* ------------------------------------------------------------------------- */ +/* ****************** RENDER ********************** */ + +/* copy current render */ +static Render *envmap_render_copy(Render *re, EnvMap *env) +{ + Render *envre; + float viewscale; + int cuberes; + + envre = RE_NewRender("Envmap"); + + env->lastsize = re->r.size; + cuberes = (env->cuberes * re->r.size) / 100; + cuberes &= 0xFFFC; + + /* this flag has R_ZTRA in it for example */ + envre->flag = re->flag; + + /* set up renderdata */ + render_copy_renderdata(&envre->r, &re->r); + envre->r.mode &= ~(R_BORDER | R_PANORAMA | R_ORTHO | R_MBLUR); + BLI_freelistN(&envre->r.layers); + BLI_freelistN(&envre->r.views); + envre->r.filtertype = 0; + envre->r.tilex = envre->r.xsch / 2; + envre->r.tiley = envre->r.ysch / 2; + envre->r.size = 100; + envre->r.yasp = envre->r.xasp = 1; + + RE_InitState(envre, NULL, &envre->r, NULL, cuberes, cuberes, NULL); + envre->main = re->main; + envre->scene = 
re->scene; /* unsure about this... */ + envre->scene_color_manage = re->scene_color_manage; + envre->lay = re->lay; + + /* view stuff in env render */ + viewscale = (env->type == ENV_PLANE) ? env->viewscale : 1.0f; + RE_SetEnvmapCamera(envre, env->object, viewscale, env->clipsta, env->clipend); + copy_m4_m4(envre->viewmat_orig, re->viewmat_orig); + + /* callbacks */ + envre->display_update = re->display_update; + envre->duh = re->duh; + envre->test_break = re->test_break; + envre->tbh = re->tbh; + envre->current_scene_update = re->current_scene_update; + envre->suh = re->suh; + + /* and for the evil stuff; copy the database... */ + envre->totvlak = re->totvlak; + envre->totvert = re->totvert; + envre->tothalo = re->tothalo; + envre->totstrand = re->totstrand; + envre->totlamp = re->totlamp; + envre->sortedhalos = re->sortedhalos; + envre->lights = re->lights; + envre->objecttable = re->objecttable; + envre->customdata_names = re->customdata_names; + envre->raytree = re->raytree; + envre->totinstance = re->totinstance; + envre->instancetable = re->instancetable; + envre->objectinstance = re->objectinstance; + envre->qmcsamplers = re->qmcsamplers; + + return envre; +} + +static void envmap_free_render_copy(Render *envre) +{ + + envre->totvlak = 0; + envre->totvert = 0; + envre->tothalo = 0; + envre->totstrand = 0; + envre->totlamp = 0; + envre->totinstance = 0; + envre->sortedhalos = NULL; + BLI_listbase_clear(&envre->lights); + BLI_listbase_clear(&envre->objecttable); + BLI_listbase_clear(&envre->customdata_names); + envre->raytree = NULL; + BLI_listbase_clear(&envre->instancetable); + envre->objectinstance = NULL; + envre->qmcsamplers = NULL; + + RE_FreeRender(envre); +} + +/* ------------------------------------------------------------------------- */ + +static void envmap_transmatrix(float mat[4][4], int part) +{ + float tmat[4][4], eul[3], rotmat[4][4]; + + eul[0] = eul[1] = eul[2] = 0.0; + + if (part == 0) { /* neg z */ + /* pass */ + } + else if (part == 1) { 
/* pos z */ + eul[0] = M_PI; + } + else if (part == 2) { /* pos y */ + eul[0] = M_PI / 2.0; + } + else if (part == 3) { /* neg x */ + eul[0] = M_PI / 2.0; + eul[2] = M_PI / 2.0; + } + else if (part == 4) { /* neg y */ + eul[0] = M_PI / 2.0; + eul[2] = M_PI; + } + else { /* pos x */ + eul[0] = M_PI / 2.0; + eul[2] = -M_PI / 2.0; + } + + copy_m4_m4(tmat, mat); + eul_to_mat4(rotmat, eul); + mul_m4_m4m4(mat, tmat, rotmat); +} +/* ------------------------------------------------------------------------- */ + +static void env_set_imats(Render *re) +{ + Base *base; + float mat[4][4]; + + base = re->scene->base.first; + while (base) { + mul_m4_m4m4(mat, re->viewmat, base->object->obmat); + invert_m4_m4(base->object->imat, mat); + + base = base->next; + } + +} + +/* ------------------------------------------------------------------------- */ + +void env_rotate_scene(Render *re, float mat[4][4], int do_rotate) +{ + ObjectRen *obr; + ObjectInstanceRen *obi; + LampRen *lar = NULL; + HaloRen *har = NULL; + float imat[3][3], mat_inverse[4][4], smat[4][4], tmat[4][4], cmat[3][3], tmpmat[4][4]; + int a; + + if (do_rotate == 0) { + invert_m4_m4(tmat, mat); + copy_m3_m4(imat, tmat); + + copy_m4_m4(mat_inverse, mat); + } + else { + copy_m4_m4(tmat, mat); + copy_m3_m4(imat, mat); + + invert_m4_m4(mat_inverse, tmat); + } + + for (obi = re->instancetable.first; obi; obi = obi->next) { + /* append or set matrix depending on dupli */ + if (obi->flag & R_DUPLI_TRANSFORMED) { + copy_m4_m4(tmpmat, obi->mat); + mul_m4_m4m4(obi->mat, tmat, tmpmat); + } + else if (do_rotate == 1) + copy_m4_m4(obi->mat, tmat); + else + unit_m4(obi->mat); + + copy_m3_m4(cmat, obi->mat); + invert_m3_m3(obi->nmat, cmat); + transpose_m3(obi->nmat); + + /* indicate the renderer has to use transform matrices */ + if (do_rotate == 0) + obi->flag &= ~R_ENV_TRANSFORMED; + else { + obi->flag |= R_ENV_TRANSFORMED; + copy_m4_m4(obi->imat, mat_inverse); + } + } + + + for (obr = re->objecttable.first; obr; obr = obr->next) { 
+ for (a = 0; a < obr->tothalo; a++) { + if ((a & 255) == 0) har = obr->bloha[a >> 8]; + else har++; + + mul_m4_v3(tmat, har->co); + } + + /* imat_ren is needed for correct texture coordinates */ + mul_m4_m4m4(obr->ob->imat_ren, re->viewmat, obr->ob->obmat); + invert_m4(obr->ob->imat_ren); + } + + for (lar = re->lampren.first; lar; lar = lar->next) { + float lamp_imat[4][4]; + + /* copy from add_render_lamp */ + if (do_rotate == 1) + mul_m4_m4m4(tmpmat, re->viewmat, lar->lampmat); + else + mul_m4_m4m4(tmpmat, re->viewmat_orig, lar->lampmat); + + invert_m4_m4(lamp_imat, tmpmat); + copy_m3_m4(lar->mat, tmpmat); + copy_m3_m4(lar->imat, lamp_imat); + + lar->vec[0]= -tmpmat[2][0]; + lar->vec[1]= -tmpmat[2][1]; + lar->vec[2]= -tmpmat[2][2]; + normalize_v3(lar->vec); + lar->co[0]= tmpmat[3][0]; + lar->co[1]= tmpmat[3][1]; + lar->co[2]= tmpmat[3][2]; + + if (lar->type == LA_AREA) { + area_lamp_vectors(lar); + } + else if (lar->type == LA_SPOT) { + normalize_v3(lar->imat[0]); + normalize_v3(lar->imat[1]); + normalize_v3(lar->imat[2]); + + lar->sh_invcampos[0] = -lar->co[0]; + lar->sh_invcampos[1] = -lar->co[1]; + lar->sh_invcampos[2] = -lar->co[2]; + mul_m3_v3(lar->imat, lar->sh_invcampos); + lar->sh_invcampos[2] *= lar->sh_zfac; + + if (lar->shb) { + if (do_rotate == 1) { + mul_m4_m4m4(smat, lar->shb->viewmat, mat_inverse); + mul_m4_m4m4(lar->shb->persmat, lar->shb->winmat, smat); + } + else mul_m4_m4m4(lar->shb->persmat, lar->shb->winmat, lar->shb->viewmat); + } + } + } + + if (do_rotate) { + init_render_world(re); + env_set_imats(re); + } +} + +/* ------------------------------------------------------------------------- */ + +static void env_layerflags(Render *re, unsigned int notlay) +{ + ObjectRen *obr; + VlakRen *vlr = NULL; + int a; + + /* invert notlay, so if face is in multiple layers it will still be visible, + * unless all 'notlay' bits match the face bits. 
+ * face: 0110 + * not: 0100 + * ~not: 1011 + * now (face & ~not) is true + */ + + notlay = ~notlay; + + for (obr = re->objecttable.first; obr; obr = obr->next) { + if ((obr->lay & notlay) == 0) { + for (a = 0; a < obr->totvlak; a++) { + if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak; + else vlr++; + + vlr->flag |= R_HIDDEN; + } + } + } +} + +static void env_hideobject(Render *re, Object *ob) +{ + ObjectRen *obr; + VlakRen *vlr = NULL; + int a; + + for (obr = re->objecttable.first; obr; obr = obr->next) { + for (a = 0; a < obr->totvlak; a++) { + if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak; + else vlr++; + + if (obr->ob == ob) + vlr->flag |= R_HIDDEN; + } + } +} + +static void env_showobjects(Render *re) +{ + ObjectRen *obr; + VlakRen *vlr = NULL; + int a; + + for (obr = re->objecttable.first; obr; obr = obr->next) { + for (a = 0; a < obr->totvlak; a++) { + if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak; + else vlr++; + + vlr->flag &= ~R_HIDDEN; + } + } +} + +/* ------------------------------------------------------------------------- */ + +static void render_envmap(Render *re, EnvMap *env) +{ + /* only the cubemap and planar map is implemented */ + Render *envre; + ImBuf *ibuf; + float orthmat[4][4]; + float oldviewinv[4][4], mat[4][4], tmat[4][4]; + short part; + + /* need a recalc: ortho-render has no correct viewinv */ + invert_m4_m4(oldviewinv, re->viewmat); + + envre = envmap_render_copy(re, env); + + /* precalc orthmat for object */ + copy_m4_m4(orthmat, env->object->obmat); + normalize_m4(orthmat); + + /* need imat later for texture imat */ + mul_m4_m4m4(mat, re->viewmat, orthmat); + invert_m4_m4(tmat, mat); + copy_m3_m4(env->obimat, tmat); + + for (part = 0; part < 6; part++) { + if (env->type == ENV_PLANE && part != 1) + continue; + + re->display_clear(re->dch, envre->result); + + copy_m4_m4(tmat, orthmat); + envmap_transmatrix(tmat, part); + invert_m4_m4(mat, tmat); + /* mat now is the camera 'viewmat' */ + + 
copy_m4_m4(envre->viewmat, mat); + copy_m4_m4(envre->viewinv, tmat); + + /* we have to correct for the already rotated vertexcoords */ + mul_m4_m4m4(tmat, envre->viewmat, oldviewinv); + invert_m4_m4(env->imat, tmat); + + env_rotate_scene(envre, tmat, 1); + project_renderdata(envre, projectverto, 0, 0, 1); + env_layerflags(envre, env->notlay); + env_hideobject(envre, env->object); + + if (re->test_break(re->tbh) == 0) { + RE_TileProcessor(envre); + } + + /* rotate back */ + env_showobjects(envre); + env_rotate_scene(envre, tmat, 0); + + if (re->test_break(re->tbh) == 0) { + int y; + float *alpha; + float *rect; + + if (envre->result->do_exr_tile) { + BLI_rw_mutex_lock(&envre->resultmutex, THREAD_LOCK_WRITE); + render_result_exr_file_end(envre); + BLI_rw_mutex_unlock(&envre->resultmutex); + } + + RenderLayer *rl = envre->result->layers.first; + + /* envmap is rendered independently of multiview */ + rect = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, ""); + ibuf = IMB_allocImBuf(envre->rectx, envre->recty, 24, IB_rect | IB_rectfloat); + memcpy(ibuf->rect_float, rect, ibuf->channels * ibuf->x * ibuf->y * sizeof(float)); + + /* envmap renders without alpha */ + alpha = ibuf->rect_float + 3; + for (y = ibuf->x * ibuf->y - 1; y >= 0; y--, alpha += 4) + *alpha = 1.0; + + env->cube[part] = ibuf; + } + + if (re->test_break(re->tbh)) break; + + } + + if (re->test_break(re->tbh)) BKE_texture_envmap_free_data(env); + else { + if (envre->r.mode & R_OSA) env->ok = ENV_OSA; + else env->ok = ENV_NORMAL; + env->lastframe = re->scene->r.cfra; + } + + /* restore */ + envmap_free_render_copy(envre); + env_set_imats(re); + +} + +/* ------------------------------------------------------------------------- */ + +void make_envmaps(Render *re) +{ + Tex *tex; + bool do_init = false; + int depth = 0, trace; + + if (!(re->r.mode & R_ENVMAP)) return; + + /* we don't raytrace, disabling the flag will cause ray_transp render solid */ + trace = (re->r.mode & R_RAYTRACE); + re->r.mode &= 
~R_RAYTRACE; + + re->i.infostr = IFACE_("Creating Environment maps"); + re->stats_draw(re->sdh, &re->i); + + /* 5 = hardcoded max recursion level */ + while (depth < 5) { + tex = re->main->tex.first; + while (tex) { + if (tex->id.us && tex->type == TEX_ENVMAP) { + if (tex->env && tex->env->object) { + EnvMap *env = tex->env; + + if (env->object->lay & re->lay) { + if (env->stype == ENV_LOAD) { + float orthmat[4][4], mat[4][4], tmat[4][4]; + + /* precalc orthmat for object */ + copy_m4_m4(orthmat, env->object->obmat); + normalize_m4(orthmat); + + /* need imat later for texture imat */ + mul_m4_m4m4(mat, re->viewmat, orthmat); + invert_m4_m4(tmat, mat); + copy_m3_m4(env->obimat, tmat); + } + else { + + /* decide if to render an envmap (again) */ + if (env->depth >= depth) { + + /* set 'recalc' to make sure it does an entire loop of recalcs */ + + if (env->ok) { + /* free when OSA, and old one isn't OSA */ + if ((re->r.mode & R_OSA) && env->ok == ENV_NORMAL) + BKE_texture_envmap_free_data(env); + /* free when size larger */ + else if (env->lastsize < re->r.size) + BKE_texture_envmap_free_data(env); + /* free when env is in recalcmode */ + else if (env->recalc) + BKE_texture_envmap_free_data(env); + } + + if (env->ok == 0 && depth == 0) env->recalc = 1; + + if (env->ok == 0) { + do_init = true; + render_envmap(re, env); + + if (depth == env->depth) env->recalc = 0; + } + } + } + } + } + } + tex = tex->id.next; + } + depth++; + } + + if (do_init) { + re->display_init(re->dih, re->result); + re->display_clear(re->dch, re->result); + // re->flag |= R_REDRAW_PRV; + } + /* restore */ + re->r.mode |= trace; + +} + +/* ------------------------------------------------------------------------- */ + +static int envcube_isect(EnvMap *env, const float vec[3], float answ[2]) +{ + float lambda; + int face; + + if (env->type == ENV_PLANE) { + face = 1; + + lambda = 1.0f / vec[2]; + answ[0] = env->viewscale * lambda * vec[0]; + answ[1] = -env->viewscale * lambda * vec[1]; + } + else { 
+ /* which face */ + if (vec[2] <= -fabsf(vec[0]) && vec[2] <= -fabsf(vec[1]) ) { + face = 0; + lambda = -1.0f / vec[2]; + answ[0] = lambda * vec[0]; + answ[1] = lambda * vec[1]; + } + else if (vec[2] >= fabsf(vec[0]) && vec[2] >= fabsf(vec[1])) { + face = 1; + lambda = 1.0f / vec[2]; + answ[0] = lambda * vec[0]; + answ[1] = -lambda * vec[1]; + } + else if (vec[1] >= fabsf(vec[0])) { + face = 2; + lambda = 1.0f / vec[1]; + answ[0] = lambda * vec[0]; + answ[1] = lambda * vec[2]; + } + else if (vec[0] <= -fabsf(vec[1])) { + face = 3; + lambda = -1.0f / vec[0]; + answ[0] = lambda * vec[1]; + answ[1] = lambda * vec[2]; + } + else if (vec[1] <= -fabsf(vec[0])) { + face = 4; + lambda = -1.0f / vec[1]; + answ[0] = -lambda * vec[0]; + answ[1] = lambda * vec[2]; + } + else { + face = 5; + lambda = 1.0f / vec[0]; + answ[0] = -lambda * vec[1]; + answ[1] = lambda * vec[2]; + } + } + + answ[0] = 0.5f + 0.5f * answ[0]; + answ[1] = 0.5f + 0.5f * answ[1]; + return face; +} + +/* ------------------------------------------------------------------------- */ + +static void set_dxtdyt(float r_dxt[3], float r_dyt[3], const float dxt[3], const float dyt[3], int face) +{ + if (face == 2 || face == 4) { + r_dxt[0] = dxt[0]; + r_dyt[0] = dyt[0]; + r_dxt[1] = dxt[2]; + r_dyt[1] = dyt[2]; + } + else if (face == 3 || face == 5) { + r_dxt[0] = dxt[1]; + r_dxt[1] = dxt[2]; + r_dyt[0] = dyt[1]; + r_dyt[1] = dyt[2]; + } + else { + r_dxt[0] = dxt[0]; + r_dyt[0] = dyt[0]; + r_dxt[1] = dxt[1]; + r_dyt[1] = dyt[1]; + } +} + +/* ------------------------------------------------------------------------- */ + +int envmaptex(Tex *tex, const float texvec[3], float dxt[3], float dyt[3], int osatex, TexResult *texres, struct ImagePool *pool, const bool skip_load_image) +{ + extern Render R; /* only in this call */ + /* texvec should be the already reflected normal */ + EnvMap *env; + ImBuf *ibuf; + float fac, vec[3], sco[3], dxts[3], dyts[3]; + int face, face1; + + env = tex->env; + if (env == NULL || 
(env->stype != ENV_LOAD && env->object == NULL)) { + texres->tin = 0.0; + return 0; + } + + if (env->stype == ENV_LOAD) { + env->ima = tex->ima; + if (env->ima && env->ima->ok) { + if (env->cube[1] == NULL) { + ImBuf *ibuf_ima = BKE_image_pool_acquire_ibuf(env->ima, NULL, pool); + if (ibuf_ima) + envmap_split_ima(env, ibuf_ima); + else + env->ok = 0; + + if (env->type == ENV_PLANE) + tex->extend = TEX_EXTEND; + + BKE_image_pool_release_ibuf(env->ima, ibuf_ima, pool); + } + } + } + + if (env->ok == 0) { + texres->tin = 0.0; + return 0; + } + + /* rotate to envmap space, if object is set */ + copy_v3_v3(vec, texvec); + if (env->object) { + mul_m3_v3(env->obimat, vec); + if (osatex) { + mul_m3_v3(env->obimat, dxt); + mul_m3_v3(env->obimat, dyt); + } + } + else { + if (!BKE_scene_use_world_space_shading(R.scene)) { + // texvec is in view space + mul_mat3_m4_v3(R.viewinv, vec); + if (osatex) { + mul_mat3_m4_v3(R.viewinv, dxt); + mul_mat3_m4_v3(R.viewinv, dyt); + } + } + } + + face = envcube_isect(env, vec, sco); + ibuf = env->cube[face]; + + if (osatex) { + set_dxtdyt(dxts, dyts, dxt, dyt, face); + imagewraposa(tex, NULL, ibuf, sco, dxts, dyts, texres, pool, skip_load_image); + + /* edges? */ + + if (texres->ta < 1.0f) { + TexResult texr1, texr2; + + texr1.nor = texr2.nor = NULL; + texr1.talpha = texr2.talpha = texres->talpha; /* boxclip expects this initialized */ + + add_v3_v3(vec, dxt); + face1 = envcube_isect(env, vec, sco); + sub_v3_v3(vec, dxt); + + if (face != face1) { + ibuf = env->cube[face1]; + set_dxtdyt(dxts, dyts, dxt, dyt, face1); + imagewraposa(tex, NULL, ibuf, sco, dxts, dyts, &texr1, pool, skip_load_image); + } + else texr1.tr = texr1.tg = texr1.tb = texr1.ta = 0.0; + + /* here was the nasty bug! results were not zero-ed. FPE! 
*/ + + add_v3_v3(vec, dyt); + face1 = envcube_isect(env, vec, sco); + sub_v3_v3(vec, dyt); + + if (face != face1) { + ibuf = env->cube[face1]; + set_dxtdyt(dxts, dyts, dxt, dyt, face1); + imagewraposa(tex, NULL, ibuf, sco, dxts, dyts, &texr2, pool, skip_load_image); + } + else texr2.tr = texr2.tg = texr2.tb = texr2.ta = 0.0; + + fac = (texres->ta + texr1.ta + texr2.ta); + if (fac != 0.0f) { + fac = 1.0f / fac; + + texres->tr = fac * (texres->ta * texres->tr + texr1.ta * texr1.tr + texr2.ta * texr2.tr); + texres->tg = fac * (texres->ta * texres->tg + texr1.ta * texr1.tg + texr2.ta * texr2.tg); + texres->tb = fac * (texres->ta * texres->tb + texr1.ta * texr1.tb + texr2.ta * texr2.tb); + } + texres->ta = 1.0; + } + } + else { + imagewrap(tex, NULL, ibuf, sco, texres, pool, skip_load_image); + } + + return 1; +} diff --git a/source/blender/render/intern/source/external_engine.c b/source/blender/render/intern/source/external_engine.c index b541c993bc7..10ed91b53c4 100644 --- a/source/blender/render/intern/source/external_engine.c +++ b/source/blender/render/intern/source/external_engine.c @@ -112,11 +112,11 @@ void RE_engines_register(RenderEngineType *render_type) RenderEngineType *RE_engines_find(const char *idname) { RenderEngineType *type; - + type = BLI_findstring(&R_engines, idname, offsetof(RenderEngineType, idname)); if (!type) type = BLI_findstring(&R_engines, "BLENDER_EEVEE", offsetof(RenderEngineType, idname)); - + return type; } @@ -320,7 +320,7 @@ int RE_engine_test_break(RenderEngine *engine) if (re) return re->test_break(re->tbh); - + return 0; } @@ -776,7 +776,7 @@ int RE_engine_render(Render *re, int do_all) if (BKE_reports_contain(re->reports, RPT_ERROR)) G.is_break = true; - + #ifdef WITH_FREESTYLE if (re->r.mode & R_EDGE_FRS) RE_RenderFreestyleExternal(re); diff --git a/source/blender/render/intern/source/imagetexture.c b/source/blender/render/intern/source/imagetexture.c index b9d55916f51..1e9ad79e599 100644 --- 
a/source/blender/render/intern/source/imagetexture.c +++ b/source/blender/render/intern/source/imagetexture.c @@ -32,7 +32,7 @@ #include <fcntl.h> #include <math.h> #include <float.h> -#ifndef WIN32 +#ifndef WIN32 #include <unistd.h> #else #include <io.h> @@ -67,7 +67,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max static void ibuf_get_color(float col[4], struct ImBuf *ibuf, int x, int y) { int ofs = y * ibuf->x + x; - + if (ibuf->rect_float) { if (ibuf->channels==4) { const float *fp= ibuf->rect_float + 4*ofs; @@ -105,15 +105,15 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul int xi, yi; /* original values */ texres->tin= texres->ta= texres->tr= texres->tg= texres->tb= 0.0f; - + /* we need to set retval OK, otherwise texture code generates normals itself... */ retval= texres->nor ? 3 : 1; - + /* quick tests */ if (ibuf==NULL && ima==NULL) return retval; if (ima) { - + /* hack for icon render */ if (skip_load_image && !BKE_image_has_loaded_ibuf(ima)) return retval; @@ -127,7 +127,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul BKE_image_pool_release_ibuf(ima, ibuf, pool); return retval; } - + /* setup mapping */ if (tex->imaflag & TEX_IMAROT) { fy= texvec[0]; @@ -137,10 +137,10 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul fx= texvec[0]; fy= texvec[1]; } - + if (tex->extend == TEX_CHECKER) { int xs, ys; - + xs= (int)floor(fx); ys= (int)floor(fy); fx-= xs; @@ -205,7 +205,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul if (y<0) y+= ibuf->y; } } - + /* keep this before interpolation [#29761] */ if (ima) { if ((tex->imaflag & TEX_USEALPHA) && (ima->flag & IMA_IGNORE_ALPHA) == 0) { @@ -232,7 +232,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul else { /* no filtering */ ibuf_get_color(&texres->tr, ibuf, x, y); } - + if (texres->nor) { if 
(tex->imaflag & TEX_NORMALMAP) { /* qdn: normal from color @@ -283,7 +283,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul else { texres->ta = texres->tin = 1.0; } - + if (tex->flag & TEX_NEGALPHA) { texres->ta = 1.0f - texres->ta; } @@ -301,7 +301,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul BKE_image_pool_release_ibuf(ima, ibuf, pool); BRICONTRGB; - + return retval; } @@ -327,9 +327,9 @@ static void clipx_rctf_swap(rctf *stack, short *count, float x1, float x2) newrct->xmin = rf->xmin+(x2-x1); newrct->ymin = rf->ymin; newrct->ymax = rf->ymax; - + if (newrct->xmin ==newrct->xmax) (*count)--; - + rf->xmin = x1; } } @@ -489,7 +489,7 @@ static void boxsampleclip(struct ImBuf *ibuf, rctf *rf, TexResult *texres) else { div= texres->tr= texres->tg= texres->tb= texres->ta= 0.0; for (y=starty; y<=endy; y++) { - + muly= 1.0; if (starty==endy) { @@ -499,10 +499,10 @@ static void boxsampleclip(struct ImBuf *ibuf, rctf *rf, TexResult *texres) if (y==starty) muly= 1.0f-(rf->ymin - y); if (y==endy) muly= (rf->ymax - y); } - + if (startx==endx) { mulx= muly; - + ibuf_get_color(col, ibuf, startx, y); texres->ta+= mulx*col[3]; @@ -518,7 +518,7 @@ static void boxsampleclip(struct ImBuf *ibuf, rctf *rf, TexResult *texres) if (x==endx) mulx*= (rf->xmax - x); ibuf_get_color(col, ibuf, x, y); - + if (mulx==1.0f) { texres->ta+= col[3]; texres->tr+= col[0]; @@ -573,7 +573,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max rf->ymax = maxy*(ibuf->y); texr.talpha= texres->talpha; /* is read by boxsample_clip */ - + if (imapextend) { CLAMP(rf->xmin, 0.0f, ibuf->x-1); CLAMP(rf->xmax, 0.0f, ibuf->x-1); @@ -608,7 +608,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max tot= texres->tr= texres->tb= texres->tg= texres->ta= 0.0; while (count--) { boxsampleclip(ibuf, rf, &texr); - + opp= square_rctf(rf); tot+= opp; @@ -629,7 +629,7 @@ static void 
boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max boxsampleclip(ibuf, rf, texres); if (texres->talpha==0) texres->ta= 1.0; - + if (alphaclip!=1.0f) { /* premul it all */ texres->tr*= alphaclip; @@ -637,7 +637,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max texres->tb*= alphaclip; texres->ta*= alphaclip; } -} +} /*----------------------------------------------------------------------------------------------------------------- * from here, some functions only used for the new filtering */ @@ -874,7 +874,7 @@ static void image_mipmap_test(Tex *tex, ImBuf *ibuf) { if (tex->imaflag & TEX_MIPMAP) { if ((ibuf->flags & IB_fields) == 0) { - + if (ibuf->mipmap[0] && (ibuf->userflags & IB_MIPMAP_INVALID)) { BLI_thread_lock(LOCK_IMAGE); if (ibuf->userflags & IB_MIPMAP_INVALID) { @@ -885,7 +885,7 @@ static void image_mipmap_test(Tex *tex, ImBuf *ibuf) } if (ibuf->mipmap[0] == NULL) { BLI_thread_lock(LOCK_IMAGE); - if (ibuf->mipmap[0] == NULL) + if (ibuf->mipmap[0] == NULL) IMB_makemipmap(ibuf, tex->imaflag & TEX_GAUSS_MIP); BLI_thread_unlock(LOCK_IMAGE); } @@ -895,7 +895,7 @@ static void image_mipmap_test(Tex *tex, ImBuf *ibuf) } } } - + } static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], float dxt[2], float dyt[2], TexResult *texres, struct ImagePool *pool, const bool skip_load_image) @@ -946,7 +946,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex /* mipmap test */ image_mipmap_test(tex, ibuf); - + if (ima) { if ((tex->imaflag & TEX_USEALPHA) && (ima->flag & IMA_IGNORE_ALPHA) == 0) { if ((tex->imaflag & TEX_CALCALPHA) == 0) { @@ -1281,7 +1281,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex else texres->tin = texres->ta; if (tex->flag & TEX_NEGALPHA) texres->ta = 1.f - texres->ta; - + if (texres->nor && (tex->imaflag & TEX_NORMALMAP)) { /* normal from color */ /* The invert of the red channel is to make * the 
normal map compliant with the outside world. @@ -1312,7 +1312,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex BKE_image_pool_release_ibuf(ima, ibuf, pool); BRICONTRGB; - + return retval; } @@ -1334,10 +1334,10 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const return imagewraposa_aniso(tex, ima, ibuf, texvec, dxt, dyt, texres, pool, skip_load_image); texres->tin= texres->ta= texres->tr= texres->tg= texres->tb= 0.0f; - + /* we need to set retval OK, otherwise texture code generates normals itself... */ retval = texres->nor ? 3 : 1; - + /* quick tests */ if (ibuf==NULL && ima==NULL) return retval; @@ -1346,7 +1346,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const /* hack for icon render */ if (skip_load_image && !BKE_image_has_loaded_ibuf(ima)) return retval; - + ibuf = BKE_image_pool_acquire_ibuf(ima, &tex->iuser, pool); ima->flag|= IMA_USED_FOR_RENDER; @@ -1356,7 +1356,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const BKE_image_pool_release_ibuf(ima, ibuf, pool); return retval; } - + /* mipmap test */ image_mipmap_test(tex, ibuf); @@ -1367,9 +1367,9 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const } } } - + texr.talpha= texres->talpha; - + if (tex->imaflag & TEX_IMAROT) { fy= texvec[0]; fx= texvec[1]; @@ -1378,7 +1378,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const fx= texvec[0]; fy= texvec[1]; } - + /* pixel coordinates */ minx = min_fff(dxt[0], dyt[0], dxt[0] + dyt[0]); @@ -1389,7 +1389,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const /* tex_sharper has been removed */ minx= (maxx-minx)/2.0f; miny= (maxy-miny)/2.0f; - + if (tex->imaflag & TEX_FILTER_MIN) { /* make sure the filtersize is minimal in pixels (normal, ref map can have miniature pixel dx/dy) */ float addval= (0.5f * tex->filtersize) / (float) 
MIN2(ibuf->x, ibuf->y); @@ -1402,7 +1402,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const else if (tex->filtersize!=1.0f) { minx*= tex->filtersize; miny*= tex->filtersize; - + dxt[0]*= tex->filtersize; dxt[1]*= tex->filtersize; dyt[0]*= tex->filtersize; @@ -1410,13 +1410,13 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const } if (tex->imaflag & TEX_IMAROT) SWAP(float, minx, miny); - + if (minx>0.25f) minx= 0.25f; else if (minx<0.00001f) minx= 0.00001f; /* side faces of unit-cube */ if (miny>0.25f) miny= 0.25f; else if (miny<0.00001f) miny= 0.00001f; - + /* repeat and clip */ imaprepeat= (tex->extend==TEX_REPEAT); imapextend= (tex->extend==TEX_EXTEND); @@ -1430,10 +1430,10 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const if (tex->extend == TEX_CHECKER) { int xs, ys, xs1, ys1, xs2, ys2, boundary; - + xs= (int)floor(fx); ys= (int)floor(fy); - + /* both checkers available, no boundary exceptions, checkerdist will eat aliasing */ if ( (tex->flag & TEX_CHECKER_ODD) && (tex->flag & TEX_CHECKER_EVEN) ) { fx-= xs; @@ -1447,7 +1447,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const return retval; } else { - + xs1= (int)floor(fx-minx); ys1= (int)floor(fy-miny); xs2= (int)floor(fx+minx); @@ -1479,14 +1479,14 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const if (tex->flag & TEX_CHECKER_ODD) { if ((xs1+ys) & 1) fx-= xs2; else fx-= xs1; - + if ((ys1+xs) & 1) fy-= ys2; else fy-= ys1; } if (tex->flag & TEX_CHECKER_EVEN) { if ((xs1+ys) & 1) fx-= xs1; else fx-= xs2; - + if ((ys1+xs) & 1) fy-= ys1; else fy-= ys2; } @@ -1525,7 +1525,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const if (fx>1.0f) fx -= (int)(fx); else if (fx<0.0f) fx+= 1-(int)(fx); } - + if (imapextend) { if (fy>1.0f) fy = 1.0f; else if (fy<0.0f) fy= 0.0f; @@ -1540,18 +1540,18 @@ int imagewraposa(Tex *tex, Image 
*ima, ImBuf *ibuf, const float texvec[3], const if (tex->imaflag & TEX_MIPMAP) { ImBuf *previbuf, *curibuf; float bumpscale; - + dx = minx; dy = miny; maxd = max_ff(dx, dy); if (maxd > 0.5f) maxd = 0.5f; pixsize = 1.0f / (float) MIN2(ibuf->x, ibuf->y); - + bumpscale= pixsize/maxd; if (bumpscale>1.0f) bumpscale= 1.0f; else bumpscale*=bumpscale; - + curmap= 0; previbuf= curibuf= ibuf; while (curmap < IMB_MIPMAP_LEVELS && ibuf->mipmap[curmap]) { @@ -1567,12 +1567,12 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const if (minx < 0.5f / ibuf->x) minx = 0.5f / ibuf->x; if (miny < 0.5f / ibuf->y) miny = 0.5f / ibuf->y; } - + if (texres->nor && (tex->imaflag & TEX_NORMALMAP)==0) { /* a bit extra filter */ //minx*= 1.35f; //miny*= 1.35f; - + boxsample(curibuf, fx-minx, fy-miny, fx+minx, fy+miny, texres, imaprepeat, imapextend); val1= texres->tr+texres->tg+texres->tb; boxsample(curibuf, fx-minx+dxt[0], fy-miny+dxt[1], fx+minx+dxt[0], fy+miny+dxt[1], &texr, imaprepeat, imapextend); @@ -1583,11 +1583,11 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const /* don't switch x or y! 
*/ texres->nor[0]= (val1-val2); texres->nor[1]= (val1-val3); - + if (previbuf!=curibuf) { /* interpolate */ - + boxsample(previbuf, fx-minx, fy-miny, fx+minx, fy+miny, &texr, imaprepeat, imapextend); - + /* calc rgb */ dx= 2.0f*(pixsize-maxd)/pixsize; if (dx>=1.0f) { @@ -1601,16 +1601,16 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const texres->tr= dy*texres->tr+ dx*texr.tr; texres->ta= dy*texres->ta+ dx*texr.ta; } - + val1= dy*val1+ dx*(texr.tr + texr.tg + texr.tb); boxsample(previbuf, fx-minx+dxt[0], fy-miny+dxt[1], fx+minx+dxt[0], fy+miny+dxt[1], &texr, imaprepeat, imapextend); val2= dy*val2+ dx*(texr.tr + texr.tg + texr.tb); boxsample(previbuf, fx-minx+dyt[0], fy-miny+dyt[1], fx+minx+dyt[0], fy+miny+dyt[1], &texr, imaprepeat, imapextend); val3= dy*val3+ dx*(texr.tr + texr.tg + texr.tb); - + texres->nor[0]= (val1-val2); /* vals have been interpolated above! */ texres->nor[1]= (val1-val3); - + if (dx<1.0f) { dy= 1.0f-dx; texres->tb= dy*texres->tb+ dx*texr.tb; @@ -1632,9 +1632,9 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const if (previbuf!=curibuf) { /* interpolate */ boxsample(previbuf, minx, miny, maxx, maxy, &texr, imaprepeat, imapextend); - + fx= 2.0f*(pixsize-maxd)/pixsize; - + if (fx>=1.0f) { texres->ta= texr.ta; texres->tb= texr.tb; texres->tg= texr.tg; texres->tr= texr.tr; @@ -1672,7 +1672,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const else boxsample(ibuf, fx-minx, fy-miny, fx+minx, fy+miny, texres, imaprepeat, imapextend); } - + if (tex->imaflag & TEX_CALCALPHA) { texres->ta = texres->tin = texres->ta * max_fff(texres->tr, texres->tg, texres->tb); } @@ -1681,7 +1681,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const } if (tex->flag & TEX_NEGALPHA) texres->ta= 1.0f-texres->ta; - + if (texres->nor && (tex->imaflag & TEX_NORMALMAP)) { /* qdn: normal from color * The invert of the red channel is to make @@ -1705,7 
+1705,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const BKE_image_pool_release_ibuf(ima, ibuf, pool); BRICONTRGB; - + return retval; } @@ -1713,16 +1713,16 @@ void image_sample(Image *ima, float fx, float fy, float dx, float dy, float resu { TexResult texres; ImBuf *ibuf = BKE_image_pool_acquire_ibuf(ima, NULL, pool); - + if (UNLIKELY(ibuf == NULL)) { zero_v4(result); return; } - + texres.talpha = true; /* boxsample expects to be initialized */ boxsample(ibuf, fx, fy, fx + dx, fy + dy, &texres, 0, 1); copy_v4_v4(result, &texres.tr); - + ima->flag|= IMA_USED_FOR_RENDER; BKE_image_pool_release_ibuf(ima, ibuf, pool); @@ -1737,11 +1737,11 @@ void ibuf_sample(ImBuf *ibuf, float fx, float fy, float dx, float dy, float resu AFD.dyt[0] = dy; AFD.dyt[1] = dy; //copy_v2_v2(AFD.dxt, dx); //copy_v2_v2(AFD.dyt, dy); - + AFD.intpol = 1; AFD.extflag = TXC_EXTD; ewa_eval(&texres, ibuf, fx, fy, &AFD); - + copy_v4_v4(result, &texres.tr); } diff --git a/source/blender/render/intern/source/initrender.c b/source/blender/render/intern/source/initrender.c index 4274d641674..9611a8a7452 100644 --- a/source/blender/render/intern/source/initrender.c +++ b/source/blender/render/intern/source/initrender.c @@ -66,9 +66,9 @@ static float filt_quadratic(float x) static float filt_cubic(float x) { float x2 = x * x; - + if (x < 0.0f) x = -x; - + if (x < 1.0f) return 0.5f * x * x2 - x2 + 2.0f / 3.0f; if (x < 2.0f) return (2.0f - x) * (2.0f - x) * (2.0f - x) / 6.0f; return 0.0f; @@ -78,7 +78,7 @@ static float filt_cubic(float x) static float filt_catrom(float x) { float x2 = x * x; - + if (x < 0.0f) x = -x; if (x < 1.0f) return 1.5f * x2 * x - 2.5f * x2 + 1.0f; if (x < 2.0f) return -0.5f * x2 * x + 2.5f * x2 - 4.0f * x + 2.0f; @@ -108,34 +108,34 @@ static float filt_mitchell(float x) /* Mitchell & Netravali's two-param cubic */ float RE_filter_value(int type, float x) { float gaussfac = 1.6f; - + x = ABS(x); - + switch (type) { case R_FILTER_BOX: if (x > 1.0f) 
return 0.0f; return 1.0f; - + case R_FILTER_TENT: if (x > 1.0f) return 0.0f; return 1.0f - x; - + case R_FILTER_GAUSS: { const float two_gaussfac2 = 2.0f * gaussfac * gaussfac; x *= 3.0f * gaussfac; return 1.0f / sqrtf((float)M_PI * two_gaussfac2) * expf(-x*x / two_gaussfac2); } - + case R_FILTER_MITCH: return filt_mitchell(x * gaussfac); - + case R_FILTER_QUAD: return filt_quadratic(x * gaussfac); - + case R_FILTER_CUBIC: return filt_cubic(x * gaussfac); - + case R_FILTER_CATROM: return filt_catrom(x * gaussfac); } @@ -221,20 +221,20 @@ void RE_parts_init(Render *re) { int nr, xd, yd, partx, party, xparts, yparts; int xminb, xmaxb, yminb, ymaxb; - + RE_parts_free(re); - + /* this is render info for caller, is not reset when parts are freed! */ re->i.totpart = 0; re->i.curpart = 0; re->i.partsdone = 0; - + /* just for readable code.. */ xminb = re->disprect.xmin; yminb = re->disprect.ymin; xmaxb = re->disprect.xmax; ymaxb = re->disprect.ymax; - + RE_parts_clamp(re); partx = re->partx; @@ -242,17 +242,17 @@ void RE_parts_init(Render *re) /* part count */ xparts = (re->rectx + partx - 1) / partx; yparts = (re->recty + party - 1) / party; - + for (nr = 0; nr < xparts * yparts; nr++) { rcti disprect; int rectx, recty; - + xd = (nr % xparts); yd = (nr - xd) / xparts; - + disprect.xmin = xminb + xd * partx; disprect.ymin = yminb + yd * party; - + /* ensure we cover the entire picture, so last parts go to end */ if (xd < xparts - 1) { disprect.xmax = disprect.xmin + partx; @@ -260,21 +260,21 @@ void RE_parts_init(Render *re) disprect.xmax = xmaxb; } else disprect.xmax = xmaxb; - + if (yd < yparts - 1) { disprect.ymax = disprect.ymin + party; if (disprect.ymax > ymaxb) disprect.ymax = ymaxb; } else disprect.ymax = ymaxb; - + rectx = BLI_rcti_size_x(&disprect); recty = BLI_rcti_size_y(&disprect); - + /* so, now can we add this part? 
*/ if (rectx > 0 && recty > 0) { RenderPart *pa = MEM_callocN(sizeof(RenderPart), "new part"); - + pa->disprect = disprect; pa->rectx = rectx; pa->recty = recty; diff --git a/source/blender/render/intern/source/occlusion.c b/source/blender/render/intern/source/occlusion.c new file mode 100644 index 00000000000..8aa90a390b3 --- /dev/null +++ b/source/blender/render/intern/source/occlusion.c @@ -0,0 +1,1533 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2008 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): Brecht Van Lommel. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/occlusion.c + * \ingroup render + */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "MEM_guardedalloc.h" + +#include "DNA_material_types.h" + +#include "BLI_math.h" +#include "BLI_memarena.h" +#include "BLI_threads.h" +#include "BLI_utildefines.h" + +#include "BLT_translation.h" + +#include "BKE_node.h" +#include "BKE_scene.h" + + +#include "RE_shader_ext.h" + +/* local includes */ +#include "occlusion.h" +#include "render_types.h" +#include "rendercore.h" +#include "renderdatabase.h" +#include "shading.h" + +/* ------------------------- Declarations --------------------------- */ + +#define INVPI ((float)M_1_PI) +#define TOTCHILD 8 +#define CACHE_STEP 3 + +typedef struct OcclusionCacheSample { + float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2; + int x, y, filled; +} OcclusionCacheSample; + +typedef struct OcclusionCache { + OcclusionCacheSample *sample; + int x, y, w, h, step; +} OcclusionCache; + +typedef struct OccFace { + int obi; + int facenr; +} OccFace; + +typedef struct OccNode { + float co[3], area; + float sh[9], dco; + float occlusion, rad[3]; + int childflag; + union { + //OccFace face; + int face; + struct OccNode *node; + } child[TOTCHILD]; +} OccNode; + +typedef struct OcclusionTree { + MemArena *arena; + + float (*co)[3]; /* temporary during build */ + + OccFace *face; /* instance and face indices */ + float *occlusion; /* occlusion for faces */ + float (*rad)[3]; /* radiance for faces */ + + OccNode *root; + + OccNode **stack[BLENDER_MAX_THREADS]; + int maxdepth; + + int totface; + + float error; + float distfac; + + int dothreadedbuild; + int totbuildthread; + int doindirect; + + OcclusionCache *cache; + + int num_threads; +} OcclusionTree; + +typedef struct OcclusionThread { + Render *re; + StrandSurface *mesh; + float (*faceao)[3]; + float (*faceenv)[3]; + float (*faceindirect)[3]; + int begin, 
end; + int thread; +} OcclusionThread; + +typedef struct OcclusionBuildThread { + OcclusionTree *tree; + int begin, end, depth; + OccNode *node; +} OcclusionBuildThread; + +/* ------------------------- Shading --------------------------- */ + +extern Render R; /* meh */ + +static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad) +{ + ShadeInput *shi = ssamp->shi; + ShadeResult *shr = ssamp->shr; + float l, u, v, *v1, *v2, *v3; + + /* init */ + if (vlr->v4) { + shi->u = u = 0.5f; + shi->v = v = 0.5f; + } + else { + shi->u = u = 1.0f / 3.0f; + shi->v = v = 1.0f / 3.0f; + } + + /* setup render coordinates */ + v1 = vlr->v1->co; + v2 = vlr->v2->co; + v3 = vlr->v3->co; + + /* renderco */ + l = 1.0f - u - v; + + shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0]; + shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1]; + shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2]; + + shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); + + /* set up view vector */ + copy_v3_v3(shi->view, shi->co); + normalize_v3(shi->view); + + /* cache for shadow */ + shi->samplenr++; + + shi->xs = 0; /* TODO */ + shi->ys = 0; + + shade_input_set_normals(shi); + + /* no normal flip */ + if (shi->flippednor) + shade_input_flip_normals(shi); + + madd_v3_v3fl(shi->co, shi->facenor, -0.0001f); /* ugly.. 
*/ + + /* not a pretty solution, but fixes common cases */ + if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) { + negate_v3(shi->vn); + negate_v3(shi->vno); + negate_v3(shi->nmapnorm); + } + + /* init material vars */ + shade_input_init_material(shi); + + /* render */ + shade_input_set_shade_texco(shi); + + if (shi->mat->nodetree && shi->mat->use_nodes) { + ntreeShaderExecTree(shi->mat->nodetree, shi, shr); + shi->mat = vlr->mat; /* shi->mat is being set in nodetree */ + } + else { + shade_material_loop(shi, shr); + } + + copy_v3_v3(rad, shr->combined); +} + +static void occ_build_shade(Render *re, OcclusionTree *tree) +{ + ShadeSample ssamp; + ObjectInstanceRen *obi; + VlakRen *vlr; + int a; + + R = *re; + + /* setup shade sample with correct passes */ + memset(&ssamp, 0, sizeof(ShadeSample)); + ssamp.shi[0].lay = re->lay; + ssamp.shi[0].passflag = SCE_PASS_DIFFUSE | SCE_PASS_RGBA; + ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC); + ssamp.tot = 1; + + for (a = 0; a < tree->totface; a++) { + obi = &R.objectinstance[tree->face[a].obi]; + vlr = RE_findOrAddVlak(obi->obr, tree->face[a].facenr); + + occ_shade(&ssamp, obi, vlr, tree->rad[a]); + + if (re->test_break(re->tbh)) + break; + } +} + +/* ------------------------- Spherical Harmonics --------------------------- */ + +/* Use 2nd order SH => 9 coefficients, stored in this order: + * 0 = (0,0), + * 1 = (1,-1), 2 = (1,0), 3 = (1,1), + * 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */ + +static void sh_copy(float *shresult, float *sh) +{ + memcpy(shresult, sh, sizeof(float) * 9); +} + +static void sh_mul(float *sh, float f) +{ + int i; + + for (i = 0; i < 9; i++) + sh[i] *= f; +} + +static void sh_add(float *shresult, float *sh1, float *sh2) +{ + int i; + + for (i = 0; i < 9; i++) + shresult[i] = sh1[i] + sh2[i]; +} + +static void sh_from_disc(float *n, float area, float *shresult) +{ + /* See formula (3) in: + * "An Efficient Representation for Irradiance Environment Maps" */ + float sh[9], x, y, 
z; + + x = n[0]; + y = n[1]; + z = n[2]; + + sh[0] = 0.282095f; + + sh[1] = 0.488603f * y; + sh[2] = 0.488603f * z; + sh[3] = 0.488603f * x; + + sh[4] = 1.092548f * x * y; + sh[5] = 1.092548f * y * z; + sh[6] = 0.315392f * (3.0f * z * z - 1.0f); + sh[7] = 1.092548f * x * z; + sh[8] = 0.546274f * (x * x - y * y); + + sh_mul(sh, area); + sh_copy(shresult, sh); +} + +static float sh_eval(float *sh, float *v) +{ + /* See formula (13) in: + * "An Efficient Representation for Irradiance Environment Maps" */ + static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f; + static const float c4 = 0.886227f, c5 = 0.247708f; + float x, y, z, sum; + + x = v[0]; + y = v[1]; + z = v[2]; + + sum = c1 * sh[8] * (x * x - y * y); + sum += c3 * sh[6] * z * z; + sum += c4 * sh[0]; + sum += -c5 * sh[6]; + sum += 2.0f * c1 * (sh[4] * x * y + sh[7] * x * z + sh[5] * y * z); + sum += 2.0f * c2 * (sh[3] * x + sh[1] * y + sh[2] * z); + + return sum; +} + +/* ------------------------------ Building --------------------------------- */ + +static void occ_face(const OccFace *face, float co[3], float normal[3], float *area) +{ + ObjectInstanceRen *obi; + VlakRen *vlr; + float v1[3], v2[3], v3[3], v4[3]; + + obi = &R.objectinstance[face->obi]; + vlr = RE_findOrAddVlak(obi->obr, face->facenr); + + if (co) { + if (vlr->v4) + mid_v3_v3v3(co, vlr->v1->co, vlr->v3->co); + else + mid_v3_v3v3v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co); + + if (obi->flag & R_TRANSFORMED) + mul_m4_v3(obi->mat, co); + } + + if (normal) { + normal[0] = -vlr->n[0]; + normal[1] = -vlr->n[1]; + normal[2] = -vlr->n[2]; + + if (obi->flag & R_TRANSFORMED) + mul_m3_v3(obi->nmat, normal); + } + + if (area) { + copy_v3_v3(v1, vlr->v1->co); + copy_v3_v3(v2, vlr->v2->co); + copy_v3_v3(v3, vlr->v3->co); + if (vlr->v4) copy_v3_v3(v4, vlr->v4->co); + + if (obi->flag & R_TRANSFORMED) { + mul_m4_v3(obi->mat, v1); + mul_m4_v3(obi->mat, v2); + mul_m4_v3(obi->mat, v3); + if (vlr->v4) mul_m4_v3(obi->mat, v4); + } + + /* todo: correct 
area for instances */ + if (vlr->v4) + *area = area_quad_v3(v1, v2, v3, v4); + else + *area = area_tri_v3(v1, v2, v3); + } +} + +static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node) +{ + OccNode *child; + float occ, area, totarea, rad[3]; + int a, b, indirect = tree->doindirect; + + occ = 0.0f; + totarea = 0.0f; + if (indirect) zero_v3(rad); + + for (b = 0; b < TOTCHILD; b++) { + if (node->childflag & (1 << b)) { + a = node->child[b].face; + occ_face(&tree->face[a], NULL, NULL, &area); + occ += area * tree->occlusion[a]; + if (indirect) madd_v3_v3fl(rad, tree->rad[a], area); + totarea += area; + } + else if (node->child[b].node) { + child = node->child[b].node; + occ_sum_occlusion(tree, child); + + occ += child->area * child->occlusion; + if (indirect) madd_v3_v3fl(rad, child->rad, child->area); + totarea += child->area; + } + } + + if (totarea != 0.0f) { + occ /= totarea; + if (indirect) mul_v3_fl(rad, 1.0f / totarea); + } + + node->occlusion = occ; + if (indirect) copy_v3_v3(node->rad, rad); +} + +static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max) +{ + float len, maxlen = -1.0f; + int a, axis = 0; + + INIT_MINMAX(min, max); + + for (a = begin; a < end; a++) { + minmax_v3v3_v3(min, max, tree->co[a]); + } + + for (a = 0; a < 3; a++) { + len = max[a] - min[a]; + + if (len > maxlen) { + maxlen = len; + axis = a; + } + } + + return axis; +} + +static void occ_node_from_face(OccFace *face, OccNode *node) +{ + float n[3]; + + occ_face(face, node->co, n, &node->area); + node->dco = 0.0f; + sh_from_disc(n, node->area, node->sh); +} + +static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco) +{ + int b; + for (b = 0; b < TOTCHILD; b++) { + float dist, d[3], nco[3]; + + if (node->childflag & (1 << b)) { + occ_face(tree->face + node->child[b].face, nco, NULL, NULL); + } + else if (node->child[b].node) { + OccNode *child = node->child[b].node; + occ_build_dco(tree, child, co, dco); + 
copy_v3_v3(nco, child->co); + } + else { + continue; + } + + sub_v3_v3v3(d, nco, co); + dist = dot_v3v3(d, d); + if (dist > *dco) + *dco = dist; + } +} + +static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split) +{ + float min[3], max[3], mid; + int axis, a, enda; + + /* split in middle of boundbox. this seems faster than median split + * on complex scenes, possibly since it avoids two distant faces to + * be in the same node better? */ + axis = occ_find_bbox_axis(tree, begin, end, min, max); + mid = 0.5f * (min[axis] + max[axis]); + + a = begin; + enda = end; + while (a < enda) { + if (tree->co[a][axis] > mid) { + enda--; + SWAP(OccFace, tree->face[a], tree->face[enda]); + swap_v3_v3(tree->co[a], tree->co[enda]); + } + else + a++; + } + + *split = enda; +} + +static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count) +{ + /* split faces into eight groups */ + int b, splitx, splity[2], splitz[4]; + + occ_build_split(tree, begin, end, &splitx); + + /* force split if none found, to deal with degenerate geometry */ + if (splitx == begin || splitx == end) + splitx = (begin + end) / 2; + + occ_build_split(tree, begin, splitx, &splity[0]); + occ_build_split(tree, splitx, end, &splity[1]); + + occ_build_split(tree, begin, splity[0], &splitz[0]); + occ_build_split(tree, splity[0], splitx, &splitz[1]); + occ_build_split(tree, splitx, splity[1], &splitz[2]); + occ_build_split(tree, splity[1], end, &splitz[3]); + + offset[0] = begin; + offset[1] = splitz[0]; + offset[2] = splity[0]; + offset[3] = splitz[1]; + offset[4] = splitx; + offset[5] = splitz[2]; + offset[6] = splity[1]; + offset[7] = splitz[3]; + + for (b = 0; b < 7; b++) + count[b] = offset[b + 1] - offset[b]; + count[7] = end - offset[7]; +} + +static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth); + +static void *exec_occ_build(void *data) +{ + OcclusionBuildThread *othread = (OcclusionBuildThread *)data; + + 
occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth); + + return NULL; +} + +static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth) +{ + ListBase threads; + OcclusionBuildThread othreads[BLENDER_MAX_THREADS]; + OccNode *child, tmpnode; + /* OccFace *face; */ + int a, b, totthread = 0, offset[TOTCHILD], count[TOTCHILD]; + + /* add a new node */ + node->occlusion = 1.0f; + + /* leaf node with only children */ + if (end - begin <= TOTCHILD) { + for (a = begin, b = 0; a < end; a++, b++) { + /* face= &tree->face[a]; */ + node->child[b].face = a; + node->childflag |= (1 << b); + } + } + else { + /* order faces */ + occ_build_8_split(tree, begin, end, offset, count); + + if (depth == 1 && tree->dothreadedbuild) + BLI_threadpool_init(&threads, exec_occ_build, tree->totbuildthread); + + for (b = 0; b < TOTCHILD; b++) { + if (count[b] == 0) { + node->child[b].node = NULL; + } + else if (count[b] == 1) { + /* face= &tree->face[offset[b]]; */ + node->child[b].face = offset[b]; + node->childflag |= (1 << b); + } + else { + if (tree->dothreadedbuild) + BLI_thread_lock(LOCK_CUSTOM1); + + child = BLI_memarena_alloc(tree->arena, sizeof(OccNode)); + node->child[b].node = child; + + /* keep track of maximum depth for stack */ + if (depth >= tree->maxdepth) + tree->maxdepth = depth + 1; + + if (tree->dothreadedbuild) + BLI_thread_unlock(LOCK_CUSTOM1); + + if (depth == 1 && tree->dothreadedbuild) { + othreads[totthread].tree = tree; + othreads[totthread].node = child; + othreads[totthread].begin = offset[b]; + othreads[totthread].end = offset[b] + count[b]; + othreads[totthread].depth = depth + 1; + BLI_threadpool_insert(&threads, &othreads[totthread]); + totthread++; + } + else + occ_build_recursive(tree, child, offset[b], offset[b] + count[b], depth + 1); + } + } + + if (depth == 1 && tree->dothreadedbuild) + BLI_threadpool_end(&threads); + } + + /* combine area, position and sh */ + for (b = 0; b 
< TOTCHILD; b++) { + if (node->childflag & (1 << b)) { + child = &tmpnode; + occ_node_from_face(tree->face + node->child[b].face, &tmpnode); + } + else { + child = node->child[b].node; + } + + if (child) { + node->area += child->area; + sh_add(node->sh, node->sh, child->sh); + madd_v3_v3fl(node->co, child->co, child->area); + } + } + + if (node->area != 0.0f) + mul_v3_fl(node->co, 1.0f / node->area); + + /* compute maximum distance from center */ + node->dco = 0.0f; + if (node->area > 0.0f) + occ_build_dco(tree, node, node->co, &node->dco); +} + +static void occ_build_sh_normalize(OccNode *node) +{ + /* normalize spherical harmonics to not include area, so + * we can clamp the dot product and then multiply by area */ + int b; + + if (node->area != 0.0f) + sh_mul(node->sh, 1.0f / node->area); + + for (b = 0; b < TOTCHILD; b++) { + if (node->childflag & (1 << b)) { + /* pass */ + } + else if (node->child[b].node) { + occ_build_sh_normalize(node->child[b].node); + } + } +} + +static OcclusionTree *occ_tree_build(Render *re) +{ + const int num_threads = re->r.threads; + OcclusionTree *tree; + ObjectInstanceRen *obi; + ObjectRen *obr; + Material *ma; + VlakRen *vlr = NULL; + int a, b, c, totface; + + /* count */ + totface = 0; + for (obi = re->instancetable.first; obi; obi = obi->next) { + obr = obi->obr; + for (a = 0; a < obr->totvlak; a++) { + if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak; + else vlr++; + + ma = vlr->mat; + + if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) + totface++; + } + } + + if (totface == 0) + return NULL; + + tree = MEM_callocN(sizeof(OcclusionTree), "OcclusionTree"); + tree->totface = totface; + + /* parameters */ + tree->error = get_render_aosss_error(&re->r, re->wrld.ao_approx_error); + tree->distfac = (re->wrld.aomode & WO_AODIST) ? 
re->wrld.aodistfac : 0.0f; + tree->doindirect = (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0); + + /* allocation */ + tree->arena = BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena"); + BLI_memarena_use_calloc(tree->arena); + + if (re->wrld.aomode & WO_AOCACHE) + tree->cache = MEM_callocN(sizeof(OcclusionCache) * num_threads, "OcclusionCache"); + + tree->face = MEM_callocN(sizeof(OccFace) * totface, "OcclusionFace"); + tree->co = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionCo"); + tree->occlusion = MEM_callocN(sizeof(float) * totface, "OcclusionOcclusion"); + + if (tree->doindirect) + tree->rad = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionRad"); + + /* make array of face pointers */ + for (b = 0, c = 0, obi = re->instancetable.first; obi; obi = obi->next, c++) { + obr = obi->obr; + for (a = 0; a < obr->totvlak; a++) { + if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak; + else vlr++; + + ma = vlr->mat; + + if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) { + tree->face[b].obi = c; + tree->face[b].facenr = a; + tree->occlusion[b] = 1.0f; + occ_face(&tree->face[b], tree->co[b], NULL, NULL); + b++; + } + } + } + + /* threads */ + tree->totbuildthread = (re->r.threads > 1 && totface > 10000) ? 
8 : 1; + tree->dothreadedbuild = (tree->totbuildthread > 1); + + /* recurse */ + tree->root = BLI_memarena_alloc(tree->arena, sizeof(OccNode)); + tree->maxdepth = 1; + occ_build_recursive(tree, tree->root, 0, totface, 1); + + if (tree->doindirect) { + if (!(re->test_break(re->tbh))) + occ_build_shade(re, tree); + + if (!(re->test_break(re->tbh))) + occ_sum_occlusion(tree, tree->root); + } + + MEM_freeN(tree->co); + tree->co = NULL; + + if (!(re->test_break(re->tbh))) + occ_build_sh_normalize(tree->root); + + for (a = 0; a < num_threads; a++) + tree->stack[a] = MEM_callocN(sizeof(OccNode) * TOTCHILD * (tree->maxdepth + 1), "OccStack"); + + tree->num_threads = num_threads; + + return tree; +} + +static void occ_free_tree(OcclusionTree *tree) +{ + int a; + + if (tree) { + if (tree->arena) BLI_memarena_free(tree->arena); + for (a = 0; a < tree->num_threads; a++) + if (tree->stack[a]) + MEM_freeN(tree->stack[a]); + if (tree->occlusion) MEM_freeN(tree->occlusion); + if (tree->cache) MEM_freeN(tree->cache); + if (tree->face) MEM_freeN(tree->face); + if (tree->rad) MEM_freeN(tree->rad); + MEM_freeN(tree); + } +} + +/* ------------------------- Traversal --------------------------- */ + +static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3]) +{ + float dotreceive, dotemit; + float ev[3]; + + ev[0] = -v[0] * invd2; + ev[1] = -v[1] * invd2; + ev[2] = -v[2] * invd2; + dotemit = sh_eval(node->sh, ev); + dotreceive = dot_v3v3(receivenormal, v) * invd2; + + CLAMP(dotemit, 0.0f, 1.0f); + CLAMP(dotreceive, 0.0f, 1.0f); + + return ((node->area * dotemit * dotreceive) / (d2 + node->area * INVPI)) * INVPI; +} + +static float occ_form_factor(OccFace *face, float *p, float *n) +{ + ObjectInstanceRen *obi; + VlakRen *vlr; + float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib = 0.0f; + + obi = &R.objectinstance[face->obi]; + vlr = RE_findOrAddVlak(obi->obr, face->facenr); + + copy_v3_v3(v1, vlr->v1->co); + 
copy_v3_v3(v2, vlr->v2->co); + copy_v3_v3(v3, vlr->v3->co); + + if (obi->flag & R_TRANSFORMED) { + mul_m4_v3(obi->mat, v1); + mul_m4_v3(obi->mat, v2); + mul_m4_v3(obi->mat, v3); + } + + if (form_factor_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3)) + contrib += form_factor_quad(p, n, q0, q1, q2, q3); + + if (vlr->v4) { + copy_v3_v3(v4, vlr->v4->co); + if (obi->flag & R_TRANSFORMED) + mul_m4_v3(obi->mat, v4); + + if (form_factor_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3)) + contrib += form_factor_quad(p, n, q0, q1, q2, q3); + } + + return contrib; +} + +static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, + const float pp[3], const float pn[3], float *occ, float rad[3], float bentn[3]) +{ + OccNode *node, **stack; + OccFace *face; + float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2; + float distfac, fac, error, d2, weight, emitarea; + int b, f, totstack; + + /* init variables */ + copy_v3_v3(p, pp); + copy_v3_v3(n, pn); + madd_v3_v3fl(p, n, 1e-4f); + + if (bentn) + copy_v3_v3(bentn, n); + + error = tree->error; + distfac = tree->distfac; + + resultocc = 0.0f; + zero_v3(resultrad); + + /* init stack */ + stack = tree->stack[thread]; + stack[0] = tree->root; + totstack = 1; + + while (totstack) { + /* pop point off the stack */ + node = stack[--totstack]; + + sub_v3_v3v3(v, node->co, p); + d2 = dot_v3v3(v, v) + 1e-16f; + emitarea = MAX2(node->area, node->dco); + + if (d2 * error > emitarea) { + if (distfac != 0.0f) { + fac = 1.0f / (1.0f + distfac * d2); + if (fac < 0.01f) + continue; + } + else + fac = 1.0f; + + /* accumulate occlusion from spherical harmonics */ + invd2 = 1.0f / sqrtf(d2); + weight = occ_solid_angle(node, v, d2, invd2, n); + + if (rad) + madd_v3_v3fl(resultrad, node->rad, weight * fac); + + weight *= node->occlusion; + + if (bentn) { + bentn[0] -= weight * invd2 * v[0]; + bentn[1] -= weight * invd2 * v[1]; + bentn[2] -= weight * invd2 * v[2]; + } + + resultocc += weight * fac; + } + else { + /* traverse into 
children */ + for (b = 0; b < TOTCHILD; b++) { + if (node->childflag & (1 << b)) { + f = node->child[b].face; + face = &tree->face[f]; + + /* accumulate occlusion with face form factor */ + if (!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) { + if (bentn || distfac != 0.0f) { + occ_face(face, co, NULL, NULL); + sub_v3_v3v3(v, co, p); + d2 = dot_v3v3(v, v) + 1e-16f; + + fac = (distfac == 0.0f) ? 1.0f : 1.0f / (1.0f + distfac * d2); + if (fac < 0.01f) + continue; + } + else + fac = 1.0f; + + weight = occ_form_factor(face, p, n); + + if (rad) + madd_v3_v3fl(resultrad, tree->rad[f], weight * fac); + + weight *= tree->occlusion[f]; + + if (bentn) { + invd2 = 1.0f / sqrtf(d2); + bentn[0] -= weight * invd2 * v[0]; + bentn[1] -= weight * invd2 * v[1]; + bentn[2] -= weight * invd2 * v[2]; + } + + resultocc += weight * fac; + } + } + else if (node->child[b].node) { + /* push child on the stack */ + stack[totstack++] = node->child[b].node; + } + } + } + } + + if (occ) *occ = resultocc; + if (rad) copy_v3_v3(rad, resultrad); +#if 0 + if (rad && exclude) { + int a; + for (a = 0; a < tree->totface; a++) + if ((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr)) + copy_v3_v3(rad, tree->rad[a]); + } +#endif + if (bentn) normalize_v3(bentn); +} + +static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce) +{ + float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ; + int bounce, i; + + rad = MEM_callocN(sizeof(float) * 3 * tree->totface, "OcclusionBounceRad"); + sum = MEM_dupallocN(tree->rad); + + for (bounce = 1; bounce < totbounce; bounce++) { + for (i = 0; i < tree->totface; i++) { + occ_face(&tree->face[i], co, n, NULL); + madd_v3_v3fl(co, n, 1e-8f); + + occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL); + rad[i][0] = MAX2(rad[i][0], 0.0f); + rad[i][1] = MAX2(rad[i][1], 0.0f); + rad[i][2] = MAX2(rad[i][2], 0.0f); + add_v3_v3(sum[i], rad[i]); + + if (re->test_break(re->tbh)) + break; + } 
+ + if (re->test_break(re->tbh)) + break; + + tmp = tree->rad; + tree->rad = rad; + rad = tmp; + + occ_sum_occlusion(tree, tree->root); + } + + MEM_freeN(rad); + MEM_freeN(tree->rad); + tree->rad = sum; + + if (!re->test_break(re->tbh)) + occ_sum_occlusion(tree, tree->root); +} + +static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass) +{ + float *occ, co[3], n[3]; + int pass, i; + + occ = MEM_callocN(sizeof(float) * tree->totface, "OcclusionPassOcc"); + + for (pass = 0; pass < totpass; pass++) { + for (i = 0; i < tree->totface; i++) { + occ_face(&tree->face[i], co, n, NULL); + negate_v3(n); + madd_v3_v3fl(co, n, 1e-8f); + + occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL); + if (re->test_break(re->tbh)) + break; + } + + if (re->test_break(re->tbh)) + break; + + for (i = 0; i < tree->totface; i++) { + tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f); + if (tree->occlusion[i] < 0.0f) + tree->occlusion[i] = 0.0f; + } + + occ_sum_occlusion(tree, tree->root); + } + + MEM_freeN(occ); +} + +static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, + const float co[3], const float n[3], int thread, int onlyshadow, + float *ao, float *env, float *indirect) +{ + float nn[3], bn[3], fac, occ, occlusion, correction, rad[3]; + int envcolor; + + envcolor = re->wrld.aocolor; + if (onlyshadow) + envcolor = WO_AOPLAIN; + + negate_v3_v3(nn, n); + + occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect) ? rad : NULL, (env && envcolor) ? 
bn : NULL); + + correction = re->wrld.ao_approx_correction; + + occlusion = (1.0f - correction) * (1.0f - occ); + CLAMP(occlusion, 0.0f, 1.0f); + if (correction != 0.0f) + occlusion += correction * expf(-occ); + + if (env) { + /* sky shading using bent normal */ + if (ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) { + fac = 0.5f * (1.0f + dot_v3v3(bn, re->grvec)); + env[0] = (1.0f - fac) * re->wrld.horr + fac * re->wrld.zenr; + env[1] = (1.0f - fac) * re->wrld.horg + fac * re->wrld.zeng; + env[2] = (1.0f - fac) * re->wrld.horb + fac * re->wrld.zenb; + + mul_v3_fl(env, occlusion); + } + else { + env[0] = occlusion; + env[1] = occlusion; + env[2] = occlusion; + } +#if 0 + else { /* WO_AOSKYTEX */ + float dxyview[3]; + bn[0] = -bn[0]; + bn[1] = -bn[1]; + bn[2] = -bn[2]; + dxyview[0] = 1.0f; + dxyview[1] = 1.0f; + dxyview[2] = 0.0f; + shadeSkyView(ao, co, bn, dxyview); + } +#endif + } + + if (ao) { + ao[0] = occlusion; + ao[1] = occlusion; + ao[2] = occlusion; + } + + if (tree->doindirect) copy_v3_v3(indirect, rad); + else zero_v3(indirect); +} + +/* ---------------------------- Caching ------------------------------- */ + +static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y) +{ + x -= cache->x; + y -= cache->y; + + x /= cache->step; + y /= cache->step; + x *= cache->step; + y *= cache->step; + + if (x < 0 || x >= cache->w || y < 0 || y >= cache->h) + return NULL; + else + return &cache->sample[y * cache->w + x]; +} + +static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect) +{ + OcclusionCache *cache; + OcclusionCacheSample *samples[4], *sample; + float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo; + float d[3], dist2; + int i, x1, y1, x2, y2; + + if (!tree->cache) + return 0; + + /* first try to find a sample in the same pixel */ + cache = &tree->cache[thread]; + + if (cache->sample && cache->step) { + sample = &cache->sample[(y - cache->y) * cache->w + (x - 
cache->x)]; + if (sample->filled) { + sub_v3_v3v3(d, sample->co, co); + dist2 = dot_v3v3(d, d); + if (dist2 < 0.5f * sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) { + copy_v3_v3(ao, sample->ao); + copy_v3_v3(env, sample->env); + copy_v3_v3(indirect, sample->indirect); + return 1; + } + } + } + else + return 0; + + /* try to interpolate between 4 neighboring pixels */ + samples[0] = find_occ_sample(cache, x, y); + samples[1] = find_occ_sample(cache, x + cache->step, y); + samples[2] = find_occ_sample(cache, x, y + cache->step); + samples[3] = find_occ_sample(cache, x + cache->step, y + cache->step); + + for (i = 0; i < 4; i++) + if (!samples[i] || !samples[i]->filled) + return 0; + + /* require intensities not being too different */ + mino = min_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); + maxo = max_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); + + if (maxo - mino > 0.05f) + return 0; + + /* compute weighted interpolation between samples */ + zero_v3(ao); + zero_v3(env); + zero_v3(indirect); + totw = 0.0f; + + x1 = samples[0]->x; + y1 = samples[0]->y; + x2 = samples[3]->x; + y2 = samples[3]->y; + + tx = (float)(x2 - x) / (float)(x2 - x1); + ty = (float)(y2 - y) / (float)(y2 - y1); + + wb[3] = (1.0f - tx) * (1.0f - ty); + wb[2] = (tx) * (1.0f - ty); + wb[1] = (1.0f - tx) * (ty); + wb[0] = tx * ty; + + for (i = 0; i < 4; i++) { + sub_v3_v3v3(d, samples[i]->co, co); + //dist2 = dot_v3v3(d, d); + + wz[i] = 1.0f; //(samples[i]->dist2/(1e-4f + dist2)); + wn[i] = pow(dot_v3v3(samples[i]->n, n), 32.0f); + + w = wb[i] * wn[i] * wz[i]; + + totw += w; + madd_v3_v3fl(ao, samples[i]->ao, w); + madd_v3_v3fl(env, samples[i]->env, w); + madd_v3_v3fl(indirect, samples[i]->indirect, w); + } + + if (totw >= 0.9f) { + totw = 1.0f / totw; + mul_v3_fl(ao, totw); + mul_v3_fl(env, totw); + mul_v3_fl(indirect, totw); + return 1; + } + + return 0; +} + +static void 
sample_occ_surface(ShadeInput *shi) +{ + StrandRen *strand = shi->strand; + StrandSurface *mesh = strand->buffer->surface; + const int *face, *index = RE_strandren_get_face(shi->obr, strand, 0); + float w[4], *co1, *co2, *co3, *co4; + + if (mesh && mesh->face && mesh->co && mesh->ao && index) { + face = mesh->face[*index]; + + co1 = mesh->co[face[0]]; + co2 = mesh->co[face[1]]; + co3 = mesh->co[face[2]]; + + if (face[3]) { + co4 = mesh->co[face[3]]; + interp_weights_quad_v3(w, co1, co2, co3, co4, strand->vert->co); + } + else { + interp_weights_tri_v3(w, co1, co2, co3, strand->vert->co); + } + + zero_v3(shi->ao); + zero_v3(shi->env); + zero_v3(shi->indirect); + + madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]); + madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]); + madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]); + madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]); + madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]); + madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]); + madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]); + madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]); + madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]); + if (face[3]) { + madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]); + madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]); + madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]); + } + } + else { + shi->ao[0] = 1.0f; + shi->ao[1] = 1.0f; + shi->ao[2] = 1.0f; + zero_v3(shi->env); + zero_v3(shi->indirect); + } +} + +/* ------------------------- External Functions --------------------------- */ + +static void *exec_strandsurface_sample(void *data) +{ + OcclusionThread *othread = (OcclusionThread *)data; + Render *re = othread->re; + StrandSurface *mesh = othread->mesh; + float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4; + int a, *face; + + for (a = othread->begin; a < othread->end; a++) { + face = mesh->face[a]; + co1 = mesh->co[face[0]]; + co2 = mesh->co[face[1]]; + co3 = mesh->co[face[2]]; + + if 
(face[3]) { + co4 = mesh->co[face[3]]; + + mid_v3_v3v3(co, co1, co3); + normal_quad_v3(n, co1, co2, co3, co4); + } + else { + mid_v3_v3v3v3(co, co1, co2, co3); + normal_tri_v3(n, co1, co2, co3); + } + negate_v3(n); + + sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect); + copy_v3_v3(othread->faceao[a], ao); + copy_v3_v3(othread->faceenv[a], env); + copy_v3_v3(othread->faceindirect[a], indirect); + } + + return NULL; +} + +void make_occ_tree(Render *re) +{ + OcclusionThread othreads[BLENDER_MAX_THREADS]; + OcclusionTree *tree; + StrandSurface *mesh; + ListBase threads; + float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3]; + int a, totface, totthread, *face, *count; + + /* ugly, needed for occ_face */ + R = *re; + + re->i.infostr = IFACE_("Occlusion preprocessing"); + re->stats_draw(re->sdh, &re->i); + + re->occlusiontree = tree = occ_tree_build(re); + + if (tree && !re->test_break(re->tbh)) { + if (re->wrld.ao_approx_passes > 0) + occ_compute_passes(re, tree, re->wrld.ao_approx_passes); + if (tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT)) + occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces); + + for (mesh = re->strandsurface.first; mesh; mesh = mesh->next) { + if (!mesh->face || !mesh->co || !mesh->ao) + continue; + + count = MEM_callocN(sizeof(int) * mesh->totvert, "OcclusionCount"); + faceao = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceAO"); + faceenv = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceEnv"); + faceindirect = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceIndirect"); + + totthread = (mesh->totface > 10000) ? 
re->r.threads : 1; + totface = mesh->totface / totthread; + for (a = 0; a < totthread; a++) { + othreads[a].re = re; + othreads[a].faceao = faceao; + othreads[a].faceenv = faceenv; + othreads[a].faceindirect = faceindirect; + othreads[a].thread = a; + othreads[a].mesh = mesh; + othreads[a].begin = a * totface; + othreads[a].end = (a == totthread - 1) ? mesh->totface : (a + 1) * totface; + } + + if (totthread == 1) { + exec_strandsurface_sample(&othreads[0]); + } + else { + BLI_threadpool_init(&threads, exec_strandsurface_sample, totthread); + + for (a = 0; a < totthread; a++) + BLI_threadpool_insert(&threads, &othreads[a]); + + BLI_threadpool_end(&threads); + } + + for (a = 0; a < mesh->totface; a++) { + face = mesh->face[a]; + + copy_v3_v3(ao, faceao[a]); + copy_v3_v3(env, faceenv[a]); + copy_v3_v3(indirect, faceindirect[a]); + + add_v3_v3(mesh->ao[face[0]], ao); + add_v3_v3(mesh->env[face[0]], env); + add_v3_v3(mesh->indirect[face[0]], indirect); + count[face[0]]++; + add_v3_v3(mesh->ao[face[1]], ao); + add_v3_v3(mesh->env[face[1]], env); + add_v3_v3(mesh->indirect[face[1]], indirect); + count[face[1]]++; + add_v3_v3(mesh->ao[face[2]], ao); + add_v3_v3(mesh->env[face[2]], env); + add_v3_v3(mesh->indirect[face[2]], indirect); + count[face[2]]++; + + if (face[3]) { + add_v3_v3(mesh->ao[face[3]], ao); + add_v3_v3(mesh->env[face[3]], env); + add_v3_v3(mesh->indirect[face[3]], indirect); + count[face[3]]++; + } + } + + for (a = 0; a < mesh->totvert; a++) { + if (count[a]) { + mul_v3_fl(mesh->ao[a], 1.0f / count[a]); + mul_v3_fl(mesh->env[a], 1.0f / count[a]); + mul_v3_fl(mesh->indirect[a], 1.0f / count[a]); + } + } + + MEM_freeN(count); + MEM_freeN(faceao); + MEM_freeN(faceenv); + MEM_freeN(faceindirect); + } + } +} + +void free_occ(Render *re) +{ + if (re->occlusiontree) { + occ_free_tree(re->occlusiontree); + re->occlusiontree = NULL; + } +} + +void sample_occ(Render *re, ShadeInput *shi) +{ + OcclusionTree *tree = re->occlusiontree; + OcclusionCache *cache; + 
OcclusionCacheSample *sample; + OccFace exclude; + int onlyshadow; + + if (tree) { + if (shi->strand) { + sample_occ_surface(shi); + } + /* try to get result from the cache if possible */ + else if (shi->depth != 0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) { + /* no luck, let's sample the occlusion */ + exclude.obi = shi->obi - re->objectinstance; + exclude.facenr = shi->vlr->index; + onlyshadow = (shi->mat->mode & MA_ONLYSHADOW); + sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect); + + /* fill result into sample, each time */ + if (tree->cache) { + cache = &tree->cache[shi->thread]; + + if (cache->sample && cache->step) { + sample = &cache->sample[(shi->ys - cache->y) * cache->w + (shi->xs - cache->x)]; + copy_v3_v3(sample->co, shi->co); + copy_v3_v3(sample->n, shi->vno); + copy_v3_v3(sample->ao, shi->ao); + copy_v3_v3(sample->env, shi->env); + copy_v3_v3(sample->indirect, shi->indirect); + sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]); + sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2])); + sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2])); + sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco); + sample->filled = 1; + } + } + } + } + else { + shi->ao[0] = 1.0f; + shi->ao[1] = 1.0f; + shi->ao[2] = 1.0f; + + shi->env[0] = 0.0f; + shi->env[1] = 0.0f; + shi->env[2] = 0.0f; + + shi->indirect[0] = 0.0f; + shi->indirect[1] = 0.0f; + shi->indirect[2] = 0.0f; + } +} + +void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp) +{ + OcclusionTree *tree = re->occlusiontree; + PixStr ps; + OcclusionCache *cache; + OcclusionCacheSample *sample; + OccFace exclude; + ShadeInput *shi; + intptr_t *rd = NULL; + int *ro = NULL, *rp = NULL, *rz = NULL, onlyshadow; + 
int x, y, step = CACHE_STEP; + + if (!tree->cache) + return; + + cache = &tree->cache[pa->thread]; + cache->w = pa->rectx; + cache->h = pa->recty; + cache->x = pa->disprect.xmin; + cache->y = pa->disprect.ymin; + cache->step = step; + cache->sample = MEM_callocN(sizeof(OcclusionCacheSample) * cache->w * cache->h, "OcclusionCacheSample"); + sample = cache->sample; + + if (re->osa) { + rd = pa->rectdaps; + } + else { + /* fake pixel struct for non-osa */ + ps.next = NULL; + ps.mask = 0xFFFF; + + ro = pa->recto; + rp = pa->rectp; + rz = pa->rectz; + } + + /* compute a sample at every step pixels */ + for (y = pa->disprect.ymin; y < pa->disprect.ymax; y++) { + for (x = pa->disprect.xmin; x < pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) { + if (!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax - 1)) + continue; + if (!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax - 1)) + continue; + + if (re->osa) { + if (!*rd) continue; + + shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y); + } + else { + if (!*rp) continue; + + ps.obi = *ro; + ps.facenr = *rp; + ps.z = *rz; + shade_samples_fill_with_ps(ssamp, &ps, x, y); + } + + shi = ssamp->shi; + if (shi->vlr) { + onlyshadow = (shi->mat->mode & MA_ONLYSHADOW); + exclude.obi = shi->obi - re->objectinstance; + exclude.facenr = shi->vlr->index; + sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect); + + copy_v3_v3(sample->co, shi->co); + copy_v3_v3(sample->n, shi->vno); + copy_v3_v3(sample->ao, shi->ao); + copy_v3_v3(sample->env, shi->env); + copy_v3_v3(sample->indirect, shi->indirect); + sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]); + sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2])); + sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2])); + sample->dist2 = 
dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco); + sample->x = shi->xs; + sample->y = shi->ys; + sample->filled = 1; + } + + if (re->test_break(re->tbh)) + break; + } + } +} + +void free_occ_samples(Render *re, RenderPart *pa) +{ + OcclusionTree *tree = re->occlusiontree; + OcclusionCache *cache; + + if (tree->cache) { + cache = &tree->cache[pa->thread]; + + if (cache->sample) + MEM_freeN(cache->sample); + + cache->w = 0; + cache->h = 0; + cache->step = 0; + } +} + diff --git a/source/blender/render/intern/source/pipeline.c b/source/blender/render/intern/source/pipeline.c index e71cc6d063e..c9f13004836 100644 --- a/source/blender/render/intern/source/pipeline.c +++ b/source/blender/render/intern/source/pipeline.c @@ -137,7 +137,7 @@ /* here we store all renders */ static struct { ListBase renderlist; -} RenderGlobal = {{NULL, NULL}}; +} RenderGlobal = {{NULL, NULL}}; /* ********* alloc and free ******** */ @@ -424,10 +424,10 @@ void RE_AcquireResultImage(Render *re, RenderResult *rr, const int view_id) if (re->result) { RenderLayer *rl; RenderView *rv; - + rr->rectx = re->result->rectx; rr->recty = re->result->recty; - + /* actview view */ rv = RE_RenderViewGetById(re->result, view_id); rr->have_combined = (rv->rectf != NULL); @@ -494,7 +494,7 @@ Render *RE_NewRender(const char *name) /* only one render per name exists */ re = RE_GetRender(name); if (re == NULL) { - + /* new render data struct */ re = MEM_callocN(sizeof(Render), "new render"); BLI_addtail(&RenderGlobal.renderlist, re); @@ -502,7 +502,7 @@ Render *RE_NewRender(const char *name) BLI_rw_mutex_init(&re->resultmutex); BLI_rw_mutex_init(&re->partsmutex); } - + RE_InitRenderCB(re); return re; @@ -574,10 +574,10 @@ void RE_FreeRender(Render *re) /* main dbase can already be invalid now, some database-free code checks it */ re->main = NULL; re->scene = NULL; - + render_result_free(re->result); render_result_free(re->pushedresult); - + BLI_remlink(&RenderGlobal.renderlist, re); MEM_freeN(re); 
} @@ -715,7 +715,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd, bool had_freestyle = (re->r.mode & R_EDGE_FRS) != 0; re->ok = true; /* maybe flag */ - + re->i.starttime = PIL_check_seconds_timer(); /* copy render data and render layers for thread safety */ @@ -753,7 +753,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd, } re->r.scemode = check_mode_full_sample(&re->r); - + if (single_layer) { int index = BLI_findindex(render_layers, single_layer); if (index != -1) { @@ -761,7 +761,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd, re->r.scemode |= R_SINGLE_LAYER; } } - + /* if preview render, we try to keep old result */ BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE); @@ -794,7 +794,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd, } } else { - + /* make empty render result, so display callbacks can initialize */ render_result_free(re->result); re->result = MEM_callocN(sizeof(RenderResult), "new render result"); @@ -811,7 +811,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd, RE_parts_clamp(re); BLI_rw_mutex_unlock(&re->resultmutex); - + RE_init_threadcount(re); RE_point_density_fix_linking(); @@ -925,7 +925,7 @@ void render_update_anim_renderdata(Render *re, RenderData *rd, ListBase *render_ void RE_SetWindow(Render *re, const rctf *viewplane, float clipsta, float clipend) { /* re->ok flag? */ - + re->viewplane = *viewplane; re->clipsta = clipsta; re->clipend = clipend; @@ -934,13 +934,13 @@ void RE_SetWindow(Render *re, const rctf *viewplane, float clipsta, float clipen perspective_m4(re->winmat, re->viewplane.xmin, re->viewplane.xmax, re->viewplane.ymin, re->viewplane.ymax, re->clipsta, re->clipend); - + } void RE_SetOrtho(Render *re, const rctf *viewplane, float clipsta, float clipend) { /* re->ok flag? 
*/ - + re->viewplane = *viewplane; re->clipsta = clipsta; re->clipend = clipend; @@ -961,7 +961,7 @@ void RE_SetView(Render *re, float mat[4][4]) void RE_GetViewPlane(Render *re, rctf *r_viewplane, rcti *r_disprect) { *r_viewplane = re->viewplane; - + /* make disprect zero when no border render, is needed to detect changes in 3d view render */ if (re->r.mode & R_BORDER) { *r_disprect = re->disprect; @@ -1028,7 +1028,7 @@ void RE_test_break_cb(Render *re, void *handle, int (*f)(void *handle)) #if 0 void RE_AddObject(Render *UNUSED(re), Object *UNUSED(ob)) { - + } #endif @@ -1121,9 +1121,9 @@ static void do_render(Render *re) /* now use renderdata and camera to set viewplane */ RE_SetCamera(re, camera); - + do_render_3d(re); - + /* when border render, check if we have to insert it in black */ render_result_uncrop(re); } @@ -1136,7 +1136,7 @@ static void render_scene(Render *re, Scene *sce, int cfra) { Render *resc = RE_NewSceneRender(sce); int winx = re->winx, winy = re->winy; - + sce->r.cfra = cfra; BKE_scene_camera_switch_update(sce); @@ -1146,7 +1146,7 @@ static void render_scene(Render *re, Scene *sce, int cfra) winx = (sce->r.size * sce->r.xsch) / 100; winy = (sce->r.size * sce->r.ysch) / 100; } - + /* initial setup */ RE_InitState(resc, re, &sce->r, &sce->view_layers, NULL, winx, winy, &re->disprect); @@ -1157,7 +1157,7 @@ static void render_scene(Render *re, Scene *sce, int cfra) resc->main = re->main; resc->scene = sce; resc->lay = sce->lay; - + /* ensure scene has depsgraph, base flags etc OK */ BKE_scene_set_background(re->main, sce); @@ -1170,7 +1170,7 @@ static void render_scene(Render *re, Scene *sce, int cfra) resc->sdh = re->sdh; resc->current_scene_update = re->current_scene_update; resc->suh = re->suh; - + do_render(resc); } @@ -1179,11 +1179,11 @@ static int composite_needs_render(Scene *sce, int this_scene) { bNodeTree *ntree = sce->nodetree; bNode *node; - + if (ntree == NULL) return 1; if (sce->use_nodes == false) return 1; if ((sce->r.scemode & 
R_DOCOMP) == 0) return 1; - + for (node = ntree->nodes.first; node; node = node->next) { if (node->type == CMP_NODE_R_LAYERS && (node->flag & NODE_MUTED) == 0) if (this_scene == 0 || node->id == NULL || node->id == &sce->id) @@ -1334,14 +1334,14 @@ static void tag_scenes_for_render(Render *re) { bNode *node; Scene *sce; - + for (sce = re->main->scene.first; sce; sce = sce->id.next) { sce->id.tag &= ~LIB_TAG_DOIT; #ifdef DEPSGRAPH_WORKAROUND_HACK tag_dependend_objects_for_render(re->main, sce); #endif } - + #ifdef WITH_FREESTYLE if (re->freestyle_bmain) { for (sce = re->freestyle_bmain->scene.first; sce; sce = sce->id.next) { @@ -1359,9 +1359,9 @@ static void tag_scenes_for_render(Render *re) tag_dependend_objects_for_render(re->main, re->scene); #endif } - + if (re->scene->nodetree == NULL) return; - + /* check for render-layers nodes using other scenes, we tag them LIB_TAG_DOIT */ for (node = re->scene->nodetree->nodes.first; node; node = node->next) { node->flag &= ~NODE_TEST; @@ -1397,7 +1397,7 @@ static void tag_scenes_for_render(Render *re) } } } - + } static void ntree_render_scenes(Render *re) @@ -1406,15 +1406,15 @@ static void ntree_render_scenes(Render *re) int cfra = re->scene->r.cfra; Scene *restore_scene = re->scene; bool scene_changed = false; - + if (re->scene->nodetree == NULL) return; - + tag_scenes_for_render(re); #ifdef DEPSGRAPH_WORKAROUND_GROUP_HACK tag_collections_for_render(re); #endif - + /* now foreach render-result node tagged we do a full render */ /* results are stored in a way compisitor will find it */ for (node = re->scene->nodetree->nodes.first; node; node = node->next) { @@ -1426,7 +1426,7 @@ static void ntree_render_scenes(Render *re) scene_changed |= scene != restore_scene; render_scene(re, scene, cfra); node->flag &= ~NODE_TEST; - + nodeUpdate(restore_scene->nodetree, node); } } @@ -1531,10 +1531,10 @@ static void do_render_composite(Render *re) { bNodeTree *ntree = re->scene->nodetree; int update_newframe = 0; - + /* INIT 
seeding, compositor can use random texture */ BLI_srandom(re->r.cfra); - + if (composite_needs_render(re->scene, 1)) { /* save memory... free all cached images */ ntreeFreeCache(ntree); @@ -1550,7 +1550,7 @@ static void do_render_composite(Render *re) /* ensure new result gets added, like for regular renders */ BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE); - + render_result_free(re->result); if ((re->r.mode & R_CROP) == 0) { render_result_disprect_to_full_resolution(re); @@ -1558,30 +1558,30 @@ static void do_render_composite(Render *re) re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS, RR_ALL_VIEWS); BLI_rw_mutex_unlock(&re->resultmutex); - + /* scene render process already updates animsys */ update_newframe = 1; } - + /* swap render result */ if (re->r.scemode & R_SINGLE_LAYER) { BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE); render_result_single_layer_end(re); BLI_rw_mutex_unlock(&re->resultmutex); } - + if (!re->test_break(re->tbh)) { - + if (ntree) { ntreeCompositTagRender(re->scene); ntreeCompositTagAnimated(ntree); } - + if (ntree && re->scene->use_nodes && re->r.scemode & R_DOCOMP) { /* checks if there are render-result nodes that need scene */ if ((re->r.scemode & R_SINGLE_LAYER) == 0) ntree_render_scenes(re); - + if (!re->test_break(re->tbh)) { ntree->stats_draw = render_composit_stats; ntree->test_break = re->test_break; @@ -1589,16 +1589,16 @@ static void do_render_composite(Render *re) ntree->sdh = re; ntree->tbh = re->tbh; ntree->prh = re->prh; - + if (update_newframe) { /* If we have consistent depsgraph now would be a time to update them. 
*/ } - + RenderView *rv; for (rv = re->result->views.first; rv; rv = rv->next) { ntreeCompositExecTree(re->scene, ntree, &re->r, true, G.background == 0, &re->scene->view_settings, &re->scene->display_settings, rv->name); } - + ntree->stats_draw = NULL; ntree->test_break = NULL; ntree->progress = NULL; @@ -1651,15 +1651,15 @@ int RE_seq_render_active(Scene *scene, RenderData *rd) Sequence *seq; ed = scene->ed; - + if (!(rd->scemode & R_DOSEQ) || !ed || !ed->seqbase.first) return 0; - + for (seq = ed->seqbase.first; seq; seq = seq->next) { if (seq->type != SEQ_TYPE_SOUND_RAM) return 1; } - + return 0; } @@ -1810,18 +1810,18 @@ static void do_render_all_options(Render *re) do_render_seq(re); render_seq = true; } - + re->stats_draw(re->sdh, &re->i); re->display_update(re->duh, re->result, NULL); } else { do_render_composite(re); } - + re->i.lastframetime = PIL_check_seconds_timer() - re->i.starttime; - + re->stats_draw(re->sdh, &re->i); - + /* save render result stamp if needed */ if (re->result != NULL) { camera = RE_GetCamera(re); @@ -1975,7 +1975,7 @@ static int check_composite_output(Scene *scene) bool RE_is_rendering_allowed(Scene *scene, ViewLayer *single_layer, Object *camera_override, ReportList *reports) { int scemode = check_mode_full_sample(&scene->r); - + if (scene->r.mode & R_BORDER) { if (scene->r.border.xmax <= scene->r.border.xmin || scene->r.border.ymax <= scene->r.border.ymin) @@ -1984,30 +1984,30 @@ bool RE_is_rendering_allowed(Scene *scene, ViewLayer *single_layer, Object *came return 0; } } - + if (scemode & (R_EXR_TILE_FILE | R_FULL_SAMPLE)) { char str[FILE_MAX]; - + render_result_exr_file_path(scene, "", 0, str); - + if (!BLI_file_is_writable(str)) { BKE_report(reports, RPT_ERROR, "Cannot save render buffers, check the temp default path"); return 0; } } - + if (scemode & R_DOCOMP) { if (scene->use_nodes) { if (!scene->nodetree) { BKE_report(reports, RPT_ERROR, "No node tree in scene"); return 0; } - + if (!check_composite_output(scene)) { 
BKE_report(reports, RPT_ERROR, "No render output node in scene"); return 0; } - + if (scemode & R_FULL_SAMPLE) { if (composite_needs_render(scene, 0) == 0) { BKE_report(reports, RPT_ERROR, "Full sample AA not supported without 3D rendering"); @@ -2016,12 +2016,12 @@ bool RE_is_rendering_allowed(Scene *scene, ViewLayer *single_layer, Object *came } } } - + /* check valid camera, without camera render is OK (compo, seq) */ if (!check_valid_camera(scene, camera_override, reports)) { return 0; } - + /* get panorama & ortho, only after camera is set */ BKE_camera_object_mode(&scene->r, camera_override ? camera_override : scene->camera); @@ -2098,19 +2098,19 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain, { int winx, winy; rcti disprect; - + /* r.xsch and r.ysch has the actual view window size * r.border is the clipping rect */ - + /* calculate actual render result and display size */ winx = (rd->size * rd->xsch) / 100; winy = (rd->size * rd->ysch) / 100; - + /* we always render smaller part, inserting it in larger image is compositor bizz, it uses disprect for it */ if (scene->r.mode & R_BORDER) { disprect.xmin = rd->border.xmin * winx; disprect.xmax = rd->border.xmax * winx; - + disprect.ymin = rd->border.ymin * winy; disprect.ymax = rd->border.ymax * winy; } @@ -2119,7 +2119,7 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain, disprect.xmax = winx; disprect.ymax = winy; } - + re->main = bmain; re->scene = scene; re->camera_override = camera_override; @@ -2134,7 +2134,7 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain, re->disprect = disprect; return 1; } - + /* check all scenes involved */ tag_scenes_for_render(re); @@ -2153,17 +2153,17 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain, ViewLayer *view_layer = BKE_view_layer_context_active_PLACEHOLDER(scene); update_physics_cache(re, scene, view_layer, anim_init); } - + if (single_layer 
|| scene->r.scemode & R_SINGLE_LAYER) { BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE); render_result_single_layer_begin(re); BLI_rw_mutex_unlock(&re->resultmutex); } - + RE_InitState(re, NULL, &scene->r, &scene->view_layers, single_layer, winx, winy, &disprect); if (!re->ok) /* if an error was printed, abort */ return 0; - + /* initstate makes new result, have to send changed tags around */ ntreeCompositTagRender(re->scene); @@ -2171,7 +2171,7 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain, re->display_init(re->dih, re->result); re->display_clear(re->dch, re->result); - + return 1; } @@ -2188,9 +2188,9 @@ void RE_BlenderFrame(Render *re, Main *bmain, Scene *scene, ViewLayer *single_la /* ugly global still... is to prevent preview events and signal subsurfs etc to make full resol */ G.is_rendering = true; - + scene->r.cfra = frame; - + if (render_initialize_from_main(re, &scene->r, bmain, scene, single_layer, camera_override, lay_override, 0, 0)) { @@ -2473,12 +2473,12 @@ static int do_write_image_or_movie(Render *re, Main *bmain, Scene *scene, bMovie /* write images as individual images or stereo */ ok = RE_WriteRenderViewsImage(re->reports, &rres, scene, true, name); } - + RE_ReleaseResultImageViews(re, &rres); render_time = re->i.lastframetime; re->i.lastframetime = PIL_check_seconds_timer() - re->i.starttime; - + BLI_timecode_string_from_time_simple(name, sizeof(name), re->i.lastframetime); printf(" Time: %s", name); @@ -2489,7 +2489,7 @@ static int do_write_image_or_movie(Render *re, Main *bmain, Scene *scene, bMovie BLI_timecode_string_from_time_simple(name, sizeof(name), re->i.lastframetime - render_time); printf(" (Saving: %s)\n", name); - + fputc('\n', stdout); fflush(stdout); /* needed for renderd !! (not anymore... 
(ton)) */ @@ -2713,10 +2713,10 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri /* run callbacs before rendering, before the scene is updated */ BLI_callback_exec(re->main, (ID *)scene, BLI_CB_EVT_RENDER_PRE); - + do_render_all_options(re); totrendered++; - + if (re->test_break(re->tbh) == 0) { if (!G.is_break) if (!do_write_image_or_movie(re, bmain, scene, mh, totvideos, NULL)) @@ -2724,7 +2724,7 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri } else G.is_break = true; - + if (G.is_break == true) { /* remove touched file */ if (is_movie == false) { @@ -2753,7 +2753,7 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri } } } - + break; } @@ -2763,12 +2763,12 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri } } } - + /* end movie */ if (is_movie) { re_movie_free_all(re, mh, totvideos); } - + if (totskipped && totrendered == 0) BKE_report(re->reports, RPT_INFO, "No frames rendered, skipped to not overwrite"); @@ -2812,16 +2812,16 @@ bool RE_ReadRenderResult(Scene *scene, Scene *scenode) int winx, winy; bool success; rcti disprect; - + /* calculate actual render result and display size */ winx = (scene->r.size * scene->r.xsch) / 100; winy = (scene->r.size * scene->r.ysch) / 100; - + /* only in movie case we render smaller part */ if (scene->r.mode & R_BORDER) { disprect.xmin = scene->r.border.xmin * winx; disprect.xmax = scene->r.border.xmax * winx; - + disprect.ymin = scene->r.border.ymin * winy; disprect.ymax = scene->r.border.ymax * winy; } @@ -2830,17 +2830,17 @@ bool RE_ReadRenderResult(Scene *scene, Scene *scenode) disprect.xmax = winx; disprect.ymax = winy; } - + if (scenode) scene = scenode; - + /* get render: it can be called from UI with draw callbacks */ re = RE_GetSceneRender(scene); if (re == NULL) re = RE_NewSceneRender(scene); RE_InitState(re, NULL, &scene->r, &scene->view_layers, NULL, winx, winy, &disprect); 
re->scene = scene; - + BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE); success = render_result_exr_file_cache_read(re); BLI_rw_mutex_unlock(&re->resultmutex); @@ -2850,7 +2850,7 @@ bool RE_ReadRenderResult(Scene *scene, Scene *scenode) return success; } -void RE_init_threadcount(Render *re) +void RE_init_threadcount(Render *re) { re->r.threads = BKE_render_num_threads(&re->r); } @@ -3014,7 +3014,7 @@ RenderPass *RE_create_gp_pass(RenderResult *rr, const char *layername, const cha rl->rectx = rr->rectx; rl->recty = rr->recty; } - + /* clear previous pass if exist or the new image will be over previous one*/ RenderPass *rp = RE_pass_find_by_name(rl, RE_PASSNAME_COMBINED, viewname); if (rp) { diff --git a/source/blender/render/intern/source/pixelblending.c b/source/blender/render/intern/source/pixelblending.c new file mode 100644 index 00000000000..c7cfe765f5b --- /dev/null +++ b/source/blender/render/intern/source/pixelblending.c @@ -0,0 +1,400 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. 
+ * + * Contributor(s): Full recode, 2004-2006 Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/pixelblending.c + * \ingroup render + * + * Functions to blend pixels with or without alpha, in various formats + * nzc - June 2000 + */ + + +#include <math.h> +#include <string.h> + +/* global includes */ + +/* own includes */ +#include "render_types.h" +#include "pixelblending.h" + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + + +/* ------------------------------------------------------------------------- */ +/* Debug/behavior defines */ +/* if defined: alpha blending with floats clips color, as with shorts */ +/* #define RE_FLOAT_COLOR_CLIPPING */ +/* if defined: alpha values are clipped */ +/* For now, we just keep alpha clipping. We run into thresholding and */ +/* blending difficulties otherwise. Be careful here. */ +#define RE_ALPHA_CLIPPING + + + +/* Threshold for a 'full' pixel: pixels with alpha above this level are */ +/* considered opaque This is the decimal value for 0xFFF0 / 0xFFFF */ +#define RE_FULL_COLOR_FLOAT 0.9998f +/* Threshold for an 'empty' pixel: pixels with alpha above this level are */ +/* considered completely transparent. 
This is the decimal value */ +/* for 0x000F / 0xFFFF */ +#define RE_EMPTY_COLOR_FLOAT 0.0002f + + +/* ------------------------------------------------------------------------- */ + +void addAlphaOverFloat(float dest[4], const float source[4]) +{ + /* d = s + (1-alpha_s)d*/ + float mul; + + mul = 1.0f - source[3]; + + dest[0] = (mul * dest[0]) + source[0]; + dest[1] = (mul * dest[1]) + source[1]; + dest[2] = (mul * dest[2]) + source[2]; + dest[3] = (mul * dest[3]) + source[3]; + +} + + +/* ------------------------------------------------------------------------- */ + +void addAlphaUnderFloat(float dest[4], const float source[4]) +{ + float mul; + + mul = 1.0f - dest[3]; + + dest[0] += (mul * source[0]); + dest[1] += (mul * source[1]); + dest[2] += (mul * source[2]); + dest[3] += (mul * source[3]); +} + + +/* ------------------------------------------------------------------------- */ +void addalphaAddfacFloat(float dest[4], const float source[4], char addfac) +{ + float m; /* weiging factor of destination */ + float c; /* intermediate color */ + + /* Addfac is a number between 0 and 1: rescale */ + /* final target is to diminish the influence of dest when addfac rises */ + m = 1.0f - (source[3] * ((255 - addfac) / 255.0f)); + + /* blend colors*/ + c = (m * dest[0]) + source[0]; +#ifdef RE_FLOAT_COLOR_CLIPPING + if (c >= RE_FULL_COLOR_FLOAT) dest[0] = RE_FULL_COLOR_FLOAT; + else +#endif + dest[0] = c; + + c = (m * dest[1]) + source[1]; +#ifdef RE_FLOAT_COLOR_CLIPPING + if (c >= RE_FULL_COLOR_FLOAT) dest[1] = RE_FULL_COLOR_FLOAT; + else +#endif + dest[1] = c; + + c = (m * dest[2]) + source[2]; +#ifdef RE_FLOAT_COLOR_CLIPPING + if (c >= RE_FULL_COLOR_FLOAT) dest[2] = RE_FULL_COLOR_FLOAT; + else +#endif + dest[2] = c; + + c = (m * dest[3]) + source[3]; +#ifdef RE_ALPHA_CLIPPING + if (c >= RE_FULL_COLOR_FLOAT) dest[3] = RE_FULL_COLOR_FLOAT; + else +#endif + dest[3] = c; + +} + + +/* ------------------------------------------------------------------------- */ + +/* 
filtered adding to scanlines */ +void add_filt_fmask(unsigned int mask, const float col[4], float *rowbuf, int row_w) +{ + /* calc the value of mask */ + float **fmask1 = R.samples->fmask1, **fmask2 = R.samples->fmask2; + float *rb1, *rb2, *rb3; + float val, r, g, b, al; + unsigned int a, maskand, maskshift; + int j; + + r = col[0]; + g = col[1]; + b = col[2]; + al = col[3]; + + rb2 = rowbuf - 4; + rb3 = rb2 - 4 * row_w; + rb1 = rb2 + 4 * row_w; + + maskand = (mask & 255); + maskshift = (mask >> 8); + + for (j = 2; j >= 0; j--) { + + a = j; + + val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + if (val != 0.0f) { + rb1[0] += val * r; + rb1[1] += val * g; + rb1[2] += val * b; + rb1[3] += val * al; + } + a += 3; + + val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + if (val != 0.0f) { + rb2[0] += val * r; + rb2[1] += val * g; + rb2[2] += val * b; + rb2[3] += val * al; + } + a += 3; + + val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + if (val != 0.0f) { + rb3[0] += val * r; + rb3[1] += val * g; + rb3[2] += val * b; + rb3[3] += val * al; + } + + rb1 += 4; + rb2 += 4; + rb3 += 4; + } +} + + +void mask_array(unsigned int mask, float filt[3][3]) +{ + float **fmask1 = R.samples->fmask1, **fmask2 = R.samples->fmask2; + unsigned int maskand = (mask & 255); + unsigned int maskshift = (mask >> 8); + int a, j; + + for (j = 2; j >= 0; j--) { + + a = j; + + filt[2][2 - j] = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + + a += 3; + + filt[1][2 - j] = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + + a += 3; + + filt[0][2 - j] = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + } +} + + +/** + * Index ordering, scanline based: + * + * <pre> + * --- --- --- + * | 2,0 | 2,1 | 2,2 | + * --- --- --- + * | 1,0 | 1,1 | 1,2 | + * --- --- --- + * | 0,0 | 0,1 | 0,2 | + * --- --- --- + * </pre> + */ + +void add_filt_fmask_coord(float filt[3][3], const float col[4], float *rowbuf, int row_stride, int x, int y, rcti *mask) +{ + float *fpoin[3][3]; + 
float val, r, g, b, al, lfilt[3][3]; + + r = col[0]; + g = col[1]; + b = col[2]; + al = col[3]; + + memcpy(lfilt, filt, sizeof(lfilt)); + + fpoin[0][1] = rowbuf - 4 * row_stride; + fpoin[1][1] = rowbuf; + fpoin[2][1] = rowbuf + 4 * row_stride; + + fpoin[0][0] = fpoin[0][1] - 4; + fpoin[1][0] = fpoin[1][1] - 4; + fpoin[2][0] = fpoin[2][1] - 4; + + fpoin[0][2] = fpoin[0][1] + 4; + fpoin[1][2] = fpoin[1][1] + 4; + fpoin[2][2] = fpoin[2][1] + 4; + + /* limit filtering to withing a mask for border rendering, so pixels don't + * leak outside of the border */ + if (y <= mask->ymin) { + fpoin[0][0] = fpoin[1][0]; + fpoin[0][1] = fpoin[1][1]; + fpoin[0][2] = fpoin[1][2]; + /* filter needs the opposite value yes! */ + lfilt[0][0] = filt[2][0]; + lfilt[0][1] = filt[2][1]; + lfilt[0][2] = filt[2][2]; + } + else if (y >= mask->ymax - 1) { + fpoin[2][0] = fpoin[1][0]; + fpoin[2][1] = fpoin[1][1]; + fpoin[2][2] = fpoin[1][2]; + + lfilt[2][0] = filt[0][0]; + lfilt[2][1] = filt[0][1]; + lfilt[2][2] = filt[0][2]; + } + + if (x <= mask->xmin) { + fpoin[2][0] = fpoin[2][1]; + fpoin[1][0] = fpoin[1][1]; + fpoin[0][0] = fpoin[0][1]; + + lfilt[2][0] = filt[2][2]; + lfilt[1][0] = filt[1][2]; + lfilt[0][0] = filt[0][2]; + } + else if (x >= mask->xmax - 1) { + fpoin[2][2] = fpoin[2][1]; + fpoin[1][2] = fpoin[1][1]; + fpoin[0][2] = fpoin[0][1]; + + lfilt[2][2] = filt[2][0]; + lfilt[1][2] = filt[1][0]; + lfilt[0][2] = filt[0][0]; + } + + + /* loop unroll */ +#define MASKFILT(i, j) \ + val = lfilt[i][j]; \ + if (val != 0.0f) { \ + float *fp = fpoin[i][j]; \ + fp[0] += val * r; \ + fp[1] += val * g; \ + fp[2] += val * b; \ + fp[3] += val * al; \ + } (void)0 + + MASKFILT(0, 0); + MASKFILT(0, 1); + MASKFILT(0, 2); + MASKFILT(1, 0); + MASKFILT(1, 1); + MASKFILT(1, 2); + MASKFILT(2, 0); + MASKFILT(2, 1); + MASKFILT(2, 2); + +#undef MASKFILT +} + +void add_filt_fmask_pixsize(unsigned int mask, float *in, float *rowbuf, int row_w, int pixsize) +{ + /* calc the value of mask */ + float **fmask1 = 
R.samples->fmask1, **fmask2 = R.samples->fmask2; + float *rb1, *rb2, *rb3; + float val; + unsigned int a, maskand, maskshift; + int i, j; + + rb2 = rowbuf - pixsize; + rb3 = rb2 - pixsize * row_w; + rb1 = rb2 + pixsize * row_w; + + maskand = (mask & 255); + maskshift = (mask >> 8); + + for (j = 2; j >= 0; j--) { + + a = j; + + val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + if (val != 0.0f) { + for (i = 0; i < pixsize; i++) + rb1[i] += val * in[i]; + } + a += 3; + + val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + if (val != 0.0f) { + for (i = 0; i < pixsize; i++) + rb2[i] += val * in[i]; + } + a += 3; + + val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift); + if (val != 0.0f) { + for (i = 0; i < pixsize; i++) + rb3[i] += val * in[i]; + } + + rb1 += pixsize; + rb2 += pixsize; + rb3 += pixsize; + } +} + +/* ------------------------------------------------------------------------- */ +void addalphaAddFloat(float dest[4], const float source[4]) +{ + + /* Makes me wonder whether this is required... */ + if (dest[3] < RE_EMPTY_COLOR_FLOAT) { + dest[0] = source[0]; + dest[1] = source[1]; + dest[2] = source[2]; + dest[3] = source[3]; + return; + } + + /* no clipping! */ + dest[0] = dest[0] + source[0]; + dest[1] = dest[1] + source[1]; + dest[2] = dest[2] + source[2]; + dest[3] = dest[3] + source[3]; + +} + + +/* ---------------------------------------------------------------------------- */ diff --git a/source/blender/render/intern/source/pixelshading.c b/source/blender/render/intern/source/pixelshading.c new file mode 100644 index 00000000000..7f202629ce4 --- /dev/null +++ b/source/blender/render/intern/source/pixelshading.c @@ -0,0 +1,650 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * Contributor(s): 2004-2006, Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/pixelshading.c + * \ingroup render + */ + + +#include <float.h> +#include <math.h> +#include <string.h> + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +/* External modules: */ + +#include "DNA_group_types.h" +#include "DNA_material_types.h" +#include "DNA_object_types.h" +#include "DNA_image_types.h" +#include "DNA_texture_types.h" +#include "DNA_lamp_types.h" + +#include "BKE_material.h" + + +/* own module */ +#include "render_types.h" +#include "renderdatabase.h" +#include "texture.h" +#include "rendercore.h" +#include "shadbuf.h" +#include "pixelshading.h" +#include "sunsky.h" + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + + +extern const float hashvectf[]; + +static void render_lighting_halo(HaloRen *har, float col_r[3]) +{ + GroupObject *go; + LampRen *lar; + float i, inp, inpr, rco[3], dco[3], lv[3], lampdist, ld, t, *vn; + float ir, ig, ib, shadfac, soft, lacol[3]; + + ir= ig= ib= 0.0; + + copy_v3_v3(rco, har->co); + dco[0]=dco[1]=dco[2]= 1.0f/har->rad; + + vn= har->no; 
+ + for (go=R.lights.first; go; go= go->next) { + lar= go->lampren; + + /* test for lamplayer */ + if (lar->mode & LA_LAYER) if ((lar->lay & har->lay)==0) continue; + + /* lampdist cacluation */ + if (lar->type==LA_SUN || lar->type==LA_HEMI) { + copy_v3_v3(lv, lar->vec); + lampdist= 1.0; + } + else { + lv[0]= rco[0]-lar->co[0]; + lv[1]= rco[1]-lar->co[1]; + lv[2]= rco[2]-lar->co[2]; + ld = len_v3(lv); + lv[0]/= ld; + lv[1]/= ld; + lv[2]/= ld; + + /* ld is re-used further on (texco's) */ + + if (lar->mode & LA_QUAD) { + t= 1.0; + if (lar->ld1>0.0f) + t= lar->dist/(lar->dist+lar->ld1*ld); + if (lar->ld2>0.0f) + t*= lar->distkw/(lar->distkw+lar->ld2*ld*ld); + + lampdist= t; + } + else { + lampdist= (lar->dist/(lar->dist+ld)); + } + + if (lar->mode & LA_SPHERE) { + t= lar->dist - ld; + if (t<0.0f) continue; + + t/= lar->dist; + lampdist*= (t); + } + + } + + lacol[0]= lar->r; + lacol[1]= lar->g; + lacol[2]= lar->b; + + if (lar->mode & LA_TEXTURE) { + ShadeInput shi; + + /* Warning, This is not that nice, and possibly a bit slow, + * however some variables were not initialized properly in, unless using shade_input_initialize(...), + * we need to do a memset */ + memset(&shi, 0, sizeof(ShadeInput)); + /* end warning! 
- Campbell */ + + copy_v3_v3(shi.co, rco); + shi.osatex= 0; + do_lamp_tex(lar, lv, &shi, lacol, LA_TEXTURE); + } + + if (lar->type==LA_SPOT) { + + if (lar->mode & LA_SQUARE) { + if (lv[0]*lar->vec[0]+lv[1]*lar->vec[1]+lv[2]*lar->vec[2]>0.0f) { + float x, lvrot[3]; + + /* rotate view to lampspace */ + copy_v3_v3(lvrot, lv); + mul_m3_v3(lar->imat, lvrot); + + x = max_ff(fabsf(lvrot[0]/lvrot[2]), fabsf(lvrot[1]/lvrot[2])); + /* 1.0/(sqrt(1+x*x)) is equivalent to cos(atan(x)) */ + + inpr = 1.0f / (sqrtf(1.0f + x * x)); + } + else inpr= 0.0; + } + else { + inpr= lv[0]*lar->vec[0]+lv[1]*lar->vec[1]+lv[2]*lar->vec[2]; + } + + t= lar->spotsi; + if (inpr<t) continue; + else { + t= inpr-t; + soft= 1.0; + if (t<lar->spotbl && lar->spotbl!=0.0f) { + /* soft area */ + i= t/lar->spotbl; + t= i*i; + soft= (3.0f*t-2.0f*t*i); + inpr*= soft; + } + if (lar->mode & LA_ONLYSHADOW) { + /* if (ma->mode & MA_SHADOW) { */ + /* dot product positive: front side face! */ + inp= vn[0]*lv[0] + vn[1]*lv[1] + vn[2]*lv[2]; + if (inp>0.0f) { + /* testshadowbuf==0.0 : 100% shadow */ + shadfac = testshadowbuf(&R, lar->shb, rco, dco, dco, inp, 0.0f); + if ( shadfac>0.0f ) { + shadfac*= inp*soft*lar->energy; + ir -= shadfac; + ig -= shadfac; + ib -= shadfac; + + continue; + } + } + /* } */ + } + lampdist*=inpr; + } + if (lar->mode & LA_ONLYSHADOW) continue; + + } + + /* dot product and reflectivity*/ + + inp = 1.0f - fabsf(dot_v3v3(vn, lv)); + + /* inp= cos(0.5*M_PI-acos(inp)); */ + + i= inp; + + if (lar->type==LA_HEMI) { + i= 0.5f*i+0.5f; + } + if (i>0.0f) { + i*= lampdist; + } + + /* shadow */ + if (i> -0.41f) { /* heuristic valua! 
*/ + if (lar->shb) { + shadfac = testshadowbuf(&R, lar->shb, rco, dco, dco, inp, 0.0f); + if (shadfac==0.0f) continue; + i*= shadfac; + } + } + + if (i>0.0f) { + ir+= i*lacol[0]; + ig+= i*lacol[1]; + ib+= i*lacol[2]; + } + } + + if (ir<0.0f) ir= 0.0f; + if (ig<0.0f) ig= 0.0f; + if (ib<0.0f) ib= 0.0f; + + col_r[0]*= ir; + col_r[1]*= ig; + col_r[2]*= ib; + +} + + +/** + * Converts a halo z-buffer value to distance from the camera's near plane + * \param z The z-buffer value to convert + * \return a distance from the camera's near plane in blender units + */ +static float haloZtoDist(int z) +{ + float zco = 0; + + if (z >= 0x7FFFFF) + return 10e10; + else { + zco = (float)z/(float)0x7FFFFF; + if (R.r.mode & R_ORTHO) + return (R.winmat[3][2] - zco*R.winmat[3][3])/(R.winmat[2][2]); + else + return (R.winmat[3][2])/(R.winmat[2][2] - R.winmat[2][3]*zco); + } +} + +/** + * \param col (float[4]) Store the rgb color here (with alpha) + * The alpha is used to blend the color to the background + * color_new = (1-alpha)*color_background + color + * \param zz The current zbuffer value at the place of this pixel + * \param dist Distance of the pixel from the center of the halo squared. Given in pixels + * \param xn The x coordinate of the pixel relaticve to the center of the halo. given in pixels + * \param yn The y coordinate of the pixel relaticve to the center of the halo. given in pixels + */ +int shadeHaloFloat(HaloRen *har, float col[4], int zz, + float dist, float xn, float yn, short flarec) +{ + /* fill in col */ + float t, zn, radist, ringf=0.0f, linef=0.0f, alpha, si, co; + int a; + + if (R.wrld.mode & WO_MIST) { + if (har->type & HA_ONLYSKY) { + alpha= har->alfa; + } + else { + /* a bit patchy... 
*/ + alpha= mistfactor(-har->co[2], har->co)*har->alfa; + } + } + else alpha= har->alfa; + + if (alpha==0.0f) + return 0; + + /* soften the halo if it intersects geometry */ + if (har->mat && har->mat->mode & MA_HALO_SOFT) { + float segment_length, halo_depth, distance_from_z /* , visible_depth */ /* UNUSED */, soften; + + /* calculate halo depth */ + segment_length= har->hasize*sasqrt(1.0f - dist/(har->rad*har->rad)); + halo_depth= 2.0f*segment_length; + + if (halo_depth < FLT_EPSILON) + return 0; + + /* calculate how much of this depth is visible */ + distance_from_z = haloZtoDist(zz) - haloZtoDist(har->zs); + /* visible_depth = halo_depth; */ /* UNUSED */ + if (distance_from_z < segment_length) { + soften= (segment_length + distance_from_z)/halo_depth; + + /* apply softening to alpha */ + if (soften < 1.0f) + alpha *= soften; + if (alpha <= 0.0f) + return 0; + } + } + else { + /* not a soft halo. use the old softening code */ + /* halo being intersected? */ + if (har->zs> zz-har->zd) { + t= ((float)(zz-har->zs))/(float)har->zd; + alpha*= sqrtf(sqrtf(t)); + } + } + + radist = sqrtf(dist); + + /* watch it: not used nicely: flarec is set at zero in pixstruct */ + if (flarec) har->pixels+= (int)(har->rad-radist); + + if (har->ringc) { + const float *rc; + float fac; + int ofs; + + /* per ring an antialised circle */ + ofs= har->seed; + + for (a= har->ringc; a>0; a--, ofs+=2) { + + rc= hashvectf + (ofs % 768); + + fac = fabsf(rc[1] * (har->rad * fabsf(rc[0]) - radist)); + + if (fac< 1.0f) { + ringf+= (1.0f-fac); + } + } + } + + if (har->type & HA_VECT) { + dist= fabsf(har->cos * (yn) - har->sin * (xn)) / har->rad; + if (dist>1.0f) dist= 1.0f; + if (har->tex) { + zn= har->sin*xn - har->cos*yn; + yn= har->cos*xn + har->sin*yn; + xn= zn; + } + } + else dist= dist/har->radsq; + + if (har->type & HA_FLARECIRC) { + dist = 0.5f + fabsf(dist - 0.5f); + } + + if (har->hard>=30) { + dist = sqrtf(dist); + if (har->hard>=40) { + dist = sinf(dist*(float)M_PI_2); + if 
(har->hard>=50) { + dist = sqrtf(dist); + } + } + } + else if (har->hard<20) dist*=dist; + + if (dist < 1.0f) + dist= (1.0f-dist); + else + dist= 0.0f; + + if (har->linec) { + const float *rc; + float fac; + int ofs; + + /* per starpoint an antialiased line */ + ofs= har->seed; + + for (a= har->linec; a>0; a--, ofs+=3) { + + rc= hashvectf + (ofs % 768); + + fac = fabsf((xn) * rc[0] + (yn) * rc[1]); + + if (fac< 1.0f ) + linef+= (1.0f-fac); + } + + linef*= dist; + } + + if (har->starpoints) { + float ster, angle; + /* rotation */ + angle = atan2f(yn, xn); + angle *= (1.0f+0.25f*har->starpoints); + + co= cosf(angle); + si= sinf(angle); + + angle= (co*xn+si*yn)*(co*yn-si*xn); + + ster = fabsf(angle); + if (ster>1.0f) { + ster= (har->rad)/(ster); + + if (ster<1.0f) dist*= sqrtf(ster); + } + } + + /* disputable optimize... (ton) */ + if (dist<=0.00001f) + return 0; + + dist*= alpha; + ringf*= dist; + linef*= alpha; + + /* The color is either the rgb spec-ed by the user, or extracted from */ + /* the texture */ + if (har->tex) { + col[0]= har->r; + col[1]= har->g; + col[2]= har->b; + col[3]= dist; + + do_halo_tex(har, xn, yn, col); + + col[0]*= col[3]; + col[1]*= col[3]; + col[2]*= col[3]; + + } + else { + col[0]= dist*har->r; + col[1]= dist*har->g; + col[2]= dist*har->b; + if (har->type & HA_XALPHA) col[3]= dist*dist; + else col[3]= dist; + } + + if (har->mat) { + if (har->mat->mode & MA_HALO_SHADE) { + /* we test for lights because of preview... */ + if (R.lights.first) render_lighting_halo(har, col); + } + + /* Next, we do the line and ring factor modifications. 
*/ + if (linef!=0.0f) { + Material *ma= har->mat; + + col[0]+= linef * ma->specr; + col[1]+= linef * ma->specg; + col[2]+= linef * ma->specb; + + if (har->type & HA_XALPHA) col[3]+= linef*linef; + else col[3]+= linef; + } + if (ringf!=0.0f) { + Material *ma= har->mat; + + col[0]+= ringf * ma->mirr; + col[1]+= ringf * ma->mirg; + col[2]+= ringf * ma->mirb; + + if (har->type & HA_XALPHA) col[3]+= ringf*ringf; + else col[3]+= ringf; + } + } + + /* alpha requires clip, gives black dots */ + if (col[3] > 1.0f) + col[3]= 1.0f; + + return 1; +} + +/* ------------------------------------------------------------------------- */ + +/* Only view vector is important here. Result goes to col_r[3] */ +void shadeSkyView(float col_r[3], const float rco[3], const float view[3], const float dxyview[2], short thread) +{ + float zen[3], hor[3], blend, blendm; + int skyflag; + + /* flag indicating if we render the top hemisphere */ + skyflag = WO_ZENUP; + + /* Some view vector stuff. */ + if (R.wrld.skytype & WO_SKYREAL) { + + blend = dot_v3v3(view, R.grvec); + + if (blend<0.0f) skyflag= 0; + + blend = fabsf(blend); + } + else if (R.wrld.skytype & WO_SKYPAPER) { + blend= 0.5f + 0.5f * view[1]; + } + else { + /* the fraction of how far we are above the bottom of the screen */ + blend = fabsf(0.5f + view[1]); + } + + copy_v3_v3(hor, &R.wrld.horr); + copy_v3_v3(zen, &R.wrld.zenr); + + /* Careful: SKYTEX and SKYBLEND are NOT mutually exclusive! If */ + /* SKYBLEND is active, the texture and color blend are added. */ + if (R.wrld.skytype & WO_SKYTEX) { + float lo[3]; + copy_v3_v3(lo, view); + if (R.wrld.skytype & WO_SKYREAL) { + + mul_m3_v3(R.imat, lo); + + SWAP(float, lo[1], lo[2]); + + } + do_sky_tex(rco, view, lo, dxyview, hor, zen, &blend, skyflag, thread); + } + + if (blend>1.0f) blend= 1.0f; + blendm= 1.0f-blend; + + /* No clipping, no conversion! 
*/ + if (R.wrld.skytype & WO_SKYBLEND) { + col_r[0] = (blendm*hor[0] + blend*zen[0]); + col_r[1] = (blendm*hor[1] + blend*zen[1]); + col_r[2] = (blendm*hor[2] + blend*zen[2]); + } + else { + /* Done when a texture was grabbed. */ + col_r[0]= hor[0]; + col_r[1]= hor[1]; + col_r[2]= hor[2]; + } +} + +/* shade sky according to sun lamps, all parameters are like shadeSkyView except sunsky*/ +void shadeSunView(float col_r[3], const float view[3]) +{ + GroupObject *go; + LampRen *lar; + float sview[3]; + bool do_init = true; + + for (go=R.lights.first; go; go= go->next) { + lar= go->lampren; + if (lar->type==LA_SUN && lar->sunsky && (lar->sunsky->effect_type & LA_SUN_EFFECT_SKY)) { + float sun_collector[3]; + float colorxyz[3]; + + if (do_init) { + + normalize_v3_v3(sview, view); + mul_m3_v3(R.imat, sview); + if (sview[2] < 0.0f) + sview[2] = 0.0f; + normalize_v3(sview); + do_init = false; + } + + GetSkyXYZRadiancef(lar->sunsky, sview, colorxyz); + xyz_to_rgb(colorxyz[0], colorxyz[1], colorxyz[2], &sun_collector[0], &sun_collector[1], &sun_collector[2], + lar->sunsky->sky_colorspace); + + ramp_blend(lar->sunsky->skyblendtype, col_r, lar->sunsky->skyblendfac, sun_collector); + } + } +} + + +/* + * Stuff the sky color into the collector. + */ +void shadeSkyPixel(float collector[4], float fx, float fy, short thread) +{ + float view[3], dxyview[2]; + + /* + * The rules for sky: + * 1. Draw an image, if a background image was provided. Stop + * 2. get texture and color blend, and combine these. + */ + + float fac; + + if ((R.wrld.skytype & (WO_SKYBLEND+WO_SKYTEX))==0) { + /* 1. solid color */ + copy_v3_v3(collector, &R.wrld.horr); + + collector[3] = 0.0f; + } + else { + /* 2. 
*/ + + /* This one true because of the context of this routine */ + if (R.wrld.skytype & WO_SKYPAPER) { + view[0]= -1.0f + 2.0f*(fx/(float)R.winx); + view[1]= -1.0f + 2.0f*(fy/(float)R.winy); + view[2]= 0.0; + + dxyview[0]= 1.0f/(float)R.winx; + dxyview[1]= 1.0f/(float)R.winy; + } + else { + calc_view_vector(view, fx, fy); + fac= normalize_v3(view); + + if (R.wrld.skytype & WO_SKYTEX) { + dxyview[0]= -R.viewdx/fac; + dxyview[1]= -R.viewdy/fac; + } + } + + /* get sky color in the collector */ + shadeSkyView(collector, NULL, view, dxyview, thread); + collector[3] = 0.0f; + } + + calc_view_vector(view, fx, fy); + shadeSunView(collector, view); +} + +/* aerial perspective */ +void shadeAtmPixel(struct SunSky *sunsky, float collector[3], float fx, float fy, float distance) +{ + float view[3]; + + calc_view_vector(view, fx, fy); + normalize_v3(view); + /*mul_m3_v3(R.imat, view);*/ + AtmospherePixleShader(sunsky, view, distance, collector); +} + +/* eof */ diff --git a/source/blender/render/intern/source/pointdensity.c b/source/blender/render/intern/source/pointdensity.c index 53359c305dc..c025a1fdef7 100644 --- a/source/blender/render/intern/source/pointdensity.c +++ b/source/blender/render/intern/source/pointdensity.c @@ -102,7 +102,7 @@ static void point_data_pointers(PointDensity *pd, const int totpoint = pd->totpoints; float *data = pd->point_data; int offset = 0; - + if (data_used & POINT_DATA_VEL) { if (r_data_velocity) *r_data_velocity = data + offset; @@ -112,7 +112,7 @@ static void point_data_pointers(PointDensity *pd, if (r_data_velocity) *r_data_velocity = NULL; } - + if (data_used & POINT_DATA_LIFE) { if (r_data_life) *r_data_life = data + offset; @@ -122,7 +122,7 @@ static void point_data_pointers(PointDensity *pd, if (r_data_life) *r_data_life = NULL; } - + if (data_used & POINT_DATA_COLOR) { if (r_data_color) *r_data_color = data + offset; @@ -283,19 +283,19 @@ static void pointdensity_cache_vertex_color(PointDensity *pd, Object *UNUSED(ob) const MLoopCol 
*mcol; char layername[MAX_CUSTOMDATA_LAYER_NAME]; int i; - + BLI_assert(data_color); - + if (!CustomData_has_layer(&mesh->ldata, CD_MLOOPCOL)) return; CustomData_validate_layer_name(&mesh->ldata, CD_MLOOPCOL, pd->vertex_attribute_name, layername); mcol = CustomData_get_layer_named(&mesh->ldata, CD_MLOOPCOL, layername); if (!mcol) return; - + /* Stores the number of MLoops using the same vertex, so we can normalize colors. */ int *mcorners = MEM_callocN(sizeof(int) * pd->totpoints, "point density corner count"); - + for (i = 0; i < totloop; i++) { int v = mloop[i].v; @@ -310,7 +310,7 @@ static void pointdensity_cache_vertex_color(PointDensity *pd, Object *UNUSED(ob) ++mcorners[v]; } - + /* Normalize colors by averaging over mcorners. * All the corners share the same vertex, ie. occupy the same point in space. */ @@ -318,7 +318,7 @@ static void pointdensity_cache_vertex_color(PointDensity *pd, Object *UNUSED(ob) if (mcorners[i] > 0) mul_v3_fl(&data_color[i*3], 1.0f / mcorners[i]); } - + MEM_freeN(mcorners); } @@ -328,9 +328,9 @@ static void pointdensity_cache_vertex_weight(PointDensity *pd, Object *ob, Mesh const MDeformVert *mdef, *dv; int mdef_index; int i; - + BLI_assert(data_color); - + mdef = CustomData_get_layer(&mesh->vdata, CD_MDEFORMVERT); if (!mdef) return; @@ -339,11 +339,11 @@ static void pointdensity_cache_vertex_weight(PointDensity *pd, Object *ob, Mesh mdef_index = ob->actdef - 1; if (mdef_index < 0) return; - + for (i = 0, dv = mdef; i < totvert; ++i, ++dv, data_color += 3) { MDeformWeight *dw; int j; - + for (j = 0, dw = dv->dw; j < dv->totweight; ++j, ++dw) { if (dw->def_nr == mdef_index) { copy_v3_fl(data_color, dw->weight); @@ -357,9 +357,9 @@ static void pointdensity_cache_vertex_normal(PointDensity *pd, Object *UNUSED(ob { MVert *mvert = mesh->mvert, *mv; int i; - + BLI_assert(data_color); - + for (i = 0, mv = mvert; i < pd->totpoints; i++, mv++, data_color += 3) { normal_short_to_float_v3(data_color, mv->no); } @@ -413,7 +413,7 @@ static void 
pointdensity_cache_object(PointDensity *pd, BLI_bvhtree_insert(pd->point_tree, i, co, 1); } - + switch (pd->ob_color_source) { case TEX_PD_COLOR_VERTCOL: pointdensity_cache_vertex_color(pd, ob, mesh, data_color); @@ -506,7 +506,7 @@ static float density_falloff(PointDensityRangeData *pdr, int index, float square { const float dist = (pdr->squared_radius - squared_dist) / pdr->squared_radius * 0.5f; float density = 0.0f; - + switch (pdr->falloff_type) { case TEX_PD_FALLOFF_STD: density = dist; @@ -536,12 +536,12 @@ static float density_falloff(PointDensityRangeData *pdr, int index, float square density = dist; break; } - + if (pdr->density_curve && dist != 0.0f) { curvemapping_initialize(pdr->density_curve); density = curvemapping_evaluateF(pdr->density_curve, 0, density / dist) * dist; } - + return density; } @@ -666,7 +666,7 @@ static void pointdensity_color(PointDensity *pd, TexResult *texres, float age, c if (pd->source == TEX_PD_PSYS) { float rgba[4]; - + switch (pd->color_source) { case TEX_PD_COLOR_PARTAGE: if (pd->coba) { @@ -681,7 +681,7 @@ static void pointdensity_color(PointDensity *pd, TexResult *texres, float age, c case TEX_PD_COLOR_PARTSPEED: { float speed = len_v3(vec) * pd->speed_scale; - + if (pd->coba) { if (BKE_colorband_evaluate(pd->coba, speed, rgba)) { texres->talpha = true; @@ -704,7 +704,7 @@ static void pointdensity_color(PointDensity *pd, TexResult *texres, float age, c } else { float rgba[4]; - + switch (pd->ob_color_source) { case TEX_PD_COLOR_VERTCOL: texres->talpha = true; diff --git a/source/blender/render/intern/source/rayshade.c b/source/blender/render/intern/source/rayshade.c new file mode 100644 index 00000000000..df1cb868230 --- /dev/null +++ b/source/blender/render/intern/source/rayshade.c @@ -0,0 +1,2503 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software 
Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 1990-1998 NeoGeo BV. + * All rights reserved. + * + * Contributors: 2004/2005 Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/rayshade.c + * \ingroup render + */ + +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include <float.h> +#include <assert.h> + +#include "MEM_guardedalloc.h" + +#include "DNA_material_types.h" +#include "DNA_lamp_types.h" + +#include "BLI_blenlib.h" +#include "BLI_system.h" +#include "BLI_math.h" +#include "BLI_rand.h" +#include "BLI_utildefines.h" + +#include "BLT_translation.h" + +#include "BKE_node.h" + +#include "render_result.h" +#include "render_types.h" +#include "rendercore.h" +#include "renderdatabase.h" +#include "pixelshading.h" +#include "shading.h" +#include "volumetric.h" + +#include "rayintersection.h" +#include "rayobject.h" +#include "raycounter.h" + +#define RAY_TRA 1 +#define RAY_INSIDE 2 + +#define DEPTH_SHADOW_TRA 10 + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +static int test_break(void *data) +{ + Render *re = (Render *)data; + return 
re->test_break(re->tbh); +} + +static void RE_rayobject_config_control(RayObject *r, Render *re) +{ + if (RE_rayobject_isRayAPI(r)) { + r = RE_rayobject_align(r); + r->control.data = re; + r->control.test_break = test_break; + } +} + +RayObject *RE_rayobject_create(int type, int size, int octree_resolution) +{ + RayObject * res = NULL; + + if (type == R_RAYSTRUCTURE_AUTO) { + /* TODO */ + //if (detect_simd()) +#ifdef __SSE__ + type = BLI_cpu_support_sse2()? R_RAYSTRUCTURE_SIMD_SVBVH: R_RAYSTRUCTURE_VBVH; +#else + type = R_RAYSTRUCTURE_VBVH; +#endif + } + +#ifndef __SSE__ + if (type == R_RAYSTRUCTURE_SIMD_SVBVH || type == R_RAYSTRUCTURE_SIMD_QBVH) { + puts("Warning: Using VBVH (SSE was disabled at compile time)"); + type = R_RAYSTRUCTURE_VBVH; + } +#endif + + + if (type == R_RAYSTRUCTURE_OCTREE) //TODO dynamic ocres + res = RE_rayobject_octree_create(octree_resolution, size); + else if (type == R_RAYSTRUCTURE_VBVH) + res = RE_rayobject_vbvh_create(size); + else if (type == R_RAYSTRUCTURE_SIMD_SVBVH) + res = RE_rayobject_svbvh_create(size); + else if (type == R_RAYSTRUCTURE_SIMD_QBVH) + res = RE_rayobject_qbvh_create(size); + else + res = RE_rayobject_vbvh_create(size); //Fallback + + return res; +} + +static RayObject* rayobject_create(Render *re, int type, int size) +{ + RayObject * res = NULL; + + res = RE_rayobject_create(type, size, re->r.ocres); + + if (res) + RE_rayobject_config_control(res, re); + + return res; +} + +#ifdef RE_RAYCOUNTER +RayCounter re_rc_counter[BLENDER_MAX_THREADS]; +#endif + + +void freeraytree(Render *re) +{ + ObjectInstanceRen *obi; + + if (re->raytree) { + RE_rayobject_free(re->raytree); + re->raytree = NULL; + } + if (re->rayfaces) { + MEM_freeN(re->rayfaces); + re->rayfaces = NULL; + } + if (re->rayprimitives) { + MEM_freeN(re->rayprimitives); + re->rayprimitives = NULL; + } + + for (obi=re->instancetable.first; obi; obi=obi->next) { + ObjectRen *obr = obi->obr; + if (obr->raytree) { + RE_rayobject_free(obr->raytree); + obr->raytree = 
NULL; + } + if (obr->rayfaces) { + MEM_freeN(obr->rayfaces); + obr->rayfaces = NULL; + } + if (obi->raytree) { + RE_rayobject_free(obi->raytree); + obi->raytree = NULL; + } + } + +#ifdef RE_RAYCOUNTER + { + const int num_threads = re->r.threads; + RayCounter sum; + memset(&sum, 0, sizeof(sum)); + int i; + for (i=0; i<num_threads; i++) + RE_RC_MERGE(&sum, re_rc_counter+i); + RE_RC_INFO(&sum); + } +#endif +} + +static bool is_raytraceable_vlr(Render *re, VlakRen *vlr) +{ + /* note: volumetric must be tracable, wire must not */ + if ((re->flag & R_BAKE_TRACE) || (vlr->flag & R_TRACEBLE) || (vlr->mat->material_type == MA_TYPE_VOLUME)) + if (vlr->mat->material_type != MA_TYPE_WIRE) + return 1; + return 0; +} + +static bool is_raytraceable(Render *re, ObjectInstanceRen *obi) +{ + int v; + ObjectRen *obr = obi->obr; + + if (re->excludeob && obr->ob == re->excludeob) + return 0; + + for (v=0;v<obr->totvlak;v++) { + VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255); + + if (is_raytraceable_vlr(re, vlr)) + return 1; + } + + return 0; +} + + +RayObject* makeraytree_object(Render *re, ObjectInstanceRen *obi) +{ + /*TODO + * out-of-memory safeproof + * break render + * update render stats */ + ObjectRen *obr = obi->obr; + + if (obr->raytree == NULL) { + RayObject *raytree; + RayFace *face = NULL; + VlakPrimitive *vlakprimitive = NULL; + int v; + + //Count faces + int faces = 0; + for (v=0;v<obr->totvlak;v++) { + VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255); + if (is_raytraceable_vlr(re, vlr)) + faces++; + } + + if (faces == 0) + return NULL; + + //Create Ray cast accelaration structure + raytree = rayobject_create( re, re->r.raytrace_structure, faces ); + if ( (re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS) ) + vlakprimitive = obr->rayprimitives = (VlakPrimitive *)MEM_callocN(faces * sizeof(VlakPrimitive), "ObjectRen primitives"); + else + face = obr->rayfaces = (RayFace *)MEM_callocN(faces * sizeof(RayFace), "ObjectRen faces"); + + obr->rayobi = obi; + + for 
(v=0;v<obr->totvlak;v++) { + VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255); + if (is_raytraceable_vlr(re, vlr)) { + if ((re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS)) { + RE_rayobject_add(raytree, RE_vlakprimitive_from_vlak(vlakprimitive, obi, vlr)); + vlakprimitive++; + } + else { + RE_rayface_from_vlak(face, obi, vlr); + RE_rayobject_add(raytree, RE_rayobject_unalignRayFace(face)); + face++; + } + } + } + RE_rayobject_done(raytree); + + /* in case of cancel during build, raytree is not usable */ + if (test_break(re)) + RE_rayobject_free(raytree); + else + obr->raytree= raytree; + } + + if (obr->raytree) { + if ((obi->flag & R_TRANSFORMED) && obi->raytree == NULL) { + obi->transform_primitives = 0; + obi->raytree = RE_rayobject_instance_create( obr->raytree, obi->mat, obi, obi->obr->rayobi ); + } + } + + if (obi->raytree) return obi->raytree; + return obi->obr->raytree; +} + +static bool has_special_rayobject(Render *re, ObjectInstanceRen *obi) +{ + if ( (obi->flag & R_TRANSFORMED) && (re->r.raytrace_options & R_RAYTRACE_USE_INSTANCES) ) { + ObjectRen *obr = obi->obr; + int v, faces = 0; + + for (v=0;v<obr->totvlak;v++) { + VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255); + if (is_raytraceable_vlr(re, vlr)) { + faces++; + if (faces > 4) + return 1; + } + } + } + return 0; +} +/* + * create a single raytrace structure with all faces + */ +static void makeraytree_single(Render *re) +{ + ObjectInstanceRen *obi; + RayObject *raytree; + RayFace *face = NULL; + VlakPrimitive *vlakprimitive = NULL; + int faces = 0, special = 0; + + for (obi = re->instancetable.first; obi; obi = obi->next) { + if (is_raytraceable(re, obi)) { + ObjectRen *obr = obi->obr; + + if (has_special_rayobject(re, obi)) { + special++; + } + else { + int v; + for (v = 0;v < obr->totvlak; v++) { + VlakRen *vlr = obr->vlaknodes[v >> 8].vlak + (v&255); + if (is_raytraceable_vlr(re, vlr)) { + faces++; + } + } + } + } + } + + if (faces + special == 0) { + re->raytree = 
RE_rayobject_empty_create(); + return; + } + + //Create raytree + raytree = re->raytree = rayobject_create( re, re->r.raytrace_structure, faces+special ); + + if ( (re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS) ) { + vlakprimitive = re->rayprimitives = (VlakPrimitive *)MEM_callocN(faces * sizeof(VlakPrimitive), "Raytrace vlak-primitives"); + } + else { + face = re->rayfaces = (RayFace *)MEM_callocN(faces * sizeof(RayFace), "Render ray faces"); + } + + for (obi=re->instancetable.first; obi; obi=obi->next) + if (is_raytraceable(re, obi)) { + if (test_break(re)) + break; + + if (has_special_rayobject(re, obi)) { + RayObject *obj = makeraytree_object(re, obi); + + if (test_break(re)) + break; + + if (obj) + RE_rayobject_add(re->raytree, obj); + } + else { + int v; + ObjectRen *obr = obi->obr; + + if (obi->flag & R_TRANSFORMED) { + obi->transform_primitives = 1; + } + + for (v=0;v<obr->totvlak;v++) { + VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255); + if (is_raytraceable_vlr(re, vlr)) { + if ((re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS)) { + RayObject *obj = RE_vlakprimitive_from_vlak( vlakprimitive, obi, vlr ); + RE_rayobject_add(raytree, obj); + vlakprimitive++; + } + else { + RE_rayface_from_vlak(face, obi, vlr); + if ((obi->flag & R_TRANSFORMED)) { + mul_m4_v3(obi->mat, face->v1); + mul_m4_v3(obi->mat, face->v2); + mul_m4_v3(obi->mat, face->v3); + if (RE_rayface_isQuad(face)) + mul_m4_v3(obi->mat, face->v4); + } + + RE_rayobject_add(raytree, RE_rayobject_unalignRayFace(face)); + face++; + } + } + } + } + } + + if (!test_break(re)) { + re->i.infostr = IFACE_("Raytree.. building"); + re->stats_draw(re->sdh, &re->i); + + RE_rayobject_done(raytree); + } +} + +void makeraytree(Render *re) +{ + float min[3], max[3], sub[3]; + int i; + + re->i.infostr = IFACE_("Raytree.. 
preparing"); + re->stats_draw(re->sdh, &re->i); + + /* disable options not yet supported by octree, + * they might actually never be supported (unless people really need it) */ + if (re->r.raytrace_structure == R_RAYSTRUCTURE_OCTREE) + re->r.raytrace_options &= ~( R_RAYTRACE_USE_INSTANCES | R_RAYTRACE_USE_LOCAL_COORDS); + + makeraytree_single(re); + + if (test_break(re)) { + freeraytree(re); + + re->i.infostr = IFACE_("Raytree building canceled"); + re->stats_draw(re->sdh, &re->i); + } + else { + /* Calculate raytree max_size + * This is ONLY needed to kept a bogus behavior of SUN and HEMI lights */ + INIT_MINMAX(min, max); + RE_rayobject_merge_bb(re->raytree, min, max); + if (min[0] > max[0]) { /* empty raytree */ + zero_v3(min); + zero_v3(max); + } + for (i=0; i<3; i++) { + /* TODO: explain why add top both min and max??? */ + min[i] += 0.01f; + max[i] += 0.01f; + sub[i] = max[i]-min[i]; + } + + re->maxdist = len_v3(sub); + + re->i.infostr = IFACE_("Raytree finished"); + re->stats_draw(re->sdh, &re->i); + } + +#ifdef RE_RAYCOUNTER + memset(re_rc_counter, 0, sizeof(re_rc_counter)); +#endif +} + +/* if (shi->osatex) */ +static void shade_ray_set_derivative(ShadeInput *shi) +{ + float detsh, t00, t10, t01, t11; + int axis1, axis2; + + /* find most stable axis to project */ + axis_dominant_v3(&axis1, &axis2, shi->facenor); + + /* compute u,v and derivatives */ + if (shi->obi->flag & R_TRANSFORMED) { + float v1[3], v2[3], v3[3]; + + mul_v3_m3v3(v1, shi->obi->nmat, shi->v1->co); + mul_v3_m3v3(v2, shi->obi->nmat, shi->v2->co); + mul_v3_m3v3(v3, shi->obi->nmat, shi->v3->co); + + /* same as below */ + t00= v3[axis1]-v1[axis1]; t01= v3[axis2]-v1[axis2]; + t10= v3[axis1]-v2[axis1]; t11= v3[axis2]-v2[axis2]; + } + else { + const float *v1= shi->v1->co; + const float *v2= shi->v2->co; + const float *v3= shi->v3->co; + + /* same as above */ + t00= v3[axis1]-v1[axis1]; t01= v3[axis2]-v1[axis2]; + t10= v3[axis1]-v2[axis1]; t11= v3[axis2]-v2[axis2]; + } + + detsh= 
1.0f/(t00*t11-t10*t01); + t00*= detsh; t01*=detsh; + t10*=detsh; t11*=detsh; + + shi->dx_u= shi->dxco[axis1]*t11- shi->dxco[axis2]*t10; + shi->dx_v= shi->dxco[axis2]*t00- shi->dxco[axis1]*t01; + shi->dy_u= shi->dyco[axis1]*t11- shi->dyco[axis2]*t10; + shi->dy_v= shi->dyco[axis2]*t00- shi->dyco[axis1]*t01; + +} + +/* main ray shader */ +void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr) +{ + ObjectInstanceRen *obi = (ObjectInstanceRen *)is->hit.ob; + VlakRen *vlr = (VlakRen *)is->hit.face; + + /* set up view vector */ + copy_v3_v3(shi->view, is->dir); + + /* render co */ + shi->co[0]= is->start[0]+is->dist*(shi->view[0]); + shi->co[1]= is->start[1]+is->dist*(shi->view[1]); + shi->co[2]= is->start[2]+is->dist*(shi->view[2]); + + normalize_v3(shi->view); + + shi->obi= obi; + shi->obr= obi->obr; + shi->vlr= vlr; + shi->mat= vlr->mat; + shade_input_init_material(shi); + + if (is->isect==2) + shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3); + else + shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); + + shi->u= is->u; + shi->v= is->v; + shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v= 0.0f; + + if (shi->osatex) + shade_ray_set_derivative(shi); + shade_input_set_normals(shi); + + shade_input_set_shade_texco(shi); + if (shi->mat->material_type == MA_TYPE_VOLUME) { + if (ELEM(is->mode, RE_RAY_SHADOW, RE_RAY_SHADOW_TRA)) { + shade_volume_shadow(shi, shr, is); + } + else { + shade_volume_outside(shi, shr); + } + } + else if (is->mode==RE_RAY_SHADOW_TRA) { + /* temp hack to prevent recursion */ + if (shi->nodes==0 && shi->mat->nodetree && shi->mat->use_nodes) { + ntreeShaderExecTree(shi->mat->nodetree, shi, shr); + shi->mat= vlr->mat; /* shi->mat is being set in nodetree */ + } + else + shade_color(shi, shr); + } + else { + if (shi->mat->nodetree && shi->mat->use_nodes) { + ntreeShaderExecTree(shi->mat->nodetree, shi, shr); + shi->mat= vlr->mat; /* shi->mat is being set in nodetree */ + } + else { + shade_material_loop(shi, shr); + } + + /* raytrace likes to separate 
the spec color */ + sub_v3_v3v3(shr->diff, shr->combined, shr->spec); + copy_v3_v3(shr->diffshad, shr->diff); + } + +} + +static int refraction(float refract[3], const float n[3], const float view[3], float index) +{ + float dot, fac; + + copy_v3_v3(refract, view); + + dot = dot_v3v3(view, n); + + if (dot>0.0f) { + index = 1.0f/index; + fac= 1.0f - (1.0f - dot*dot)*index*index; + if (fac <= 0.0f) return 0; + fac= -dot*index + sqrtf(fac); + } + else { + fac= 1.0f - (1.0f - dot*dot)*index*index; + if (fac <= 0.0f) return 0; + fac= -dot*index - sqrtf(fac); + } + + refract[0]= index*view[0] + fac*n[0]; + refract[1]= index*view[1] + fac*n[1]; + refract[2]= index*view[2] + fac*n[2]; + + return 1; +} + +static void reflection_simple(float ref[3], float n[3], const float view[3]) +{ + const float f1= -2.0f * dot_v3v3(n, view); + madd_v3_v3v3fl(ref, view, n, f1); +} + +/* orn = original face normal */ +static void reflection(float ref[3], float n[3], const float view[3], const float orn[3]) +{ + float f1; + + reflection_simple(ref, n, view); + + /* test phong normals, then we should prevent vector going to the back */ + f1= dot_v3v3(ref, orn); + if (f1>0.0f) { + f1+= 0.01f; + ref[0]-= f1*orn[0]; + ref[1]-= f1*orn[1]; + ref[2]-= f1*orn[2]; + } +} + +#if 0 +static void color_combine(float *result, float fac1, float fac2, float col1[3], float col2[3]) +{ + float col1t[3], col2t[3]; + + col1t[0]= sqrt(col1[0]); + col1t[1]= sqrt(col1[1]); + col1t[2]= sqrt(col1[2]); + col2t[0]= sqrt(col2[0]); + col2t[1]= sqrt(col2[1]); + col2t[2]= sqrt(col2[2]); + + result[0]= (fac1*col1t[0] + fac2*col2t[0]); + result[0]*= result[0]; + result[1]= (fac1*col1t[1] + fac2*col2t[1]); + result[1]*= result[1]; + result[2]= (fac1*col1t[2] + fac2*col2t[2]); + result[2]*= result[2]; +} +#endif + +static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr) +{ + float d; + if (0 == (shi->mat->mode & MA_TRANSP)) + return -1; + + if (shi->mat->tx_limit <= 0.0f) { + d= 1.0f; + } + else { + 
float p; + + /* shi.co[] calculated by shade_ray() */ + const float dx= shi->co[0] - is->start[0]; + const float dy= shi->co[1] - is->start[1]; + const float dz= shi->co[2] - is->start[2]; + d = sqrtf(dx * dx + dy * dy + dz * dz); + if (d > shi->mat->tx_limit) + d= shi->mat->tx_limit; + + p = shi->mat->tx_falloff; + if (p < 0.0f) p= 0.0f; + else if (p > 10.0f) p= 10.0f; + + shr->alpha *= powf(d, p); + if (shr->alpha > 1.0f) + shr->alpha= 1.0f; + } + + return d; +} + +static void ray_fadeout_endcolor(float col[3], ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, const float vec[3]) +{ + /* un-intersected rays get either rendered material color or sky color */ + if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOMAT) { + copy_v3_v3(col, shr->combined); + } + else if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOSKY) { + copy_v3_v3(shi->view, vec); + normalize_v3(shi->view); + + shadeSkyView(col, isec->start, shi->view, NULL, shi->thread); + shadeSunView(col, shi->view); + } +} + +static void ray_fadeout(Isect *is, ShadeInput *shi, float col[3], const float blendcol[3], float dist_mir) +{ + /* if fading out, linear blend against fade color */ + float blendfac; + + blendfac = 1.0f - len_v3v3(shi->co, is->start)/dist_mir; + + col[0] = col[0]*blendfac + (1.0f - blendfac)*blendcol[0]; + col[1] = col[1]*blendfac + (1.0f - blendfac)*blendcol[1]; + col[2] = col[2]*blendfac + (1.0f - blendfac)*blendcol[2]; +} + +/* the main recursive tracer itself + * note: 'col' must be initialized */ +static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, const float start[3], const float dir[3], float col[4], ObjectInstanceRen *obi, VlakRen *vlr, int traflag) +{ + ShadeInput shi = {NULL}; + Isect isec; + float dist_mir = origshi->mat->dist_mir; + + /* with high depth the number of rays can explode due to the path splitting + * in two each time, giving 2^depth rays. 
we need to be able to cancel such + * a render to avoid hanging, a better solution would be random picking + * between directions and russian roulette termination */ + if (R.test_break(R.tbh)) { + zero_v4(col); + return; + } + + copy_v3_v3(isec.start, start); + copy_v3_v3(isec.dir, dir); + isec.dist = dist_mir > 0 ? dist_mir : RE_RAYTRACE_MAXDIST; + isec.mode= RE_RAY_MIRROR; + isec.check = RE_CHECK_VLR_RENDER; + isec.skip = RE_SKIP_VLR_NEIGHBOUR; + isec.hint = NULL; + + isec.orig.ob = obi; + isec.orig.face = vlr; + RE_RC_INIT(isec, shi); + + /* database is in original view, obi->imat transforms current position back to original */ + RE_instance_rotate_ray(origshi->obi, &isec); + + if (RE_rayobject_raycast(R.raytree, &isec)) { + ShadeResult shr= {{0}}; + float d= 1.0f; + + RE_instance_rotate_ray_restore(origshi->obi, &isec); + + /* for as long we don't have proper dx/dy transform for rays we copy over original */ + copy_v3_v3(shi.dxco, origshi->dxco); + copy_v3_v3(shi.dyco, origshi->dyco); + + shi.mask= origshi->mask; + shi.osatex= origshi->osatex; + shi.depth= origshi->depth + 1; /* only used to indicate tracing */ + shi.thread= origshi->thread; + //shi.sample= 0; // memset above, so don't need this + shi.xs= origshi->xs; + shi.ys= origshi->ys; + shi.do_manage= origshi->do_manage; + shi.lay= origshi->lay; + shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */ + shi.combinedflag= 0xFFFFFF; /* ray trace does all options */ + //shi.do_preview = false; // memset above, so don't need this + shi.light_override= origshi->light_override; + shi.mat_override= origshi->mat_override; + + shade_ray(&isec, &shi, &shr); + /* ray has traveled inside the material, so shade by transmission */ + if (traflag & RAY_INSIDE) + d= shade_by_transmission(&isec, &shi, &shr); + + if (depth>0) { + float fr, fg, fb, f1; + + if ((shi.mat->mode_l & MA_TRANSP) && shr.alpha < 1.0f && (shi.mat->mode_l & (MA_ZTRANSP | MA_RAYTRANSP))) { + float nf, f, refract[3], tracol[4]; + + 
tracol[0]= shi.r; + tracol[1]= shi.g; + tracol[2]= shi.b; + tracol[3]= col[3]; /* we pass on and accumulate alpha */ + + if ((shi.mat->mode & MA_TRANSP) && (shi.mat->mode & MA_RAYTRANSP)) { + /* don't overwrite traflag, it's value is used in mirror reflection */ + int new_traflag = traflag; + + if (new_traflag & RAY_INSIDE) { + /* inside the material, so use inverse normal */ + float norm[3]; + norm[0]= - shi.vn[0]; + norm[1]= - shi.vn[1]; + norm[2]= - shi.vn[2]; + + if (refraction(refract, norm, shi.view, shi.ang)) { + /* ray comes out from the material into air */ + new_traflag &= ~RAY_INSIDE; + } + else { + /* total internal reflection (ray stays inside the material) */ + reflection(refract, norm, shi.view, shi.vn); + } + } + else { + if (refraction(refract, shi.vn, shi.view, shi.ang)) { + /* ray goes in to the material from air */ + new_traflag |= RAY_INSIDE; + } + else { + /* total external reflection (ray doesn't enter the material) */ + reflection(refract, shi.vn, shi.view, shi.vn); + } + } + traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, new_traflag); + } + else + traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0); + + f= shr.alpha; f1= 1.0f-f; + nf= (shi.mat->mode & MA_RAYTRANSP) ? 
d * shi.mat->filter : 0.0f; + fr= 1.0f+ nf*(shi.r-1.0f); + fg= 1.0f+ nf*(shi.g-1.0f); + fb= 1.0f+ nf*(shi.b-1.0f); + shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0]; + shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1]; + shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2]; + + shr.spec[0] *=f; + shr.spec[1] *=f; + shr.spec[2] *=f; + + col[3]= f1*tracol[3] + f; + } + else { + col[3]= 1.0f; + } + + float f; + if (shi.mat->mode_l & MA_RAYMIRROR) { + f= shi.ray_mirror; + if (f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir); + } + else f= 0.0f; + + if (f!=0.0f) { + float mircol[4]; + float ref[3]; + + reflection_simple(ref, shi.vn, shi.view); + traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, traflag); + + f1= 1.0f-f; + + /* combine */ + //color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff); + //col[0]+= shr.spec[0]; + //col[1]+= shr.spec[1]; + //col[2]+= shr.spec[2]; + + fr= shi.mirr; + fg= shi.mirg; + fb= shi.mirb; + + col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0]; + col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1]; + col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2]; + } + else { + col[0]= shr.diff[0] + shr.spec[0]; + col[1]= shr.diff[1] + shr.spec[1]; + col[2]= shr.diff[2] + shr.spec[2]; + } + + if (dist_mir > 0.0f) { + float blendcol[3]; + + /* max ray distance set, but found an intersection, so fade this color + * out towards the sky/material color for a smooth transition */ + ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, dir); + ray_fadeout(&isec, &shi, col, blendcol, dist_mir); + } + } + else { + col[0]= shr.diff[0] + shr.spec[0]; + col[1]= shr.diff[1] + shr.spec[1]; + col[2]= shr.diff[2] + shr.spec[2]; + } + + } + else { + ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, dir); + } + RE_RC_MERGE(&origshi->raycounter, &shi.raycounter); +} + +/* **************** jitter blocks ********** */ + +/* calc 
distributed planar energy */ + +static void DP_energy(float *table, float vec[2], int tot, float xsize, float ysize) +{ + int x, y, a; + float *fp, force[3], result[3]; + float dx, dy, dist, min; + + min= MIN2(xsize, ysize); + min*= min; + result[0]= result[1]= 0.0f; + + for (y= -1; y<2; y++) { + dy= ysize*y; + for (x= -1; x<2; x++) { + dx= xsize*x; + fp= table; + for (a=0; a<tot; a++, fp+= 2) { + force[0]= vec[0] - fp[0]-dx; + force[1]= vec[1] - fp[1]-dy; + dist= force[0]*force[0] + force[1]*force[1]; + if (dist < min && dist>0.0f) { + result[0]+= force[0]/dist; + result[1]+= force[1]/dist; + } + } + } + } + vec[0] += 0.1f*min*result[0]/(float)tot; + vec[1] += 0.1f*min*result[1]/(float)tot; + /* cyclic clamping */ + vec[0]= vec[0] - xsize*floorf(vec[0]/xsize + 0.5f); + vec[1]= vec[1] - ysize*floorf(vec[1]/ysize + 0.5f); +} + +/* random offset of 1 in 2 */ +static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy) +{ + float dsizex= sizex*ofsx; + float dsizey= sizey*ofsy; + float hsizex= 0.5f*sizex, hsizey= 0.5f*sizey; + int x; + + for (x=tot; x>0; x--, jitter1+=2, jitter2+=2) { + jitter2[0]= jitter1[0] + dsizex; + jitter2[1]= jitter1[1] + dsizey; + if (jitter2[0] > hsizex) jitter2[0]-= sizex; + if (jitter2[1] > hsizey) jitter2[1]-= sizey; + } +} + +/* called from convertBlenderScene.c */ +/* we do this in advance to get consistent random, not alter the render seed, and be threadsafe */ +void init_jitter_plane(LampRen *lar) +{ + float *fp; + int x, tot= lar->ray_totsamp; + + /* test if already initialized */ + if (lar->jitter) return; + + /* at least 4, or max threads+1 tables */ + if (BLENDER_MAX_THREADS < 4) x= 4; + else x= BLENDER_MAX_THREADS+1; + fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab"); + + /* if 1 sample, we leave table to be zero's */ + if (tot>1) { + /* set per-lamp fixed seed */ + RNG *rng = BLI_rng_new_srandom(tot); + int iter=12; + + /* fill table with random 
locations, area_size large */ + for (x=0; x<tot; x++, fp+=2) { + fp[0]= (BLI_rng_get_float(rng)-0.5f)*lar->area_size; + fp[1]= (BLI_rng_get_float(rng)-0.5f)*lar->area_sizey; + } + + while (iter--) { + fp= lar->jitter; + for (x=tot; x>0; x--, fp+=2) { + DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey); + } + } + + BLI_rng_free(rng); + } + /* create the dithered tables (could just check lamp type!) */ + jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f); + jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f); + jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f); +} + +/* table around origin, -0.5*size to 0.5*size */ +static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys) +{ + int tot; + + tot= lar->ray_totsamp; + + if (lar->ray_samp_type & LA_SAMP_JITTER) { + /* made it threadsafe */ + + if (lar->xold[thread]!=xs || lar->yold[thread]!=ys) { + jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread)); + lar->xold[thread]= xs; + lar->yold[thread]= ys; + } + return lar->jitter+2*(thread+1)*tot; + } + if (lar->ray_samp_type & LA_SAMP_DITHER) { + return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1)); + } + + return lar->jitter; +} + + +/* **************** QMC sampling *************** */ + +static void halton_sample(double *ht_invprimes, double *ht_nums, double *v) +{ + /* incremental halton sequence generator, from: + * "Instant Radiosity", Keller A. 
*/ + unsigned int i; + + for (i = 0; i < 2; i++) { + double r = fabs((1.0 - ht_nums[i]) - 1e-10); + + if (ht_invprimes[i] >= r) { + double lasth; + double h = ht_invprimes[i]; + + do { + lasth = h; + h *= ht_invprimes[i]; + } while (h >= r); + + ht_nums[i] += ((lasth + h) - 1.0); + } + else + ht_nums[i] += ht_invprimes[i]; + + v[i] = (float)ht_nums[i]; + } +} + +/* Generate Hammersley points in [0,1)^2 + * From Lucille renderer */ +static void hammersley_create(double *out, int n) +{ + double p, t; + int k, kk; + + for (k = 0; k < n; k++) { + t = 0; + for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) { + if (kk & 1) { /* kk mod 2 = 1 */ + t += p; + } + } + + out[2 * k + 0] = (double)k / (double)n; + out[2 * k + 1] = t; + } +} + +static struct QMCSampler *QMC_initSampler(int type, int tot) +{ + QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler"); + qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table"); + + qsa->tot = tot; + qsa->type = type; + + if (qsa->type==SAMP_TYPE_HAMMERSLEY) + hammersley_create(qsa->samp2d, qsa->tot); + + return qsa; +} + +static void QMC_initPixel(QMCSampler *qsa, int thread) +{ + if (qsa->type==SAMP_TYPE_HAMMERSLEY) { + /* hammersley sequence is fixed, already created in QMCSampler init. + * per pixel, gets a random offset. 
We create separate offsets per thread, for write-safety */ + qsa->offs[thread][0] = 0.5f * BLI_thread_frand(thread); + qsa->offs[thread][1] = 0.5f * BLI_thread_frand(thread); + } + else { /* SAMP_TYPE_HALTON */ + + /* generate a new randomized halton sequence per pixel + * to alleviate qmc artifacts and make it reproducible + * between threads/frames */ + double ht_invprimes[2], ht_nums[2]; + double r[2]; + int i; + + ht_nums[0] = BLI_thread_frand(thread); + ht_nums[1] = BLI_thread_frand(thread); + ht_invprimes[0] = 0.5; + ht_invprimes[1] = 1.0/3.0; + + for (i=0; i< qsa->tot; i++) { + halton_sample(ht_invprimes, ht_nums, r); + qsa->samp2d[2*i+0] = r[0]; + qsa->samp2d[2*i+1] = r[1]; + } + } +} + +static void QMC_freeSampler(QMCSampler *qsa) +{ + MEM_freeN(qsa->samp2d); + MEM_freeN(qsa); +} + +static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num) +{ + if (qsa->type == SAMP_TYPE_HAMMERSLEY) { + s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f); + s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f); + } + else { /* SAMP_TYPE_HALTON */ + s[0] = qsa->samp2d[2*num+0]; + s[1] = qsa->samp2d[2*num+1]; + } +} + +/* phong weighted disc using 'blur' for exponent, centred on 0,0 */ +static void QMC_samplePhong(float vec[3], QMCSampler *qsa, int thread, int num, float blur) +{ + double s[2]; + float phi, pz, sqr; + + QMC_getSample(s, qsa, thread, num); + + phi = s[0]*2*M_PI; + pz = pow(s[1], blur); + sqr = sqrtf(1.0f - pz * pz); + + vec[0] = (float)(cosf(phi)*sqr); + vec[1] = (float)(sinf(phi)*sqr); + vec[2] = 0.0f; +} + +/* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. 
ranging from -sizex/2 to +sizey/2 */ +static void QMC_sampleRect(float vec[3], QMCSampler *qsa, int thread, int num, float sizex, float sizey) +{ + double s[2]; + + QMC_getSample(s, qsa, thread, num); + + vec[0] = (float)(s[0] - 0.5) * sizex; + vec[1] = (float)(s[1] - 0.5) * sizey; + vec[2] = 0.0f; +} + +/* disc of radius 'radius', centred on 0,0 */ +static void QMC_sampleDisc(float vec[3], QMCSampler *qsa, int thread, int num, float radius) +{ + double s[2]; + float phi, sqr; + + QMC_getSample(s, qsa, thread, num); + + phi = s[0]*2*M_PI; + sqr = sqrt(s[1]); + + vec[0] = cosf(phi)*sqr* radius/2.0f; + vec[1] = sinf(phi)*sqr* radius/2.0f; + vec[2] = 0.0f; +} + +/* uniform hemisphere sampling */ +static void QMC_sampleHemi(float vec[3], QMCSampler *qsa, int thread, int num) +{ + double s[2]; + float phi, sqr; + + QMC_getSample(s, qsa, thread, num); + + phi = s[0]*2.0*M_PI; + sqr = sqrt(s[1]); + + vec[0] = cosf(phi)*sqr; + vec[1] = sinf(phi)*sqr; + vec[2] = (float)(1.0 - s[1]*s[1]); +} + +#if 0 /* currently not used */ +/* cosine weighted hemisphere sampling */ +static void QMC_sampleHemiCosine(float vec[3], QMCSampler *qsa, int thread, int num) +{ + double s[2]; + float phi, sqr; + + QMC_getSample(s, qsa, thread, num); + + phi = s[0]*2.f*M_PI; + sqr = s[1]*sqrt(2-s[1]*s[1]); + + vec[0] = cos(phi)*sqr; + vec[1] = sin(phi)*sqr; + vec[2] = 1.f - s[1]*s[1]; + +} +#endif + +/* called from convertBlenderScene.c */ +void init_render_qmcsampler(Render *re) +{ + const int num_threads = re->r.threads; + re->qmcsamplers= MEM_callocN(sizeof(ListBase)*num_threads, "QMCListBase"); + re->num_qmc_samplers = num_threads; +} + +static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot) +{ + QMCSampler *qsa; + + /* create qmc samplers as needed, since recursion makes it hard to + * predict how many are needed */ + + for (qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) { + if (qsa->type == type && qsa->tot == tot && !qsa->used) { + qsa->used = true; + 
/* Decide whether adaptive sampling may stop.
 *
 * 'col' holds the per-channel sum of all samples so far and 'colsq' the
 * per-channel sum of their squares; 'samples' is the sample count.
 * Returns 1 when the weighted variance of every channel is below thresh/2,
 * otherwise 0. */
static int adaptive_sample_variance(int samples, const float col[3], const float colsq[3], float thresh)
{
	/* per-channel weights applied to the variance before comparing */
	static const float channel_weight[3] = {0.4f, 0.3f, 0.6f};
	const float count = (float)samples;
	/* scale threshold just to give a bit more precision in input rather than
	 * dealing with tiny tiny numbers in the UI */
	const float limit = thresh / 2;
	int c;

	for (c = 0; c < 3; c++) {
		const float mean = col[c] / count;
		const float variance = (colsq[c] / count) - (mean * mean);

		/* written as a negated '<' so that a NaN variance also counts as
		 * "not converged" */
		if (!(variance * channel_weight[c] < limit)) {
			return 0;
		}
	}

	return 1;
}
the samples taken are very similar), then taking more samples that are probably + * going to be the same is wasting effort */ + if (fabsf(prev / (float)(samples - 1) - val / (float)samples ) < thresh) { + return 1; + } + else + return 0; +} + +static float get_avg_speed(ShadeInput *shi) +{ + float pre_x, pre_y, post_x, post_y, speedavg; + + pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[0]; + pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[1]; + post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[2]; + post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[3]; + + speedavg = (sqrtf(pre_x * pre_x + pre_y * pre_y) + sqrtf(post_x * post_x + post_y * post_y)) / 2.0f; + + return speedavg; +} + +/* ***************** main calls ************** */ + + +static void trace_refract(float col[4], ShadeInput *shi, ShadeResult *shr) +{ + QMCSampler *qsa=NULL; + int samp_type; + int traflag=0; + + float samp3d[3], orthx[3], orthy[3]; + float v_refract[3], v_refract_new[3]; + float sampcol[4], colsq[4]; + + float blur = pow3f(1.0f - shi->mat->gloss_tra); + short max_samples = shi->mat->samp_gloss_tra; + float adapt_thresh = shi->mat->adapt_thresh_tra; + + int samples=0; + + colsq[0] = colsq[1] = colsq[2] = 0.0; + col[0] = col[1] = col[2] = 0.0; + col[3]= shr->alpha; + + if (blur > 0.0f) { + if (adapt_thresh != 0.0f) samp_type = SAMP_TYPE_HALTON; + else samp_type = SAMP_TYPE_HAMMERSLEY; + + /* all samples are generated per pixel */ + qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples); + QMC_initPixel(qsa, shi->thread); + } + else + max_samples = 1; + + + while (samples < max_samples) { + if (refraction(v_refract, shi->vn, shi->view, shi->ang)) { + traflag |= RAY_INSIDE; + } + else { + /* total external reflection can happen for materials with IOR < 1.0 */ + if ((shi->vlr->flag & R_SMOOTH)) + reflection(v_refract, shi->vn, shi->view, shi->facenor); + else + reflection_simple(v_refract, shi->vn, shi->view); + 
/* Trace (optionally glossy) mirror reflections for the shaded point and
 * return the averaged RGB of all traced samples in 'col'.
 *
 * 'fresnelfac' is only used to lower the sample budget for dim reflections;
 * it is not applied to the colour here. */
static void trace_reflect(float col[3], ShadeInput *shi, ShadeResult *shr, float fresnelfac)
{
	QMCSampler *qsa=NULL;
	int samp_type;

	float samp3d[3], orthx[3], orthy[3];
	float v_nor_new[3], v_reflect[3];
	float sampcol[4], colsq[4];

	float blur = pow3f(1.0f - shi->mat->gloss_mir);
	short max_samples = shi->mat->samp_gloss_mir;
	float adapt_thresh = shi->mat->adapt_thresh_mir;
	float aniso = 1.0f - shi->mat->aniso_gloss_mir;

	int samples=0;

	col[0] = col[1] = col[2] = 0.0;
	colsq[0] = colsq[1] = colsq[2] = 0.0;

	/* a sampler is only needed when there is blur; a non-zero adaptive
	 * threshold selects Halton, otherwise Hammersley is used */
	if (blur > 0.0f) {
		if (adapt_thresh != 0.0f) samp_type = SAMP_TYPE_HALTON;
		else samp_type = SAMP_TYPE_HAMMERSLEY;

		/* all samples are generated per pixel */
		qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
		QMC_initPixel(qsa, shi->thread);
	}
	else
		max_samples = 1;

	/* NOTE(review): max_samples is lowered inside the loop, and if it starts
	 * at 0 with blur > 0 the loop never runs and the final averages divide by
	 * samples == 0 -- confirm samp_gloss_mir is always >= 1 */
	while (samples < max_samples) {

		if (max_samples > 1) {
			/* get a quasi-random vector from a phong-weighted disc */
			QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);

			/* find the normal's perpendicular plane, blurring along tangents
			 * if tangent shading enabled */
			if (shi->mat->mode & (MA_TANGENT_V)) {
				cross_v3_v3v3(orthx, shi->vn, shi->tang);      // bitangent
				copy_v3_v3(orthy, shi->tang);
				mul_v3_fl(orthx, samp3d[0]);
				/* only the tangent direction is scaled by the anisotropy factor */
				mul_v3_fl(orthy, samp3d[1]*aniso);
			}
			else {
				ortho_basis_v3v3_v3(orthx, orthy, shi->vn);
				mul_v3_fl(orthx, samp3d[0]);
				mul_v3_fl(orthy, samp3d[1]);
			}

			/* and perturb the normal in it */
			add_v3_v3v3(v_nor_new, shi->vn, orthx);
			add_v3_v3(v_nor_new, orthy);
			normalize_v3(v_nor_new);
		}
		else {
			/* no blurriness, use the original normal */
			copy_v3_v3(v_nor_new, shi->vn);
		}

		/* smooth faces also pass the face normal to the reflection helper */
		if ((shi->vlr->flag & R_SMOOTH))
			reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
		else
			reflection_simple(v_reflect, v_nor_new, shi->view);

		sampcol[0]= sampcol[1]= sampcol[2]= sampcol[3]= 0.0f;

		traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);


		/* running sums; divided by the sample count after the loop */
		col[0] += sampcol[0];
		col[1] += sampcol[1];
		col[2] += sampcol[2];

		/* for variance calc */
		colsq[0] += sampcol[0]*sampcol[0];
		colsq[1] += sampcol[1]*sampcol[1];
		colsq[2] += sampcol[2]*sampcol[2];

		samples++;

		/* adaptive sampling: only considered once more than a third of the
		 * current sample budget has been traced */
		if (adapt_thresh > 0.0f && samples > max_samples/3) {
			if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
				break;

			/* if the pixel so far is very dark, we can get away with less samples */
			if ( (col[0] + col[1] + col[2])/3.0f/(float)samples < 0.01f )
				max_samples--;

			/* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
			 * and when reflection is blurry */
			if (fresnelfac < 0.1f * (blur+1)) {
				max_samples--;

				/* even more for very dim */
				if (fresnelfac < 0.05f * (blur+1))
					max_samples--;
			}
		}
	}

	/* average over the samples actually traced */
	col[0] /= (float)samples;
	col[1] /= (float)samples;
	col[2] /= (float)samples;

	/* hand the sampler back so it can be reused */
	if (qsa)
		release_thread_qmcsampler(&R, shi->thread, qsa);
}
/* Let the light 'shadfac' (RGB plus remaining-light factor in [3]) pass
 * through a surface of colour 'col' and opacity 'alpha'.
 *
 * The opaque part contributes alpha*col; the transparent part lets the old
 * light through, tinted towards 'col' by 'filter' (0 = untinted, 1 = fully
 * tinted). The filter is only applied on the alpha-defined transparent part.
 * shadfac[3] is reduced by the transparency (1 - alpha). */
static void addAlphaLight(float shadfac[4], const float col[3], float alpha, float filter)
{
	float tint[3];
	int c;

	/* all tints are derived from 'col' before anything is written back */
	for (c = 0; c < 3; c++) {
		tint[c] = 1.0f + filter * (col[c] - 1.0f);
	}

	for (c = 0; c < 3; c++) {
		shadfac[c] = alpha * col[c] + tint[c] * (1.0f - alpha) * shadfac[c];
	}

	shadfac[3] = (1.0f - alpha) * shadfac[3];
}
so exit when alpha is full */ + const float initial_dist = is->dist; + + if (RE_rayobject_raycast(R.raytree, is)) { + /* Warning regarding initializing to zero's, This is not that nice, + * and possibly a bit slow for every ray, however some variables were + * not initialized properly in, unless using + * shade_input_initialize(...), we need to zero them. */ + ShadeInput shi= {NULL}; + /* end warning! - Campbell */ + + ShadeResult shr; + + /* we got a face */ + + shi.depth= origshi->depth + 1; /* only used to indicate tracing */ + shi.mask= origshi->mask; + shi.thread= origshi->thread; + shi.passflag= SCE_PASS_COMBINED; + shi.combinedflag= 0xFFFFFF; /* ray trace does all options */ + + shi.xs= origshi->xs; + shi.ys= origshi->ys; + shi.do_manage= origshi->do_manage; + shi.lay= origshi->lay; + shi.nodes= origshi->nodes; + + RE_instance_rotate_ray_restore(origshi->obi, is); + + shade_ray(is, &shi, &shr); + if (shi.mat->material_type == MA_TYPE_SURFACE) { + const float d = (shi.mat->mode & MA_RAYTRANSP) ? + ((traflag & RAY_TRA) ? 
/* calc distributed spherical energy:
 * push 'vec' away from the 'tot' points stored in 'sphere' (3 floats each).
 * Every point contributes its offset to 'vec' divided by the squared
 * distance; points at zero distance are skipped. Half of the summed push is
 * added to 'vec', which is then normalized in place. */
static void DS_energy(float *sphere, int tot, float vec[3])
{
	float push[3] = {0.0f, 0.0f, 0.0f};
	int i;

	for (i = 0; i < tot; i++) {
		const float *other = sphere + 3 * i;
		float delta[3];
		float dist_sq, weight;

		sub_v3_v3v3(delta, vec, other);
		dist_sq = dot_v3v3(delta, delta);

		/* coincident point: no direction to push in, and avoids 1/0 */
		if (dist_sq == 0.0f) {
			continue;
		}

		weight = 1.0f / dist_sq;
		push[0] += weight * delta[0];
		push[1] += weight * delta[1];
		push[2] += weight * delta[2];
	}

	mul_v3_fl(push, 0.5);
	add_v3_v3(vec, push);
	normalize_v3(vec);
}
+ while (iter--) { + for (a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) { + DS_energy(wrld->aosphere, tot, fp); + } + } + + /* tables */ + wrld->aotables= MEM_mallocN(num_threads*3*tot*sizeof(float), "AO tables"); + + BLI_rng_free(rng); +} + +/* give per thread a table, we have to compare xs ys because of way OSA works... */ +static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot) +{ + static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS]; + static int firsttime= 1; + + if (firsttime) { + memset(xso, 255, sizeof(xso)); + memset(yso, 255, sizeof(yso)); + firsttime= 0; + } + + if (xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3; + if (test) return NULL; + xso[thread]= xs; yso[thread]= ys; + return R.wrld.aotables+ thread*tot*3; +} + +static float *sphere_sampler(int type, int resol, int thread, int xs, int ys, int reset) +{ + int tot; + float *vec; + + tot= 2*resol*resol; + + if (type & WO_AORNDSMP) { + /* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */ + RNG *rng = BLI_rng_new(BLI_thread_rand(thread)); + float *sphere; + int a; + + /* always returns table */ + sphere= threadsafe_table_sphere(0, thread, xs, ys, tot); + + vec= sphere; + for (a=0; a<tot; a++, vec+=3) { + BLI_rng_get_float_unit_v3(rng, vec); + } + + BLI_rng_free(rng); + + return sphere; + } + else { + float *sphere; + float *vec1; + + /* returns table if xs and ys were equal to last call, and not resetting */ + sphere= (reset)? 
/* Ambient occlusion using a QMC (Halton or Hammersley) hemisphere sampler.
 *
 * Writes the occlusion factor to 'ao' (same value in all three channels) and
 * the environment colour to 'env'. When no sky colour was gathered, or the
 * environment colour mode is WO_AOPLAIN, 'env' is a copy of 'ao'.
 *
 * NOTE(review): 'qsa' stays NULL when R.wrld.ao_samp_method is neither
 * WO_AOSAMP_HALTON nor WO_AOSAMP_HAMMERSLEY, yet it is passed to
 * QMC_initPixel() and QMC_sampleHemi() below; ray_ao() only dispatches here
 * for those two methods. */
static void ray_ao_qmc(ShadeInput *shi, float ao[3], float env[3])
{
	Isect isec;
	RayHint point_hint;
	QMCSampler *qsa=NULL;
	float samp3d[3];
	float up[3], side[3], dir[3], nrm[3];

	float maxdist = R.wrld.aodist;
	/* fac accumulates occlusion; prev is its value before the latest sample */
	float fac=0.0f, prev=0.0f;
	float adapt_thresh = R.wrld.ao_adapt_thresh;
	float adapt_speed_fac = R.wrld.ao_adapt_speed_fac;

	int samples=0;
	int max_samples = R.wrld.aosamp*R.wrld.aosamp;

	/* skyadded counts the samples that contributed a sky colour to env */
	float dxyview[3], skyadded=0;
	int envcolor;

	RE_RC_INIT(isec, *shi);
	isec.orig.ob = shi->obi;
	isec.orig.face = shi->vlr;
	isec.check = RE_CHECK_VLR_NON_SOLID_MATERIAL;
	isec.skip = RE_SKIP_VLR_NEIGHBOUR;
	isec.hint = NULL;

	isec.hit.ob = NULL;
	isec.hit.face = NULL;

	isec.last_hit = NULL;

	isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
	isec.lay= -1;

	copy_v3_v3(isec.start, shi->co);

	RE_instance_rotate_ray_start(shi->obi, &isec);

	/* all rays start at the same point, so the hint box is that single point */
	RE_rayobject_hint_bb(R.raytree, &point_hint, isec.start, isec.start);
	isec.hint = &point_hint;

	zero_v3(ao);
	zero_v3(env);

	/* prevent sky colors to be added for only shadow (shadow becomes alpha) */
	envcolor= R.wrld.aocolor;
	if (shi->mat->mode & MA_ONLYSHADOW)
		envcolor= WO_AOPLAIN;

	/* dxyview is only initialized, and only read, in WO_AOSKYTEX mode */
	if (envcolor == WO_AOSKYTEX) {
		dxyview[0]= 1.0f/(float)R.wrld.aosamp;
		dxyview[1]= 1.0f/(float)R.wrld.aosamp;
		dxyview[2]= 0.0f;
	}

	/* smooth faces use the shading normal, others the face normal */
	if (shi->vlr->flag & R_SMOOTH) {
		copy_v3_v3(nrm, shi->vn);
	}
	else {
		copy_v3_v3(nrm, shi->facenor);
	}

	ortho_basis_v3v3_v3(up, side, nrm);

	/* sampling init */
	if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
		float speedfac;

		/* the sample budget is divided by a factor from the average speed,
		 * clamped to [1, 1000], and never drops below 5 samples */
		speedfac = get_avg_speed(shi) * adapt_speed_fac;
		CLAMP(speedfac, 1.0f, 1000.0f);
		max_samples /= speedfac;
		if (max_samples < 5) max_samples = 5;

		qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
	}
	else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
		qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);

	QMC_initPixel(qsa, shi->thread);

	while (samples < max_samples) {

		/* sampling, returns quasi-random vector in unit hemisphere */
		QMC_sampleHemi(samp3d, qsa, shi->thread, samples);

		/* express the sample in the (up, side, nrm) basis built above */
		dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
		dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
		dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);

		normalize_v3(dir);

		/* the ray is cast along the negated direction */
		isec.dir[0] = -dir[0];
		isec.dir[1] = -dir[1];
		isec.dir[2] = -dir[2];
		isec.dist = maxdist;

		RE_instance_rotate_ray_dir(shi->obi, &isec);

		prev = fac;

		if (RE_rayobject_raycast(R.raytree, &isec)) {
			/* hit: with WO_AODIST the contribution falls off exponentially
			 * with the hit distance, otherwise it counts fully */
			if (R.wrld.aomode & WO_AODIST) fac+= expf(-isec.dist*R.wrld.aodistfac);
			else fac+= 1.0f;
		}
		else if (envcolor!=WO_AOPLAIN) {
			/* miss: gather sky colour for this direction */
			float skycol[4];
			float view[3];

			view[0]= -dir[0];
			view[1]= -dir[1];
			view[2]= -dir[2];
			normalize_v3(view);

			if (envcolor==WO_AOSKYCOL) {
				/* blend horizon and zenith colour by the direction relative to R.grvec */
				const float skyfac= 0.5f * (1.0f + dot_v3v3(view, R.grvec));
				env[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
				env[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
				env[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
			}
			else {	/* WO_AOSKYTEX */
				shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
				shadeSunView(skycol, shi->view);
				env[0]+= skycol[0];
				env[1]+= skycol[1];
				env[2]+= skycol[2];
			}
			skyadded++;
		}

		samples++;

		if (qsa && qsa->type == SAMP_TYPE_HALTON) {
			/* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
			if (adapt_thresh > 0.0f && (samples > max_samples/2) ) {

				if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
					break;
				}
			}
		}
	}

	/* average color times distances/hits formula */
	ao[0]= ao[1]= ao[2]= 1.0f - fac/(float)samples;

	/* env: summed sky colour averaged over the sky samples and scaled by the
	 * unoccluded fraction, or just the plain ao value */
	if (envcolor!=WO_AOPLAIN && skyadded)
		mul_v3_fl(env, (1.0f - fac/(float)samples)/((float)skyadded));
	else
		copy_v3_v3(env, ao);

	if (qsa)
		release_thread_qmcsampler(&R, shi->thread, qsa);
}
for faces we get the same samples for sample x/y values, + * for strand render we always require a new sampler because x/y are not set */ + vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys, shi->strand != NULL); + + /* warning: since we use full sphere now, and dotproduct is below, we do twice as much */ + tot= 2*resol*resol; + + if (envcolor == WO_AOSKYTEX) { + dxyview[0]= 1.0f/(float)resol; + dxyview[1]= 1.0f/(float)resol; + dxyview[2]= 0.0f; + } + + while (tot--) { + + if (dot_v3v3(vec, nrm) > bias) { + /* only ao samples for mask */ + if (R.r.mode & R_OSA) { + j++; + if (j==R.osa) j= 0; + if (!(shi->mask & (1<<j))) { + vec+=3; + continue; + } + } + + actual++; + + /* always set start/vec/dist */ + isec.dir[0] = -vec[0]; + isec.dir[1] = -vec[1]; + isec.dir[2] = -vec[2]; + isec.dist = maxdist; + + RE_instance_rotate_ray_dir(shi->obi, &isec); + + /* do the trace */ + if (RE_rayobject_raycast(R.raytree, &isec)) { + if (R.wrld.aomode & WO_AODIST) sh+= expf(-isec.dist*R.wrld.aodistfac); + else sh+= 1.0f; + } + else if (envcolor!=WO_AOPLAIN) { + float skycol[4]; + float view[3]; + + view[0]= -vec[0]; + view[1]= -vec[1]; + view[2]= -vec[2]; + normalize_v3(view); + + if (envcolor==WO_AOSKYCOL) { + const float fac = 0.5f * (1.0f + dot_v3v3(view, R.grvec)); + env[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr; + env[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng; + env[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb; + } + else { /* WO_AOSKYTEX */ + shadeSkyView(skycol, isec.start, view, dxyview, shi->thread); + shadeSunView(skycol, shi->view); + env[0]+= skycol[0]; + env[1]+= skycol[1]; + env[2]+= skycol[2]; + } + skyadded++; + } + } + /* samples */ + vec+= 3; + } + + if (actual==0) sh= 1.0f; + else sh = 1.0f - sh/((float)actual); + + /* average color times distances/hits formula */ + ao[0]= ao[1]= ao[2]= sh; + + if (envcolor!=WO_AOPLAIN && skyadded) + mul_v3_fl(env, sh/((float)skyadded)); + else + copy_v3_v3(env, ao); +} + +void ray_ao(ShadeInput 
*shi, float ao[3], float env[3]) +{ + /* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many + * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult + * to reuse code between these two functions. This is the easiest way I can think of to do it + * --broken */ + if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON)) + ray_ao_qmc(shi, ao, env); + else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT) + ray_ao_spheresamp(shi, ao, env); +} + +static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco) +{ + /* magic numbers for reordering sample positions to give better + * results with adaptive sample, when it usually only takes 4 samples */ + int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7}; + int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9}; + int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15}; + int count = count_mask(shi->mask); + + /* for better antialising shadow samples are distributed over the subpixel + * sample coordinates, this only works for raytracing depth 0 though */ + if (!shi->strand && shi->depth == 0 && count > 1 && count <= max) { + float xs, ys, zs, view[3]; + int samp, ordsamp, tot= 0; + + for (samp=0; samp<R.osa; samp++) { + if (R.osa == 8) ordsamp = order8[samp]; + else if (R.osa == 11) ordsamp = order11[samp]; + else if (R.osa == 16) ordsamp = order16[samp]; + else ordsamp = samp; + + if (shi->mask & (1<<ordsamp)) { + /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */ + xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f; + ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f; + zs= shi->scanco[2]; + + shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL); + tot++; + } + } + + *totjitco= tot; + } + else { + copy_v3_v3(jitco[0], shi->co); + *totjitco= 1; + } +} + +static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, const float 
lampco[3], float shadfac[4], Isect *isec) +{ + QMCSampler *qsa=NULL; + int samples=0; + float samp3d[3]; + + float fac=0.0f, vec[3], end[3]; + float colsq[4]; + float adapt_thresh = lar->adapt_thresh; + int min_adapt_samples=4, max_samples = lar->ray_totsamp; + float start[3]; + bool do_soft = true, full_osa = false; + int i; + + float min[3], max[3]; + RayHint bb_hint; + + float jitco[RE_MAX_OSA][3]; + int totjitco; + + colsq[0] = colsq[1] = colsq[2] = 0.0; + if (isec->mode==RE_RAY_SHADOW_TRA) { + shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f; + } + else + shadfac[3]= 1.0f; + + if (lar->ray_totsamp < 2) do_soft = false; + if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = true; + + if (full_osa) { + if (do_soft) max_samples = max_samples/R.osa + 1; + else max_samples = 1; + } + else { + if (do_soft) max_samples = lar->ray_totsamp; + else if (shi->depth == 0) max_samples = (R.osa > 4)?R.osa:5; + else max_samples = 1; + } + + ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco); + + /* sampling init */ + if (lar->ray_samp_method==LA_SAMP_HALTON) + qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples); + else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY) + qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples); + + QMC_initPixel(qsa, shi->thread); + + INIT_MINMAX(min, max); + for (i = 0; i < totjitco; i++) { + minmax_v3v3_v3(min, max, jitco[i]); + } + if (shi->obi->flag & R_ENV_TRANSFORMED) { + mul_m4_v3(shi->obi->imat, min); + mul_m4_v3(shi->obi->imat, max); + } + RE_rayobject_hint_bb(R.raytree, &bb_hint, min, max); + + isec->hint = &bb_hint; + isec->check = RE_CHECK_VLR_RENDER; + isec->skip = RE_SKIP_VLR_NEIGHBOUR; + copy_v3_v3(vec, lampco); + + while (samples < max_samples) { + + isec->orig.ob = shi->obi; + isec->orig.face = shi->vlr; + + /* manually jitter the start shading co-ord per sample + * based on the pre-generated OSA texture sampling offsets, + * for 
anti-aliasing sharp shadow edges. */ + copy_v3_v3(start, jitco[samples % totjitco]); + + if (do_soft) { + /* sphere shadow source */ + if (lar->type == LA_LOCAL) { + float ru[3], rv[3], v[3], s[3]; + + /* calc tangent plane vectors */ + sub_v3_v3v3(v, start, lampco); + normalize_v3(v); + ortho_basis_v3v3_v3(ru, rv, v); + + /* sampling, returns quasi-random vector in area_size disc */ + QMC_sampleDisc(samp3d, qsa, shi->thread, samples, lar->area_size); + + /* distribute disc samples across the tangent plane */ + s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0]; + s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1]; + s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2]; + + copy_v3_v3(samp3d, s); + } + else { + /* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */ + QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey); + + /* align samples to lamp vector */ + mul_m3_v3(lar->mat, samp3d); + } + end[0] = vec[0]+samp3d[0]; + end[1] = vec[1]+samp3d[1]; + end[2] = vec[2]+samp3d[2]; + } + else { + copy_v3_v3(end, vec); + } + + if (shi->strand) { + /* bias away somewhat to avoid self intersection */ + float jitbias= 0.5f*(len_v3(shi->dxco) + len_v3(shi->dyco)); + float v[3]; + + sub_v3_v3v3(v, start, end); + normalize_v3(v); + + start[0] -= jitbias*v[0]; + start[1] -= jitbias*v[1]; + start[2] -= jitbias*v[2]; + } + + copy_v3_v3(isec->start, start); + sub_v3_v3v3(isec->dir, end, start); + isec->dist = normalize_v3(isec->dir); + + RE_instance_rotate_ray(shi->obi, isec); + + /* trace the ray */ + if (isec->mode==RE_RAY_SHADOW_TRA) { + float col[4] = {1.0f, 1.0f, 1.0f, 1.0f}; + + ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0, col); + shadfac[0] += col[0]; + shadfac[1] += col[1]; + shadfac[2] += col[2]; + shadfac[3] += col[3]; + + /* for variance calc */ + colsq[0] += col[0]*col[0]; + colsq[1] += col[1]*col[1]; + colsq[2] += col[2]*col[2]; + } + else { + if ( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f; + } + + samples++; + + if (lar->ray_samp_method 
/* Soft shadow from a lamp using the lamp's jitter plane.
 *
 * Casts up to lar->ray_totsamp rays from shi->co towards jittered positions
 * around 'lampco' and averages the result into 'shadfac':
 * - RE_RAY_SHADOW_TRA: all four components are the average of the colours
 *   returned by ray_trace_shadow_tra();
 * - otherwise: only shadfac[3] is written, as 1 minus the fraction of rays
 *   that hit something (optionally square-rooted for the umbra option). */
static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, const float lampco[3], float shadfac[4], Isect *isec)
{
	/* area soft shadow */
	const float *jitlamp;
	/* fac counts blocked rays, div counts rays actually cast */
	float fac=0.0f, div=0.0f, vec[3];
	int a, j= -1, mask;
	RayHint point_hint;

	if (isec->mode==RE_RAY_SHADOW_TRA) {
		shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
	}
	else shadfac[3]= 1.0f;

	fac= 0.0f;
	jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);

	a= lar->ray_totsamp;

	/* this correction to make sure we always take at least 1 sample */
	/* NOTE(review): higher mask bits are only folded down for a==4 (shifts 4
	 * and 8) and a==9 (shift 9). For other counts smaller than R.osa, or a==4
	 * with only bits 12 and up set, every sample could be skipped below,
	 * leaving div at 0 for the final divisions -- confirm */
	mask= shi->mask;
	if (a==4) mask |= (mask>>4)|(mask>>8);
	else if (a==9) mask |= (mask>>9);

	copy_v3_v3(isec->start, shi->co);
	RE_instance_rotate_ray_start(shi->obi, isec);

	isec->orig.ob = shi->obi;
	isec->orig.face = shi->vlr;
	/* all rays start at the same point, so the hint box is that single point */
	RE_rayobject_hint_bb(R.raytree, &point_hint, isec->start, isec->start);
	isec->hint = &point_hint;

	while (a--) {

		/* with OSA, j cycles through the subsample indices and jitter
		 * positions whose bit is not set in the mask are skipped */
		if (R.r.mode & R_OSA) {
			j++;
			if (j>=R.osa) j= 0;
			if (!(mask & (1<<j))) {
				jitlamp+= 2;
				continue;
			}
		}

		/* 2D jitter offset, transformed by the lamp matrix */
		vec[0]= jitlamp[0];
		vec[1]= jitlamp[1];
		vec[2]= 0.0f;
		mul_m3_v3(lar->mat, vec);

		/* set start and vec */
		/* the direction is left unnormalized: it spans from the shading point
		 * to the jittered lamp position, and dist is set to 1.0 below */
		isec->dir[0] = vec[0]+lampco[0]-shi->co[0];
		isec->dir[1] = vec[1]+lampco[1]-shi->co[1];
		isec->dir[2] = vec[2]+lampco[2]-shi->co[2];

		RE_instance_rotate_ray_dir(shi->obi, isec);

		isec->dist = 1.0f;
		isec->check = RE_CHECK_VLR_RENDER;
		isec->skip = RE_SKIP_VLR_NEIGHBOUR;

		if (isec->mode==RE_RAY_SHADOW_TRA) {
			/* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
			float col[4] = {1.0f, 1.0f, 1.0f, 1.0f};

			ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0, col);
			shadfac[0] += col[0];
			shadfac[1] += col[1];
			shadfac[2] += col[2];
			shadfac[3] += col[3];
		}
		else if ( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;

		div+= 1.0f;
		jitlamp+= 2;
	}

	/* average over the rays that were actually cast */
	if (isec->mode==RE_RAY_SHADOW_TRA) {
		shadfac[0] /= div;
		shadfac[1] /= div;
		shadfac[2] /= div;
		shadfac[3] /= div;
	}
	else {
		/* sqrt makes nice umbra effect */
		if (lar->ray_samp_type & LA_SAMP_UMBRA)
			shadfac[3] = sqrtf(1.0f - fac / div);
		else
			shadfac[3] = 1.0f - fac / div;
	}
}
+ * + * This was detected during SoC 2009 - Raytrace Optimization, but to keep + * consistency with older render code it wasn't removed. + * + * If the render code goes through some recode/serious bug-fix then this + * is something to consider! + */ + lampco[0]= shi->co[0] - R.maxdist*lar->vec[0]; + lampco[1]= shi->co[1] - R.maxdist*lar->vec[1]; + lampco[2]= shi->co[2] - R.maxdist*lar->vec[2]; + } + else { + copy_v3_v3(lampco, lar->co); + } + + if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) { + + ray_shadow_qmc(shi, lar, lampco, shadfac, &isec); + + } + else { + if (lar->ray_totsamp<2) { + + isec.orig.ob = shi->obi; + isec.orig.face = shi->vlr; + + shadfac[3]= 1.0f; /* 1.0=full light */ + + /* set up isec.dir */ + copy_v3_v3(isec.start, shi->co); + sub_v3_v3v3(isec.dir, lampco, isec.start); + isec.dist = normalize_v3(isec.dir); + + RE_instance_rotate_ray(shi->obi, &isec); + + if (isec.mode==RE_RAY_SHADOW_TRA) { + /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */ + float col[4] = {1.0f, 1.0f, 1.0f, 1.0f}; + + ray_trace_shadow_tra(&isec, shi, DEPTH_SHADOW_TRA, 0, col); + copy_v4_v4(shadfac, col); + } + else if (RE_rayobject_raycast(R.raytree, &isec)) + shadfac[3]= 0.0f; + } + else { + ray_shadow_jitter(shi, lar, lampco, shadfac, &isec); + } + } + + /* for first hit optim, set last interesected shadow face */ + if (shi->depth==0) { + lar->last_hit[shi->thread] = isec.last_hit; + } + +} + diff --git a/source/blender/render/intern/source/render_result.c b/source/blender/render/intern/source/render_result.c index 5fd897219c4..e0cacdf4b8f 100644 --- a/source/blender/render/intern/source/render_result.c +++ b/source/blender/render/intern/source/render_result.c @@ -95,7 +95,7 @@ void render_result_free(RenderResult *res) if (rl->acolrect) MEM_freeN(rl->acolrect); if (rl->scolrect) MEM_freeN(rl->scolrect); if (rl->display_buffer) MEM_freeN(rl->display_buffer); - + while (rl->passes.first) { RenderPass *rpass = 
rl->passes.first; if (rpass->rect) MEM_freeN(rpass->rect); @@ -128,13 +128,13 @@ void render_result_free(RenderResult *res) void render_result_free_list(ListBase *lb, RenderResult *rr) { RenderResult *rrnext; - + for (; rr; rr = rrnext) { rrnext = rr->next; - + if (lb && lb->first) BLI_remlink(lb, rr); - + render_result_free(rr); } } @@ -206,7 +206,7 @@ static RenderPass *render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int const int view_id = BLI_findstringindex(&rr->views, viewname, offsetof(RenderView, name)); RenderPass *rpass = MEM_callocN(sizeof(RenderPass), name); size_t rectsize = ((size_t)rr->rectx) * rr->recty * channels; - + rpass->channels = channels; rpass->rectx = rl->rectx; rpass->recty = rl->recty; @@ -216,7 +216,7 @@ static RenderPass *render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int BLI_strncpy(rpass->chan_id, chan_id, sizeof(rpass->chan_id)); BLI_strncpy(rpass->view, viewname, sizeof(rpass->view)); set_pass_full_name(rpass->fullname, rpass->name, -1, rpass->view, rpass->chan_id); - + if (rl->exrhandle) { int a; for (a = 0; a < channels; a++) { @@ -227,13 +227,13 @@ static RenderPass *render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int else { float *rect; int x; - + rpass->rect = MEM_mapallocN(sizeof(float) * rectsize, name); if (rpass->rect == NULL) { MEM_freeN(rpass); return NULL; } - + if (STREQ(rpass->name, RE_PASSNAME_VECTOR)) { /* initialize to max speed */ rect = rpass->rect; @@ -267,13 +267,13 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf RenderLayer *rl; RenderView *rv; int rectx, recty; - + rectx = BLI_rcti_size_x(partrct); recty = BLI_rcti_size_y(partrct); - + if (rectx <= 0 || recty <= 0) return NULL; - + rr = MEM_callocN(sizeof(RenderResult), "new render result"); rr->rectx = rectx; rr->recty = recty; @@ -286,7 +286,7 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf rr->tilerect.xmax = partrct->xmax - re->disprect.xmin; 
rr->tilerect.ymin = partrct->ymin - re->disprect.ymin; rr->tilerect.ymax = partrct->ymax - re->disprect.ymin; - + if (savebuffers) { rr->do_exr_tile = true; } @@ -304,14 +304,14 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf rl = MEM_callocN(sizeof(RenderLayer), "new render layer"); BLI_addtail(&rr->layers, rl); - + BLI_strncpy(rl->name, view_layer->name, sizeof(rl->name)); rl->layflag = view_layer->layflag; rl->passflag = view_layer->passflag; /* for debugging: view_layer->passflag | SCE_PASS_RAYHITS; */ rl->pass_xor = view_layer->pass_xor; rl->rectx = rectx; rl->recty = recty; - + if (rr->do_exr_tile) { rl->display_buffer = MEM_mapallocN((size_t)rectx * recty * sizeof(unsigned int), "Combined display space rgba"); @@ -412,7 +412,7 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf if (BLI_listbase_is_empty(&rr->layers) && !(layername && layername[0])) { rl = MEM_callocN(sizeof(RenderLayer), "new render layer"); BLI_addtail(&rr->layers, rl); - + rl->rectx = rectx; rl->recty = recty; @@ -439,15 +439,15 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf /* note, this has to be in sync with scene.c */ rl->layflag = 0x7FFF; /* solid ztra halo strand */ rl->passflag = SCE_PASS_COMBINED; - + re->active_view_layer = 0; } - + /* border render; calculate offset for use in compositor. compo is centralized coords */ /* XXX obsolete? 
I now use it for drawing border render offset (ton) */ rr->xof = re->disprect.xmin + BLI_rcti_cent_x(&re->disprect) - (re->winx / 2); rr->yof = re->disprect.ymin + BLI_rcti_cent_y(&re->disprect) - (re->winy / 2); - + return rr; } @@ -554,7 +554,7 @@ static void *ml_addlayer_cb(void *base, const char *str) { RenderResult *rr = base; RenderLayer *rl; - + rl = MEM_callocN(sizeof(RenderLayer), "new render layer"); BLI_addtail(&rr->layers, rl); @@ -676,7 +676,7 @@ RenderResult *render_result_new_from_exr(void *exrhandle, const char *colorspace rr->rectx = rectx; rr->recty = recty; - + IMB_exr_multilayer_convert(exrhandle, rr, ml_addview_cb, ml_addlayer_cb, ml_addpass_cb); for (rl = rr->layers.first; rl; rl = rl->next) { @@ -695,7 +695,7 @@ RenderResult *render_result_new_from_exr(void *exrhandle, const char *colorspace } } } - + return rr; } @@ -740,16 +740,16 @@ static void do_merge_tile(RenderResult *rr, RenderResult *rrpart, float *target, { int y, tilex, tiley; size_t ofs, copylen; - + copylen = tilex = rrpart->rectx; tiley = rrpart->recty; - + if (rrpart->crop) { /* filters add pixel extra */ tile += pixsize * (rrpart->crop + ((size_t)rrpart->crop) * tilex); - + copylen = tilex - 2 * rrpart->crop; tiley -= 2 * rrpart->crop; - + ofs = (((size_t)rrpart->tilerect.ymin) + rrpart->crop) * rr->rectx + (rrpart->tilerect.xmin + rrpart->crop); target += pixsize * ofs; } @@ -776,7 +776,7 @@ void render_result_merge(RenderResult *rr, RenderResult *rrpart) { RenderLayer *rl, *rlp; RenderPass *rpass, *rpassp; - + for (rl = rr->layers.first; rl; rl = rl->next) { rlp = RE_GetRenderLayer(rrpart, rl->name); if (rlp) { @@ -956,7 +956,7 @@ void render_result_single_layer_begin(Render *re) /* officially pushed result should be NULL... 
error can happen with do_seq */ RE_FreeRenderResult(re->pushedresult); - + re->pushedresult = re->result; re->result = NULL; } @@ -980,10 +980,10 @@ void render_result_single_layer_end(Render *re) if (re->pushedresult->rectx == re->result->rectx && re->pushedresult->recty == re->result->recty) { /* find which layer in re->pushedresult should be replaced */ rl = re->result->layers.first; - + /* render result should be empty after this */ BLI_remlink(&re->result->layers, rl); - + /* reconstruct render result layers */ for (nr = 0, view_layer = re->view_layers.first; view_layer; view_layer = view_layer->next, nr++) { if (nr == re->active_view_layer) { @@ -1010,9 +1010,9 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart, cons RenderLayer *rlp, *rl; RenderPass *rpassp; int offs, partx, party; - + BLI_thread_lock(LOCK_IMAGE); - + for (rlp = rrpart->layers.first; rlp; rlp = rlp->next) { rl = RE_GetRenderLayer(rr, rlp->name); @@ -1042,7 +1042,7 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart, cons xstride, xstride * rrpart->rectx, rpassp->rect + a + xstride * offs); } } - + } party = rrpart->tilerect.ymin + rrpart->crop; @@ -1068,7 +1068,7 @@ void render_result_save_empty_result_tiles(Render *re) RenderPart *pa; RenderResult *rr; RenderLayer *rl; - + for (rr = re->result; rr; rr = rr->next) { for (rl = rr->layers.first; rl; rl = rl->next) { for (pa = re->parts.first; pa; pa = pa->next) { @@ -1112,7 +1112,7 @@ void render_result_exr_file_end(Render *re) rr->do_exr_tile = false; } - + render_result_free_list(&re->fullresult, re->result); re->result = NULL; @@ -1131,7 +1131,7 @@ void render_result_exr_file_path(Scene *scene, const char *layname, int sample, { char name[FILE_MAXFILE + MAX_ID_NAME + MAX_ID_NAME + 100]; const char *fi = BLI_path_basename(BKE_main_blendfile_path_from_global()); - + if (sample == 0) { BLI_snprintf(name, sizeof(name), "%s_%s_%s.exr", fi, scene->id.name + 2, layname); } @@ -1194,7 +1194,7 @@ 
int render_result_exr_file_read_path(RenderResult *rr, RenderLayer *rl_single, c for (rl = rr->layers.first; rl; rl = rl->next) { if (rl_single && rl_single != rl) continue; - + /* passes are allocated in sync */ for (rpass = rl->passes.first; rpass; rpass = rpass->next) { const int xstride = rpass->channels; @@ -1292,7 +1292,7 @@ ImBuf *render_result_rect_to_ibuf(RenderResult *rr, RenderData *rd, const int vi /* float factor for random dither, imbuf takes care of it */ ibuf->dither = rd->dither_intensity; - + /* prepare to gamma correct to sRGB color space * note that sequence editor can generate 8bpc render buffers */ @@ -1333,7 +1333,7 @@ void RE_render_result_rect_from_ibuf(RenderResult *rr, RenderData *UNUSED(rd), I if (!rv->rectf) rv->rectf = MEM_mallocN(4 * sizeof(float) * rr->rectx * rr->recty, "render_seq rectf"); - + memcpy(rv->rectf, ibuf->rect_float, 4 * sizeof(float) * rr->rectx * rr->recty); /* TSK! Since sequence render doesn't free the *rr render result, the old rect32 diff --git a/source/blender/render/intern/source/render_texture.c b/source/blender/render/intern/source/render_texture.c index 79d13ecab5b..99da5b3ca01 100644 --- a/source/blender/render/intern/source/render_texture.c +++ b/source/blender/render/intern/source/render_texture.c @@ -95,7 +95,7 @@ static void tex_normal_derivate(Tex *tex, TexResult *texres) float col[4]; if (BKE_colorband_evaluate(tex->coba, texres->tin, col)) { float fac0, fac1, fac2, fac3; - + fac0= (col[0]+col[1]+col[2]); BKE_colorband_evaluate(tex->coba, texres->nor[0], col); fac1= (col[0]+col[1]+col[2]); @@ -103,11 +103,11 @@ static void tex_normal_derivate(Tex *tex, TexResult *texres) fac2= (col[0]+col[1]+col[2]); BKE_colorband_evaluate(tex->coba, texres->nor[2], col); fac3= (col[0]+col[1]+col[2]); - + texres->nor[0]= (fac0 - fac1) / 3.0f; texres->nor[1]= (fac0 - fac2) / 3.0f; texres->nor[2]= (fac0 - fac3) / 3.0f; - + return; } } @@ -173,7 +173,7 @@ static int blend(Tex *tex, const float texvec[3], TexResult 
*texres) static int clouds(Tex *tex, const float texvec[3], TexResult *texres) { int rv = TEX_INT; - + texres->tin = BLI_gTurbulence(tex->noisesize, texvec[0], texvec[1], texvec[2], tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); if (texres->nor!=NULL) { @@ -181,7 +181,7 @@ static int clouds(Tex *tex, const float texvec[3], TexResult *texres) texres->nor[0] = BLI_gTurbulence(tex->noisesize, texvec[0] + tex->nabla, texvec[1], texvec[2], tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); texres->nor[1] = BLI_gTurbulence(tex->noisesize, texvec[0], texvec[1] + tex->nabla, texvec[2], tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); texres->nor[2] = BLI_gTurbulence(tex->noisesize, texvec[0], texvec[1], texvec[2] + tex->nabla, tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); - + tex_normal_derivate(tex, texres); rv |= TEX_NOR; } @@ -215,7 +215,7 @@ static float tex_sin(float a) static float tex_saw(float a) { const float b = 2*M_PI; - + int n = (int)(a / b); a -= n*b; if (a < 0) a += b; @@ -227,9 +227,9 @@ static float tex_tri(float a) { const float b = 2*M_PI; const float rmax = 1.0; - + a = rmax - 2.0f*fabsf(floorf((a*(1.0f/b))+0.5f) - (a*(1.0f/b))); - + return a; } @@ -244,9 +244,9 @@ static float wood_int(Tex *tex, float x, float y, float z) waveform[0] = tex_sin; /* assign address of tex_sin() function to pointer array */ waveform[1] = tex_saw; waveform[2] = tex_tri; - + if ((wf>TEX_TRI) || (wf<TEX_SIN)) wf=0; /* check to be sure noisebasis2 is initialized ahead of time */ - + if (wt==TEX_BAND) { wi = waveform[wf]((x + y + z)*10.0f); } @@ -261,7 +261,7 @@ static float wood_int(Tex *tex, float x, float y, float z) wi = tex->turbul*BLI_gNoise(tex->noisesize, x, y, z, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); wi = waveform[wf](sqrtf(x*x + y*y + z*z)*20.0f + wi); } - + return wi; } @@ -275,7 +275,7 @@ static int wood(Tex *tex, const float texvec[3], TexResult *texres) texres->nor[0] = 
wood_int(tex, texvec[0] + tex->nabla, texvec[1], texvec[2]); texres->nor[1] = wood_int(tex, texvec[0], texvec[1] + tex->nabla, texvec[2]); texres->nor[2] = wood_int(tex, texvec[0], texvec[1], texvec[2] + tex->nabla); - + tex_normal_derivate(tex, texres); rv |= TEX_NOR; } @@ -291,16 +291,16 @@ static float marble_int(Tex *tex, float x, float y, float z) float n, mi; short wf = tex->noisebasis2; /* wave form: TEX_SIN=0, TEX_SAW=1, TEX_TRI=2 */ short mt = tex->stype; /* marble type: TEX_SOFT=0, TEX_SHARP=1,TEX_SHAPER=2 */ - + float (*waveform[3])(float); /* create array of pointers to waveform functions */ waveform[0] = tex_sin; /* assign address of tex_sin() function to pointer array */ waveform[1] = tex_saw; waveform[2] = tex_tri; - + if ((wf>TEX_TRI) || (wf<TEX_SIN)) wf=0; /* check to be sure noisebasis2 isn't initialized ahead of time */ - + n = 5.0f * (x + y + z); - + mi = n + tex->turbul * BLI_gTurbulence(tex->noisesize, x, y, z, tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); if (mt>=TEX_SOFT) { /* TEX_SOFT always true */ @@ -327,9 +327,9 @@ static int marble(Tex *tex, const float texvec[3], TexResult *texres) texres->nor[0] = marble_int(tex, texvec[0] + tex->nabla, texvec[1], texvec[2]); texres->nor[1] = marble_int(tex, texvec[0], texvec[1] + tex->nabla, texvec[2]); texres->nor[2] = marble_int(tex, texvec[0], texvec[1], texvec[2] + tex->nabla); - + tex_normal_derivate(tex, texres); - + rv |= TEX_NOR; } @@ -397,8 +397,8 @@ static int magic(Tex *tex, const float texvec[3], TexResult *texres) if (turb!=0.0f) { turb*= 2.0f; - x/= turb; - y/= turb; + x/= turb; + y/= turb; z/= turb; } texres->tr = 0.5f - x; @@ -406,10 +406,10 @@ static int magic(Tex *tex, const float texvec[3], TexResult *texres) texres->tb = 0.5f - z; texres->tin= (1.0f / 3.0f) * (texres->tr + texres->tg + texres->tb); - + BRICONTRGB; texres->ta = 1.0f; - + return TEX_RGB; } @@ -420,9 +420,9 @@ static int stucci(Tex *tex, const float texvec[3], TexResult *texres) { float nor[3], 
b2, ofs; int retval= TEX_INT; - + b2= BLI_gNoise(tex->noisesize, texvec[0], texvec[1], texvec[2], (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); - + ofs= tex->turbul/200.0f; if (tex->stype) ofs*=(b2*b2); @@ -431,27 +431,27 @@ static int stucci(Tex *tex, const float texvec[3], TexResult *texres) nor[2] = BLI_gNoise(tex->noisesize, texvec[0], texvec[1], texvec[2]+ofs, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis); texres->tin= nor[2]; - + if (texres->nor) { - + copy_v3_v3(texres->nor, nor); tex_normal_derivate(tex, texres); - + if (tex->stype==TEX_WALLOUT) { texres->nor[0]= -texres->nor[0]; texres->nor[1]= -texres->nor[1]; texres->nor[2]= -texres->nor[2]; } - + retval |= TEX_NOR; } - + if (tex->stype==TEX_WALLOUT) texres->tin= 1.0f-texres->tin; - + if (texres->tin<0.0f) texres->tin= 0.0f; - + return retval; } @@ -477,7 +477,7 @@ static float mg_mFractalOrfBmTex(Tex *tex, const float texvec[3], TexResult *tex texres->nor[0] = tex->ns_outscale*mgravefunc(texvec[0] + offs, texvec[1], texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->noisebasis); texres->nor[1] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1] + offs, texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->noisebasis); texres->nor[2] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1], texvec[2] + offs, tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->noisebasis); - + tex_normal_derivate(tex, texres); rv |= TEX_NOR; } @@ -507,7 +507,7 @@ static float mg_ridgedOrHybridMFTex(Tex *tex, const float texvec[3], TexResult * texres->nor[0] = tex->ns_outscale*mgravefunc(texvec[0] + offs, texvec[1], texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->mg_gain, tex->noisebasis); texres->nor[1] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1] + offs, texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->mg_gain, tex->noisebasis); texres->nor[2] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1], texvec[2] + offs, tex->mg_H, 
tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->mg_gain, tex->noisebasis); - + tex_normal_derivate(tex, texres); rv |= TEX_NOR; } @@ -532,7 +532,7 @@ static float mg_HTerrainTex(Tex *tex, const float texvec[3], TexResult *texres) texres->nor[0] = tex->ns_outscale*mg_HeteroTerrain(texvec[0] + offs, texvec[1], texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->noisebasis); texres->nor[1] = tex->ns_outscale*mg_HeteroTerrain(texvec[0], texvec[1] + offs, texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->noisebasis); texres->nor[2] = tex->ns_outscale*mg_HeteroTerrain(texvec[0], texvec[1], texvec[2] + offs, tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->noisebasis); - + tex_normal_derivate(tex, texres); rv |= TEX_NOR; } @@ -630,7 +630,7 @@ static float voronoiTex(Tex *tex, const float texvec[3], TexResult *texres) texres->nor[1] = sc * fabsf(tex->vn_w1*da[0] + tex->vn_w2*da[1] + tex->vn_w3*da[2] + tex->vn_w4*da[3]); voronoi(texvec[0], texvec[1], texvec[2] + offs, da, pa, tex->vn_mexp, tex->vn_distm); texres->nor[2] = sc * fabsf(tex->vn_w1*da[0] + tex->vn_w2*da[1] + tex->vn_w3*da[2] + tex->vn_w4*da[3]); - + tex_normal_derivate(tex, texres); rv |= TEX_NOR; } @@ -640,7 +640,7 @@ static float voronoiTex(Tex *tex, const float texvec[3], TexResult *texres) texres->ta = 1.0; return (rv | TEX_RGB); } - + BRICONT; return rv; @@ -653,20 +653,20 @@ static int texnoise(Tex *tex, TexResult *texres, int thread) { float div=3.0; int val, ran, loop, shift = 29; - + ran= BLI_rng_thread_rand(random_tex_array, thread); - + loop= tex->noisedepth; /* start from top bits since they have more variance */ val= ((ran >> shift) & 3); - + while (loop--) { - shift -= 2; + shift -= 2; val *= ((ran >> shift) & 3); div *= 3.0f; } - + texres->tin= ((float)val)/div; BRICONT; @@ -679,7 +679,7 @@ static int cubemap_glob(const float n[3], float x, float y, float z, float *adr1 { float x1, y1, z1, nor[3]; int ret; - + 
if (n==NULL) { nor[0]= x; nor[1]= y; nor[2]= z; /* use local render coord */ } @@ -690,7 +690,7 @@ static int cubemap_glob(const float n[3], float x, float y, float z, float *adr1 x1 = fabsf(nor[0]); y1 = fabsf(nor[1]); z1 = fabsf(nor[2]); - + if (z1>=x1 && z1>=y1) { *adr1 = (x + 1.0f) / 2.0f; *adr2 = (y + 1.0f) / 2.0f; @@ -719,13 +719,13 @@ static void do_2d_mapping( Tex *tex; float fx, fy, fac1, area[8]; int ok, proj, areaflag= 0, wrap; - + /* mtex variables localized, only cubemap doesn't cooperate yet... */ wrap= mtex->mapping; tex= mtex->tex; if (!(dxt && dyt)) { - + if (wrap==MTEX_FLAT) { fx = (texvec[0] + 1.0f) / 2.0f; fy = (texvec[1] + 1.0f) / 2.0f; @@ -735,15 +735,15 @@ static void do_2d_mapping( else { cubemap_glob(n, texvec[0], texvec[1], texvec[2], &fx, &fy); } - + /* repeat */ if (tex->extend==TEX_REPEAT) { if (tex->xrepeat>1) { float origf= fx *= tex->xrepeat; - + if (fx>1.0f) fx -= (int)(fx); else if (fx<0.0f) fx+= 1-(int)(fx); - + if (tex->flag & TEX_REPEAT_XMIR) { int orig= (int)floor(origf); if (orig & 1) @@ -752,10 +752,10 @@ static void do_2d_mapping( } if (tex->yrepeat>1) { float origf= fy *= tex->yrepeat; - + if (fy>1.0f) fy -= (int)(fy); else if (fy<0.0f) fy+= 1-(int)(fy); - + if (tex->flag & TEX_REPEAT_YMIR) { int orig= (int)floor(origf); if (orig & 1) @@ -777,7 +777,7 @@ static void do_2d_mapping( texvec[1]= fy; } else { - + if (wrap==MTEX_FLAT) { fx= (texvec[0] + 1.0f) / 2.0f; fy= (texvec[1] + 1.0f) / 2.0f; @@ -854,55 +854,55 @@ static void do_2d_mapping( dyt[2] *= 0.5f; } - + /* if area, then reacalculate dxt[] and dyt[] */ if (areaflag) { - fx= area[0]; + fx= area[0]; fy= area[1]; dxt[0]= area[2]-fx; dxt[1]= area[3]-fy; dyt[0]= area[4]-fx; dyt[1]= area[5]-fy; } - + /* repeat */ if (tex->extend==TEX_REPEAT) { float max= 1.0f; if (tex->xrepeat>1) { float origf= fx *= tex->xrepeat; - + /* TXF: omit mirror here, see comments in do_material_tex() after do_2d_mapping() call */ if (tex->texfilter == TXF_BOX) { if (fx>1.0f) fx -= (int)(fx); else 
if (fx<0.0f) fx+= 1-(int)(fx); - + if (tex->flag & TEX_REPEAT_XMIR) { int orig= (int)floor(origf); if (orig & 1) fx= 1.0f-fx; } } - + max= tex->xrepeat; - + dxt[0]*= tex->xrepeat; dyt[0]*= tex->xrepeat; } if (tex->yrepeat>1) { float origf= fy *= tex->yrepeat; - + /* TXF: omit mirror here, see comments in do_material_tex() after do_2d_mapping() call */ if (tex->texfilter == TXF_BOX) { if (fy>1.0f) fy -= (int)(fy); else if (fy<0.0f) fy+= 1-(int)(fy); - + if (tex->flag & TEX_REPEAT_YMIR) { int orig= (int)floor(origf); if (orig & 1) fy= 1.0f-fy; } } - + if (max<tex->yrepeat) max= tex->yrepeat; @@ -913,7 +913,7 @@ static void do_2d_mapping( dxt[2]*= max; dyt[2]*= max; } - + } /* crop */ if (tex->cropxmin!=0.0f || tex->cropxmax!=1.0f) { @@ -928,7 +928,7 @@ static void do_2d_mapping( dxt[1]*= fac1; dyt[1]*= fac1; } - + texvec[0]= fx; texvec[1]= fy; @@ -953,7 +953,7 @@ static int multitex(Tex *tex, int retval = 0; /* return value, int:0, col:1, nor:2, everything:3 */ texres->talpha = false; /* is set when image texture returns alpha (considered premul) */ - + if (use_nodes && tex->use_nodes && tex->nodetree) { const float cfra = 1.0f; /* This was only set for Blender Internal render before. 
*/ retval = ntreeTexExecTree(tex->nodetree, texres, texvec, dxt, dyt, osatex, thread, @@ -1072,7 +1072,7 @@ static int multitex_nodes_intern(Tex *tex, if (mtex) which_output= mtex->which_output; - + if (tex->type==TEX_IMAGE) { int rgbnor; @@ -1093,7 +1093,7 @@ static int multitex_nodes_intern(Tex *tex, if (mtex->mapto & (MAP_COL)) { ImBuf *ibuf = BKE_image_pool_acquire_ibuf(tex->ima, &tex->iuser, pool); - + /* don't linearize float buffers, assumed to be linear */ if (ibuf != NULL && ibuf->rect_float == NULL && @@ -1110,12 +1110,12 @@ static int multitex_nodes_intern(Tex *tex, /* we don't have mtex, do default flat 2d projection */ MTex localmtex; float texvec_l[3], dxt_l[3], dyt_l[3]; - + localmtex.mapping= MTEX_FLAT; localmtex.tex= tex; localmtex.object= NULL; localmtex.texco= TEXCO_ORCO; - + copy_v3_v3(texvec_l, texvec); if (dxt && dyt) { copy_v3_v3(dxt_l, dxt); @@ -1125,7 +1125,7 @@ static int multitex_nodes_intern(Tex *tex, zero_v3(dxt_l); zero_v3(dyt_l); } - + do_2d_mapping(&localmtex, texvec_l, NULL, dxt_l, dyt_l); rgbnor = multitex(tex, texvec_l, @@ -1244,7 +1244,7 @@ int multitex_ext_safe(Tex *tex, float texvec[3], TexResult *texres, struct Image void texture_rgb_blend(float in[3], const float tex[3], const float out[3], float fact, float facg, int blendtype) { float facm; - + switch (blendtype) { case MTEX_BLEND: fact*= facg; @@ -1254,7 +1254,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa in[1]= (fact*tex[1] + facm*out[1]); in[2]= (fact*tex[2] + facm*out[2]); break; - + case MTEX_MUL: fact*= facg; facm= 1.0f-fact; @@ -1274,7 +1274,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa case MTEX_OVERLAY: fact*= facg; facm= 1.0f-fact; - + if (out[0] < 0.5f) in[0] = out[0] * (facm + 2.0f*fact*tex[0]); else @@ -1288,7 +1288,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa else in[2] = 1.0f - (facm + 2.0f*fact*(1.0f - tex[2])) * (1.0f - out[2]); break; - 
+ case MTEX_SUB: fact= -fact; ATTR_FALLTHROUGH; @@ -1302,7 +1302,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa case MTEX_DIV: fact*= facg; facm= 1.0f-fact; - + if (tex[0]!=0.0f) in[0]= facm*out[0] + fact*out[0]/tex[0]; if (tex[1]!=0.0f) @@ -1323,7 +1323,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa case MTEX_DARK: fact*= facg; facm= 1.0f-fact; - + in[0] = min_ff(out[0], tex[0])*fact + out[0]*facm; in[1] = min_ff(out[1], tex[1])*fact + out[1]*facm; in[2] = min_ff(out[2], tex[2])*fact + out[2]*facm; @@ -1336,7 +1336,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa in[1] = max_ff(fact * tex[1], out[1]); in[2] = max_ff(fact * tex[2], out[2]); break; - + case MTEX_BLEND_HUE: fact*= facg; copy_v3_v3(in, out); @@ -1357,16 +1357,16 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa copy_v3_v3(in, out); ramp_blend(MA_RAMP_COLOR, in, fact, tex); break; - case MTEX_SOFT_LIGHT: - fact*= facg; + case MTEX_SOFT_LIGHT: + fact*= facg; copy_v3_v3(in, out); ramp_blend(MA_RAMP_SOFT, in, fact, tex); - break; - case MTEX_LIN_LIGHT: - fact*= facg; + break; + case MTEX_LIN_LIGHT: + fact*= facg; copy_v3_v3(in, out); ramp_blend(MA_RAMP_LINEAR, in, fact, tex); - break; + break; } } @@ -1376,7 +1376,7 @@ float texture_value_blend(float tex, float out, float fact, float facg, int blen int flip= (facg < 0.0f); facg= fabsf(facg); - + fact*= facg; facm= 1.0f-fact; if (flip) SWAP(float, fact, facm); @@ -1429,19 +1429,19 @@ float texture_value_blend(float tex, float out, float fact, float facg, int blen if (col > out) in= col; else in= out; break; - case MTEX_SOFT_LIGHT: + case MTEX_SOFT_LIGHT: scf=1.0f - (1.0f - tex) * (1.0f - out); in= facm*out + fact * ((1.0f - out) * tex * out) + (out * scf); - break; + break; - case MTEX_LIN_LIGHT: + case MTEX_LIN_LIGHT: if (tex > 0.5f) in = out + fact*(2.0f*(tex - 0.5f)); - else + else in = out + fact*(2.0f*tex 
- 1.0f); break; } - + return in; } @@ -1459,26 +1459,26 @@ int externtex(const MTex *mtex, TexResult texr; float dxt[3], dyt[3], texvec[3]; int rgb; - + tex= mtex->tex; if (tex==NULL) return 0; texr.nor= NULL; - + /* placement */ if (mtex->projx) texvec[0]= mtex->size[0]*(vec[mtex->projx-1]+mtex->ofs[0]); else texvec[0]= mtex->size[0]*(mtex->ofs[0]); - + if (mtex->projy) texvec[1]= mtex->size[1]*(vec[mtex->projy-1]+mtex->ofs[1]); else texvec[1]= mtex->size[1]*(mtex->ofs[1]); - + if (mtex->projz) texvec[2]= mtex->size[2]*(vec[mtex->projz-1]+mtex->ofs[2]); else texvec[2]= mtex->size[2]*(mtex->ofs[2]); - + /* texture */ if (tex->type==TEX_IMAGE) { do_2d_mapping(mtex, texvec, NULL, dxt, dyt); } - + rgb = multitex(tex, texvec, dxt, dyt, @@ -1489,7 +1489,7 @@ int externtex(const MTex *mtex, skip_load_image, texnode_preview, true); - + if (rgb) { texr.tin = IMB_colormanagement_get_luminance(&texr.tr); } @@ -1498,7 +1498,7 @@ int externtex(const MTex *mtex, texr.tg= mtex->g; texr.tb= mtex->b; } - + *tin= texr.tin; *tr= texr.tr; *tg= texr.tg; diff --git a/source/blender/render/intern/source/rendercore.c b/source/blender/render/intern/source/rendercore.c new file mode 100644 index 00000000000..99d2436d4bc --- /dev/null +++ b/source/blender/render/intern/source/rendercore.c @@ -0,0 +1,2030 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * Contributors: Hos, Robert Wenzlaff. + * Contributors: 2004/2005/2006 Blender Foundation, full recode + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/rendercore.c + * \ingroup render + */ + + +/* system includes */ +#include <stdio.h> +#include <math.h> +#include <float.h> +#include <string.h> +#include <assert.h> + +/* External modules: */ +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_blenlib.h" +#include "BLI_rand.h" +#include "BLI_threads.h" +#include "BLI_utildefines.h" + +#include "DNA_image_types.h" +#include "DNA_lamp_types.h" +#include "DNA_material_types.h" +#include "DNA_group_types.h" + +/* local include */ +#include "renderpipeline.h" +#include "render_result.h" +#include "render_types.h" +#include "renderdatabase.h" +#include "occlusion.h" +#include "pixelblending.h" +#include "pixelshading.h" +#include "shadbuf.h" +#include "shading.h" +#include "sss.h" +#include "zbuf.h" + +/* own include */ +#include "rendercore.h" + + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +/* x and y are current pixels in rect to be rendered */ +/* do not normalize! 
*/ +void calc_view_vector(float view[3], float x, float y) +{ + + view[2]= -ABS(R.clipsta); + + if (R.r.mode & R_ORTHO) { + view[0]= view[1]= 0.0f; + } + else { + + if (R.r.mode & R_PANORAMA) { + x-= R.panodxp; + } + + /* move x and y to real viewplane coords */ + x = (x / (float)R.winx); + view[0] = R.viewplane.xmin + x * BLI_rctf_size_x(&R.viewplane); + + y = (y / (float)R.winy); + view[1] = R.viewplane.ymin + y * BLI_rctf_size_y(&R.viewplane); + +// if (R.flag & R_SEC_FIELD) { +// if (R.r.mode & R_ODDFIELD) view[1]= (y+R.ystart)*R.ycor; +// else view[1]= (y+R.ystart+1.0)*R.ycor; +// } +// else view[1]= (y+R.ystart+R.bluroffsy+0.5)*R.ycor; + + if (R.r.mode & R_PANORAMA) { + float u= view[0] + R.panodxv; float v= view[2]; + view[0]= R.panoco*u + R.panosi*v; + view[2]= -R.panosi*u + R.panoco*v; + } + } +} + +void calc_renderco_ortho(float co[3], float x, float y, int z) +{ + /* x and y 3d coordinate can be derived from pixel coord and winmat */ + float fx= 2.0f/(R.winx*R.winmat[0][0]); + float fy= 2.0f/(R.winy*R.winmat[1][1]); + float zco; + + co[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0]; + co[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1]; + + zco= ((float)z)/2147483647.0f; + co[2]= R.winmat[3][2]/( R.winmat[2][3]*zco - R.winmat[2][2] ); +} + +void calc_renderco_zbuf(float co[3], const float view[3], int z) +{ + float fac, zco; + + /* inverse of zbuf calc: zbuf = MAXZ*hoco_z/hoco_w */ + zco= ((float)z)/2147483647.0f; + co[2]= R.winmat[3][2]/( R.winmat[2][3]*zco - R.winmat[2][2] ); + + fac= co[2]/view[2]; + co[0]= fac*view[0]; + co[1]= fac*view[1]; +} + +/* also used in zbuf.c and shadbuf.c */ +int count_mask(unsigned short mask) +{ + if (R.samples) + return (R.samples->cmask[mask & 255]+R.samples->cmask[mask>>8]); + return 0; +} + +static int calchalo_z(HaloRen *har, int zz) +{ + + if (har->type & HA_ONLYSKY) { + if (zz < 0x7FFFFFF0) zz= - 0x7FFFFF; /* edge render messes zvalues */ + } + else { + zz= (zz>>8); + } + return zz; +} + + + 
+static void halo_pixelstruct(HaloRen *har, RenderLayer **rlpp, int totsample, int od, float dist, float xn, float yn, PixStr *ps) +{ + float col[4], accol[4], fac; + int amount, amountm, zz, flarec, sample, fullsample, mask=0; + + fullsample= (totsample > 1); + amount= 0; + accol[0] = accol[1] = accol[2] = accol[3]= 0.0f; + col[0] = col[1] = col[2] = col[3]= 0.0f; + flarec= har->flarec; + + while (ps) { + amountm= count_mask(ps->mask); + amount+= amountm; + + zz= calchalo_z(har, ps->z); + if ((zz> har->zs) || (har->mat && (har->mat->mode & MA_HALO_SOFT))) { + if (shadeHaloFloat(har, col, zz, dist, xn, yn, flarec)) { + flarec= 0; + + if (fullsample) { + for (sample=0; sample<totsample; sample++) { + if (ps->mask & (1 << sample)) { + float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + addalphaAddfacFloat(pass + od*4, col, har->add); + } + } + } + else { + fac= ((float)amountm)/(float)R.osa; + accol[0]+= fac*col[0]; + accol[1]+= fac*col[1]; + accol[2]+= fac*col[2]; + accol[3]+= fac*col[3]; + } + } + } + + mask |= ps->mask; + ps= ps->next; + } + + /* now do the sky sub-pixels */ + amount= R.osa-amount; + if (amount) { + if (shadeHaloFloat(har, col, 0x7FFFFF, dist, xn, yn, flarec)) { + if (!fullsample) { + fac= ((float)amount)/(float)R.osa; + accol[0]+= fac*col[0]; + accol[1]+= fac*col[1]; + accol[2]+= fac*col[2]; + accol[3]+= fac*col[3]; + } + } + } + + if (fullsample) { + for (sample=0; sample<totsample; sample++) { + if (!(mask & (1 << sample))) { + float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + addalphaAddfacFloat(pass + od*4, col, har->add); + } + } + } + else { + col[0]= accol[0]; + col[1]= accol[1]; + col[2]= accol[2]; + col[3]= accol[3]; + + for (sample=0; sample<totsample; sample++) { + float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + addalphaAddfacFloat(pass + od*4, col, har->add); + } + } +} + +static void halo_tile(RenderPart *pa, RenderLayer 
*rl) +{ + RenderLayer *rlpp[RE_MAX_OSA]; + HaloRen *har; + rcti disprect= pa->disprect, testrect= pa->disprect; + float dist, xsq, ysq, xn, yn; + float col[4]; + intptr_t *rd= NULL; + int a, *rz, zz, y, sample, totsample, od; + short minx, maxx, miny, maxy, x; + unsigned int lay= rl->lay; + + /* we don't render halos in the cropped area, gives errors in flare counter */ + if (pa->crop) { + testrect.xmin+= pa->crop; + testrect.xmax-= pa->crop; + testrect.ymin+= pa->crop; + testrect.ymax-= pa->crop; + } + + totsample= get_sample_layers(pa, rl, rlpp); + + for (a=0; a<R.tothalo; a++) { + har= R.sortedhalos[a]; + + /* layer test, clip halo with y */ + if ((har->lay & lay) == 0) { + /* pass */ + } + else if (testrect.ymin > har->maxy) { + /* pass */ + } + else if (testrect.ymax < har->miny) { + /* pass */ + } + else { + + minx= floor(har->xs-har->rad); + maxx= ceil(har->xs+har->rad); + + if (testrect.xmin > maxx) { + /* pass */ + } + else if (testrect.xmax < minx) { + /* pass */ + } + else { + + minx = max_ii(minx, testrect.xmin); + maxx = min_ii(maxx, testrect.xmax); + + miny = max_ii(har->miny, testrect.ymin); + maxy = min_ii(har->maxy, testrect.ymax); + + for (y=miny; y<maxy; y++) { + int rectofs= (y-disprect.ymin)*pa->rectx + (minx - disprect.xmin); + rz= pa->rectz + rectofs; + od= rectofs; + + if (pa->rectdaps) + rd= pa->rectdaps + rectofs; + + yn= (y-har->ys)*R.ycor; + ysq= yn*yn; + + for (x=minx; x<maxx; x++, rz++, od++) { + xn= x- har->xs; + xsq= xn*xn; + dist= xsq+ysq; + if (dist<har->radsq) { + if (rd && *rd) { + halo_pixelstruct(har, rlpp, totsample, od, dist, xn, yn, (PixStr *)*rd); + } + else { + zz= calchalo_z(har, *rz); + if ((zz> har->zs) || (har->mat && (har->mat->mode & MA_HALO_SOFT))) { + if (shadeHaloFloat(har, col, zz, dist, xn, yn, har->flarec)) { + for (sample=0; sample<totsample; sample++) { + float * rect= RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + addalphaAddfacFloat(rect + od*4, col, har->add); + } + } + } + } + } 
+ if (rd) rd++; + } + } + } + } + if (R.test_break(R.tbh) ) break; + } +} + +static void lamphalo_tile(RenderPart *pa, RenderLayer *rl) +{ + RenderLayer *rlpp[RE_MAX_OSA]; + ShadeInput shi; + float *pass; + float fac, col[4]; + intptr_t *rd= pa->rectdaps; + const int *rz= pa->rectz; + int x, y, sample, totsample, fullsample, od; + + totsample= get_sample_layers(pa, rl, rlpp); + fullsample= (totsample > 1); + + shade_input_initialize(&shi, pa, rl, 0); /* this zero's ShadeInput for us */ + + for (od=0, y=pa->disprect.ymin; y<pa->disprect.ymax; y++) { + for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, rz++, od++) { + + calc_view_vector(shi.view, x, y); + + if (rd && *rd) { + PixStr *ps= (PixStr *)*rd; + int count, totsamp= 0, mask= 0; + + while (ps) { + if (R.r.mode & R_ORTHO) + calc_renderco_ortho(shi.co, (float)x, (float)y, ps->z); + else + calc_renderco_zbuf(shi.co, shi.view, ps->z); + + totsamp+= count= count_mask(ps->mask); + mask |= ps->mask; + + col[0]= col[1]= col[2]= col[3]= 0.0f; + renderspothalo(&shi, col, 1.0f); + + if (fullsample) { + for (sample=0; sample<totsample; sample++) { + if (ps->mask & (1 << sample)) { + pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + pass += od * 4; + pass[0]+= col[0]; + pass[1]+= col[1]; + pass[2]+= col[2]; + pass[3]+= col[3]; + if (pass[3]>1.0f) pass[3]= 1.0f; + } + } + } + else { + fac= ((float)count)/(float)R.osa; + pass = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname); + pass += od * 4; + pass[0]+= fac*col[0]; + pass[1]+= fac*col[1]; + pass[2]+= fac*col[2]; + pass[3]+= fac*col[3]; + if (pass[3]>1.0f) pass[3]= 1.0f; + } + + ps= ps->next; + } + + if (totsamp<R.osa) { + shi.co[2]= 0.0f; + + col[0]= col[1]= col[2]= col[3]= 0.0f; + renderspothalo(&shi, col, 1.0f); + + if (fullsample) { + for (sample=0; sample<totsample; sample++) { + if (!(mask & (1 << sample))) { + + pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + pass += od * 4; + pass[0]+= 
col[0]; + pass[1]+= col[1]; + pass[2]+= col[2]; + pass[3]+= col[3]; + if (pass[3]>1.0f) pass[3]= 1.0f; + } + } + } + else { + fac= ((float)R.osa-totsamp)/(float)R.osa; + pass = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname); + pass += od * 4; + pass[0]+= fac*col[0]; + pass[1]+= fac*col[1]; + pass[2]+= fac*col[2]; + pass[3]+= fac*col[3]; + if (pass[3]>1.0f) pass[3]= 1.0f; + } + } + } + else { + if (R.r.mode & R_ORTHO) + calc_renderco_ortho(shi.co, (float)x, (float)y, *rz); + else + calc_renderco_zbuf(shi.co, shi.view, *rz); + + col[0]= col[1]= col[2]= col[3]= 0.0f; + renderspothalo(&shi, col, 1.0f); + + for (sample=0; sample<totsample; sample++) { + pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + pass += od * 4; + pass[0]+= col[0]; + pass[1]+= col[1]; + pass[2]+= col[2]; + pass[3]+= col[3]; + if (pass[3]>1.0f) pass[3]= 1.0f; + } + } + + if (rd) rd++; + } + if (y&1) + if (R.test_break(R.tbh)) break; + } +} + + +/* ********************* MAINLOOPS ******************** */ + +/* osa version */ +static void add_filt_passes(RenderLayer *rl, int curmask, int rectx, int offset, ShadeInput *shi, ShadeResult *shr) +{ + RenderPass *rpass; + + for (rpass= rl->passes.first; rpass; rpass= rpass->next) { + float *fp, *col= NULL; + int pixsize= 3; + + if (STREQ(rpass->name, RE_PASSNAME_COMBINED)) { + add_filt_fmask(curmask, shr->combined, rpass->rect + 4*offset, rectx); + } + else if (STREQ(rpass->name, RE_PASSNAME_Z)) { + fp = rpass->rect + offset; + *fp = shr->z; + } + else if (STREQ(rpass->name, RE_PASSNAME_RGBA)) { + col = shr->col; + pixsize = 4; + } + else if (STREQ(rpass->name, RE_PASSNAME_EMIT)) { + col = shr->emit; + } + else if (STREQ(rpass->name, RE_PASSNAME_DIFFUSE)) { + col = shr->diff; + } + else if (STREQ(rpass->name, RE_PASSNAME_SPEC)) { + col = shr->spec; + } + else if (STREQ(rpass->name, RE_PASSNAME_SHADOW)) { + col = shr->shad; + } + else if (STREQ(rpass->name, RE_PASSNAME_AO)) { + col = shr->ao; + } + else if 
(STREQ(rpass->name, RE_PASSNAME_ENVIRONMENT)) { + col = shr->env; + } + else if (STREQ(rpass->name, RE_PASSNAME_INDIRECT)) { + col = shr->indirect; + } + else if (STREQ(rpass->name, RE_PASSNAME_REFLECT)) { + col = shr->refl; + } + else if (STREQ(rpass->name, RE_PASSNAME_REFRACT)) { + col = shr->refr; + } + else if (STREQ(rpass->name, RE_PASSNAME_NORMAL)) { + col = shr->nor; + } + else if (STREQ(rpass->name, RE_PASSNAME_UV)) { + /* box filter only, gauss will screwup UV too much */ + if (shi->totuv) { + float mult = (float)count_mask(curmask)/(float)R.osa; + fp = rpass->rect + 3*offset; + fp[0]+= mult*(0.5f + 0.5f*shi->uv[shi->actuv].uv[0]); + fp[1]+= mult*(0.5f + 0.5f*shi->uv[shi->actuv].uv[1]); + fp[2]+= mult; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_INDEXOB)) { + /* no filter */ + if (shi->vlr) { + fp = rpass->rect + offset; + if (*fp==0.0f) + *fp = (float)shi->obr->ob->index; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_INDEXMA)) { + /* no filter */ + if (shi->vlr) { + fp = rpass->rect + offset; + if (*fp==0.0f) + *fp = (float)shi->mat->index; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_MIST)) { + /* */ + col = &shr->mist; + pixsize = 1; + } + else if (STREQ(rpass->name, RE_PASSNAME_VECTOR)) { + /* add minimum speed in pixel, no filter */ + fp = rpass->rect + 4*offset; + if ( (ABS(shr->winspeed[0]) + ABS(shr->winspeed[1]))< (ABS(fp[0]) + ABS(fp[1])) ) { + fp[0] = shr->winspeed[0]; + fp[1] = shr->winspeed[1]; + } + if ( (ABS(shr->winspeed[2]) + ABS(shr->winspeed[3]))< (ABS(fp[2]) + ABS(fp[3])) ) { + fp[2] = shr->winspeed[2]; + fp[3] = shr->winspeed[3]; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_RAYHITS)) { + /* */ + col = shr->rayhits; + pixsize= 4; + } + + if (col) { + fp= rpass->rect + pixsize*offset; + add_filt_fmask_pixsize(curmask, col, fp, rectx, pixsize); + } + } +} + +/* non-osa version */ +static void add_passes(RenderLayer *rl, int offset, ShadeInput *shi, ShadeResult *shr) +{ + RenderPass *rpass; + float *fp; + + for (rpass= 
rl->passes.first; rpass; rpass= rpass->next) { + float *col= NULL, uvcol[3]; + int a, pixsize= 3; + + if (STREQ(rpass->name, RE_PASSNAME_COMBINED)) { + /* copy combined to use for preview */ + copy_v4_v4(rpass->rect + 4*offset, shr->combined); + } + else if (STREQ(rpass->name, RE_PASSNAME_Z)) { + fp = rpass->rect + offset; + *fp = shr->z; + } + else if (STREQ(rpass->name, RE_PASSNAME_RGBA)) { + col = shr->col; + pixsize = 4; + } + else if (STREQ(rpass->name, RE_PASSNAME_EMIT)) { + col = shr->emit; + } + else if (STREQ(rpass->name, RE_PASSNAME_DIFFUSE)) { + col = shr->diff; + } + else if (STREQ(rpass->name, RE_PASSNAME_SPEC)) { + col = shr->spec; + } + else if (STREQ(rpass->name, RE_PASSNAME_SHADOW)) { + col = shr->shad; + } + else if (STREQ(rpass->name, RE_PASSNAME_AO)) { + col = shr->ao; + } + else if (STREQ(rpass->name, RE_PASSNAME_ENVIRONMENT)) { + col = shr->env; + } + else if (STREQ(rpass->name, RE_PASSNAME_INDIRECT)) { + col = shr->indirect; + } + else if (STREQ(rpass->name, RE_PASSNAME_REFLECT)) { + col = shr->refl; + } + else if (STREQ(rpass->name, RE_PASSNAME_REFRACT)) { + col = shr->refr; + } + else if (STREQ(rpass->name, RE_PASSNAME_NORMAL)) { + col = shr->nor; + } + else if (STREQ(rpass->name, RE_PASSNAME_UV)) { + if (shi->totuv) { + uvcol[0] = 0.5f + 0.5f*shi->uv[shi->actuv].uv[0]; + uvcol[1] = 0.5f + 0.5f*shi->uv[shi->actuv].uv[1]; + uvcol[2] = 1.0f; + col = uvcol; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_VECTOR)) { + col = shr->winspeed; + pixsize = 4; + } + else if (STREQ(rpass->name, RE_PASSNAME_INDEXOB)) { + if (shi->vlr) { + fp = rpass->rect + offset; + *fp = (float)shi->obr->ob->index; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_INDEXMA)) { + if (shi->vlr) { + fp = rpass->rect + offset; + *fp = (float)shi->mat->index; + } + } + else if (STREQ(rpass->name, RE_PASSNAME_MIST)) { + fp = rpass->rect + offset; + *fp = shr->mist; + } + else if (STREQ(rpass->name, RE_PASSNAME_RAYHITS)) { + col = shr->rayhits; + pixsize = 4; + } + + if 
(col) { + fp = rpass->rect + pixsize*offset; + for (a=0; a<pixsize; a++) + fp[a] = col[a]; + } + } +} + +int get_sample_layers(RenderPart *pa, RenderLayer *rl, RenderLayer **rlpp) +{ + + if (pa->fullresult.first) { + int sample, nr= BLI_findindex(&pa->result->layers, rl); + + for (sample=0; sample<R.osa; sample++) { + RenderResult *rr= BLI_findlink(&pa->fullresult, sample); + + rlpp[sample]= BLI_findlink(&rr->layers, nr); + } + return R.osa; + } + else { + rlpp[0]= rl; + return 1; + } +} + + +/* only do sky, is default in the solid layer (shade_tile) btw */ +static void sky_tile(RenderPart *pa, RenderLayer *rl) +{ + RenderLayer *rlpp[RE_MAX_OSA]; + int x, y, od=0, totsample; + + if (R.r.alphamode!=R_ADDSKY) + return; + + totsample= get_sample_layers(pa, rl, rlpp); + + for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++) { + for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, od+=4) { + float col[4]; + int sample; + bool done = false; + + for (sample= 0; sample<totsample; sample++) { + float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + pass += od; + + if (pass[3]<1.0f) { + + if (done==0) { + shadeSkyPixel(col, x, y, pa->thread); + done = true; + } + + if (pass[3]==0.0f) { + copy_v4_v4(pass, col); + pass[3] = 1.0f; + } + else { + addAlphaUnderFloat(pass, col); + pass[3] = 1.0f; + } + } + } + } + + if (y&1) + if (R.test_break(R.tbh)) break; + } +} + +static void atm_tile(RenderPart *pa, RenderLayer *rl) +{ + RenderPass *zpass; + GroupObject *go; + LampRen *lar; + RenderLayer *rlpp[RE_MAX_OSA]; + int totsample; + int x, y, od= 0; + + totsample= get_sample_layers(pa, rl, rlpp); + + /* check that z pass is enabled */ + if (pa->rectz==NULL) return; + for (zpass= rl->passes.first; zpass; zpass= zpass->next) + if (STREQ(zpass->name, RE_PASSNAME_Z)) + break; + + if (zpass==NULL) return; + + /* check for at least one sun lamp that its atmosphere flag is enabled */ + for (go=R.lights.first; go; go= go->next) { + lar= go->lampren; + if 
(lar->type==LA_SUN && lar->sunsky && (lar->sunsky->effect_type & LA_SUN_EFFECT_AP)) + break; + } + /* do nothign and return if there is no sun lamp */ + if (go==NULL) + return; + + /* for each x,y and each sample, and each sun lamp*/ + for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++) { + for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, od++) { + int sample; + + for (sample=0; sample<totsample; sample++) { + const float *zrect = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_Z, R.viewname) + od; + float *rgbrect = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname) + 4*od; + float rgb[3] = {0}; + bool done = false; + + for (go=R.lights.first; go; go= go->next) { + + + lar= go->lampren; + if (lar->type==LA_SUN && lar->sunsky) { + + /* if it's sky continue and don't apply atmosphere effect on it */ + if (*zrect >= 9.9e10f || rgbrect[3]==0.0f) { + continue; + } + + if ((lar->sunsky->effect_type & LA_SUN_EFFECT_AP)) { + float tmp_rgb[3]; + + /* skip if worldspace lamp vector is below horizon */ + if (go->ob->obmat[2][2] < 0.f) { + continue; + } + + copy_v3_v3(tmp_rgb, rgbrect); + if (rgbrect[3]!=1.0f) { /* de-premul */ + mul_v3_fl(tmp_rgb, 1.0f/rgbrect[3]); + } + shadeAtmPixel(lar->sunsky, tmp_rgb, x, y, *zrect); + if (rgbrect[3]!=1.0f) { /* premul */ + mul_v3_fl(tmp_rgb, rgbrect[3]); + } + + if (done==0) { + copy_v3_v3(rgb, tmp_rgb); + done = true; + } + else { + rgb[0] = 0.5f*rgb[0] + 0.5f*tmp_rgb[0]; + rgb[1] = 0.5f*rgb[1] + 0.5f*tmp_rgb[1]; + rgb[2] = 0.5f*rgb[2] + 0.5f*tmp_rgb[2]; + } + } + } + } + + /* if at least for one sun lamp aerial perspective was applied*/ + if (done) { + copy_v3_v3(rgbrect, rgb); + } + } + } + } +} + +static void shadeDA_tile(RenderPart *pa, RenderLayer *rl) +{ + RenderResult *rr= pa->result; + ShadeSample ssamp; + intptr_t *rd, *rectdaps= pa->rectdaps; + int samp; + int x, y, seed, crop=0, offs=0, od; + + if (R.test_break(R.tbh)) return; + + /* irregular shadowb buffer creation */ + if (R.r.mode & R_SHADOW) + 
ISB_create(pa, NULL); + + /* we set per pixel a fixed seed, for random AO and shadow samples */ + seed= pa->rectx*pa->disprect.ymin; + + /* general shader info, passes */ + shade_sample_initialize(&ssamp, pa, rl); + + /* occlusion caching */ + if (R.occlusiontree) + cache_occ_samples(&R, pa, &ssamp); + + /* filtered render, for now we assume only 1 filter size */ + if (pa->crop) { + crop= 1; + rectdaps+= pa->rectx + 1; + offs= pa->rectx + 1; + } + + /* scanline updates have to be 2 lines behind */ + rr->renrect.ymin = 0; + rr->renrect.ymax = -2*crop; + rr->renlay= rl; + + for (y=pa->disprect.ymin+crop; y<pa->disprect.ymax-crop; y++, rr->renrect.ymax++) { + rd= rectdaps; + od= offs; + + for (x=pa->disprect.xmin+crop; x<pa->disprect.xmax-crop; x++, rd++, od++) { + BLI_thread_srandom(pa->thread, seed++); + + if (*rd) { + if (shade_samples(&ssamp, (PixStr *)(*rd), x, y)) { + + /* multisample buffers or filtered mask filling? */ + if (pa->fullresult.first) { + int a; + for (samp=0; samp<ssamp.tot; samp++) { + int smask= ssamp.shi[samp].mask; + for (a=0; a<R.osa; a++) { + int mask= 1<<a; + if (smask & mask) + add_passes(ssamp.rlpp[a], od, &ssamp.shi[samp], &ssamp.shr[samp]); + } + } + } + else { + for (samp=0; samp<ssamp.tot; samp++) + add_filt_passes(rl, ssamp.shi[samp].mask, pa->rectx, od, &ssamp.shi[samp], &ssamp.shr[samp]); + } + } + } + } + + rectdaps+= pa->rectx; + offs+= pa->rectx; + + if (y&1) if (R.test_break(R.tbh)) break; + } + + /* disable scanline updating */ + rr->renlay= NULL; + + if (R.r.mode & R_SHADOW) + ISB_free(pa); + + if (R.occlusiontree) + free_occ_samples(&R, pa); +} + +/* ************* pixel struct ******** */ + + +static PixStrMain *addpsmain(ListBase *lb) +{ + PixStrMain *psm; + + psm= (PixStrMain *)MEM_mallocN(sizeof(PixStrMain), "pixstrMain"); + BLI_addtail(lb, psm); + + psm->ps= (PixStr *)MEM_mallocN(4096*sizeof(PixStr), "pixstr"); + psm->counter= 0; + + return psm; +} + +static void freeps(ListBase *lb) +{ + PixStrMain *psm, *psmnext; + + 
for (psm= lb->first; psm; psm= psmnext) { + psmnext= psm->next; + if (psm->ps) + MEM_freeN(psm->ps); + MEM_freeN(psm); + } + BLI_listbase_clear(lb); +} + +static void addps(ListBase *lb, intptr_t *rd, int obi, int facenr, int z, int maskz, unsigned short mask) +{ + PixStrMain *psm; + PixStr *ps, *last= NULL; + + if (*rd) { + ps= (PixStr *)(*rd); + + while (ps) { + if ( ps->obi == obi && ps->facenr == facenr ) { + ps->mask |= mask; + return; + } + last= ps; + ps= ps->next; + } + } + + /* make new PS (pixel struct) */ + psm= lb->last; + + if (psm->counter==4095) + psm= addpsmain(lb); + + ps= psm->ps + psm->counter++; + + if (last) last->next= ps; + else *rd= (intptr_t)ps; + + ps->next= NULL; + ps->obi= obi; + ps->facenr= facenr; + ps->z= z; + ps->maskz= maskz; + ps->mask = mask; + ps->shadfac= 0; +} + +static void edge_enhance_add(RenderPart *pa, float *rectf, float *arect) +{ + float addcol[4]; + int pix; + + if (arect==NULL) + return; + + for (pix= pa->rectx*pa->recty; pix>0; pix--, arect++, rectf+=4) { + if (*arect != 0.0f) { + addcol[0]= *arect * R.r.edgeR; + addcol[1]= *arect * R.r.edgeG; + addcol[2]= *arect * R.r.edgeB; + addcol[3]= *arect; + addAlphaOverFloat(rectf, addcol); + } + } +} + +/* clamp alpha and RGB to 0..1 and 0..inf, can go outside due to filter */ +static void clamp_alpha_rgb_range(RenderPart *pa, RenderLayer *rl) +{ + RenderLayer *rlpp[RE_MAX_OSA]; + int y, sample, totsample; + + totsample= get_sample_layers(pa, rl, rlpp); + + /* not for full sample, there we clamp after compositing */ + if (totsample > 1) + return; + + for (sample= 0; sample<totsample; sample++) { + float *rectf = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname); + + for (y= pa->rectx*pa->recty; y>0; y--, rectf+=4) { + rectf[0] = MAX2(rectf[0], 0.0f); + rectf[1] = MAX2(rectf[1], 0.0f); + rectf[2] = MAX2(rectf[2], 0.0f); + CLAMP(rectf[3], 0.0f, 1.0f); + } + } +} + +/* adds only alpha values */ +static void edge_enhance_tile(RenderPart *pa, float *rectf, int 
*rectz) +{ + /* use zbuffer to define edges, add it to the image */ + int y, x, col, *rz, *rz1, *rz2, *rz3; + int zval1, zval2, zval3; + float *rf; + + /* shift values in zbuffer 4 to the right (anti overflows), for filter we need multiplying with 12 max */ + rz= rectz; + if (rz==NULL) return; + + for (y=0; y<pa->recty; y++) + for (x=0; x<pa->rectx; x++, rz++) (*rz)>>= 4; + + rz1= rectz; + rz2= rz1+pa->rectx; + rz3= rz2+pa->rectx; + + rf= rectf+pa->rectx+1; + + for (y=0; y<pa->recty-2; y++) { + for (x=0; x<pa->rectx-2; x++, rz1++, rz2++, rz3++, rf++) { + + /* prevent overflow with sky z values */ + zval1= rz1[0] + 2*rz1[1] + rz1[2]; + zval2= 2*rz2[0] + 2*rz2[2]; + zval3= rz3[0] + 2*rz3[1] + rz3[2]; + + col= ( 4*rz2[1] - (zval1 + zval2 + zval3)/3 ); + if (col<0) col= -col; + + col >>= 5; + if (col > (1<<16)) col= (1<<16); + else col= (R.r.edgeint*col)>>8; + + if (col>0) { + float fcol; + + if (col>255) fcol= 1.0f; + else fcol= (float)col/255.0f; + + if (R.osa) + *rf+= fcol/(float)R.osa; + else + *rf= fcol; + } + } + rz1+= 2; + rz2+= 2; + rz3+= 2; + rf+= 2; + } + + /* shift back zbuf values, we might need it still */ + rz= rectz; + for (y=0; y<pa->recty; y++) + for (x=0; x<pa->rectx; x++, rz++) (*rz)<<= 4; + +} + +static void reset_sky_speed(RenderPart *pa, RenderLayer *rl) +{ + /* for all pixels with max speed, set to zero */ + RenderLayer *rlpp[RE_MAX_OSA]; + float *fp; + int a, sample, totsample; + + totsample= get_sample_layers(pa, rl, rlpp); + + for (sample= 0; sample<totsample; sample++) { + fp= RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_VECTOR, R.viewname); + if (fp==NULL) break; + + for (a= 4*pa->rectx*pa->recty - 1; a>=0; a--) + if (fp[a] == PASS_VECTOR_MAX) fp[a]= 0.0f; + } +} + +static unsigned short *make_solid_mask(RenderPart *pa) +{ + intptr_t *rd= pa->rectdaps; + unsigned short *solidmask, *sp; + int x; + + if (rd==NULL) return NULL; + + sp=solidmask= MEM_mallocN(sizeof(short)*pa->rectx*pa->recty, "solidmask"); + + for (x=pa->rectx*pa->recty; x>0; 
x--, rd++, sp++) { + if (*rd) { + PixStr *ps= (PixStr *)*rd; + + *sp= ps->mask; + for (ps= ps->next; ps; ps= ps->next) + *sp |= ps->mask; + } + else + *sp= 0; + } + + return solidmask; +} + +static void addAlphaOverFloatMask(float *dest, float *source, unsigned short dmask, unsigned short smask) +{ + unsigned short shared= dmask & smask; + float mul= 1.0f - source[3]; + + if (shared) { /* overlapping masks */ + + /* masks differ, we make a mixture of 'add' and 'over' */ + if (shared!=dmask) { + float shared_bits= (float)count_mask(shared); /* alpha over */ + float tot_bits= (float)count_mask(smask|dmask); /* alpha add */ + + float add= (tot_bits - shared_bits)/tot_bits; /* add level */ + mul= add + (1.0f-add)*mul; + } + } + else if (dmask && smask) { + /* works for premul only, of course */ + dest[0]+= source[0]; + dest[1]+= source[1]; + dest[2]+= source[2]; + dest[3]+= source[3]; + + return; + } + + dest[0]= (mul*dest[0]) + source[0]; + dest[1]= (mul*dest[1]) + source[1]; + dest[2]= (mul*dest[2]) + source[2]; + dest[3]= (mul*dest[3]) + source[3]; +} + +typedef struct ZbufSolidData { + RenderLayer *rl; + ListBase *psmlist; + float *edgerect; +} ZbufSolidData; + +static void make_pixelstructs(RenderPart *pa, ZSpan *zspan, int sample, void *data) +{ + ZbufSolidData *sdata = (ZbufSolidData *)data; + ListBase *lb= sdata->psmlist; + intptr_t *rd= pa->rectdaps; + const int *ro= zspan->recto; + const int *rp= zspan->rectp; + const int *rz= zspan->rectz; + const int *rm= zspan->rectmask; + int x, y; + int mask= 1<<sample; + + for (y=0; y<pa->recty; y++) { + for (x=0; x<pa->rectx; x++, rd++, rp++, ro++, rz++, rm++) { + if (*rp) { + addps(lb, rd, *ro, *rp, *rz, (zspan->rectmask)? *rm: 0, mask); + } + } + } + + if (sdata->rl->layflag & SCE_LAY_EDGE) + if (R.r.mode & R_EDGE) + edge_enhance_tile(pa, sdata->edgerect, zspan->rectz); +} + +/* main call for shading Delta Accum, for OSA */ +/* supposed to be fully threadable! 
*/ +void zbufshadeDA_tile(RenderPart *pa) +{ + RenderResult *rr= pa->result; + RenderLayer *rl; + ListBase psmlist= {NULL, NULL}; + float *edgerect= NULL; + + /* allocate the necessary buffers */ + /* zbuffer inits these rects */ + pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto"); + pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp"); + pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz"); + for (rl= rr->layers.first; rl; rl= rl->next) { + float *rect = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname); + + if ((rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK)) + pa->rectmask= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectmask"); + + /* initialize pixelstructs and edge buffer */ + addpsmain(&psmlist); + pa->rectdaps= MEM_callocN(sizeof(intptr_t)*pa->rectx*pa->recty+4, "zbufDArectd"); + + if (rl->layflag & SCE_LAY_EDGE) + if (R.r.mode & R_EDGE) + edgerect= MEM_callocN(sizeof(float)*pa->rectx*pa->recty, "rectedge"); + + /* always fill visibility */ + for (pa->sample=0; pa->sample<R.osa; pa->sample+=4) { + ZbufSolidData sdata; + + sdata.rl= rl; + sdata.psmlist= &psmlist; + sdata.edgerect= edgerect; + zbuffer_solid(pa, rl, make_pixelstructs, &sdata); + if (R.test_break(R.tbh)) break; + } + + /* shades solid */ + if (rl->layflag & SCE_LAY_SOLID) + shadeDA_tile(pa, rl); + + /* lamphalo after solid, before ztra, looks nicest because ztra does own halo */ + if (R.flag & R_LAMPHALO) + if (rl->layflag & SCE_LAY_HALO) + lamphalo_tile(pa, rl); + + /* halo before ztra, because ztra fills in zbuffer now */ + if (R.flag & R_HALO) + if (rl->layflag & SCE_LAY_HALO) + halo_tile(pa, rl); + + /* transp layer */ + if (R.flag & R_ZTRA || R.totstrand) { + if (rl->layflag & (SCE_LAY_ZTRA|SCE_LAY_STRAND)) { + if (pa->fullresult.first) { + zbuffer_transp_shade(pa, rl, rect, &psmlist); + } + else { + unsigned short *ztramask, *solidmask= NULL; /* 16 bits, MAX_OSA */ + + /* allocate, but not free here, for asynchronous 
display of this rect in main thread */ + rl->acolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "alpha layer"); + + /* swap for live updates, and it is used in zbuf.c!!! */ + SWAP(float *, rl->acolrect, rect); + ztramask = zbuffer_transp_shade(pa, rl, rect, &psmlist); + SWAP(float *, rl->acolrect, rect); + + /* zbuffer transp only returns ztramask if there's solid rendered */ + if (ztramask) + solidmask= make_solid_mask(pa); + + if (ztramask && solidmask) { + unsigned short *sps= solidmask, *spz= ztramask; + unsigned short fullmask= (1<<R.osa)-1; + float *fcol= rect; + float *acol= rl->acolrect; + int x; + + for (x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4, sps++, spz++) { + if (*sps == fullmask) + addAlphaOverFloat(fcol, acol); + else + addAlphaOverFloatMask(fcol, acol, *sps, *spz); + } + } + else { + float *fcol= rect; + float *acol= rl->acolrect; + int x; + for (x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4) { + addAlphaOverFloat(fcol, acol); + } + } + if (solidmask) MEM_freeN(solidmask); + if (ztramask) MEM_freeN(ztramask); + } + } + } + + /* sun/sky */ + if (rl->layflag & SCE_LAY_SKY) + atm_tile(pa, rl); + + /* sky before edge */ + if (rl->layflag & SCE_LAY_SKY) + sky_tile(pa, rl); + + /* extra layers */ + if (rl->layflag & SCE_LAY_EDGE) + if (R.r.mode & R_EDGE) + edge_enhance_add(pa, rect, edgerect); + + if (rl->passflag & SCE_PASS_VECTOR) + reset_sky_speed(pa, rl); + + /* clamp alpha to 0..1 range, can go outside due to filter */ + clamp_alpha_rgb_range(pa, rl); + + /* free stuff within loop! 
*/ + MEM_freeN(pa->rectdaps); pa->rectdaps= NULL; + freeps(&psmlist); + + if (edgerect) MEM_freeN(edgerect); + edgerect= NULL; + + if (pa->rectmask) { + MEM_freeN(pa->rectmask); + pa->rectmask= NULL; + } + } + + /* free all */ + MEM_freeN(pa->recto); pa->recto= NULL; + MEM_freeN(pa->rectp); pa->rectp= NULL; + MEM_freeN(pa->rectz); pa->rectz= NULL; + + /* display active layer */ + rr->renrect.ymin=rr->renrect.ymax = 0; + rr->renlay= render_get_active_layer(&R, rr); +} + + +/* ------------------------------------------------------------------------ */ + +/* non OSA case, full tile render */ +/* supposed to be fully threadable! */ +void zbufshade_tile(RenderPart *pa) +{ + ShadeSample ssamp; + RenderResult *rr= pa->result; + RenderLayer *rl; + PixStr ps; + float *edgerect= NULL; + + /* fake pixel struct, to comply to osa render */ + ps.next= NULL; + ps.mask= 0xFFFF; + + /* zbuffer code clears/inits rects */ + pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto"); + pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp"); + pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz"); + + for (rl= rr->layers.first; rl; rl= rl->next) { + float *rect= RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname); + if ((rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK)) + pa->rectmask= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectmask"); + + /* general shader info, passes */ + shade_sample_initialize(&ssamp, pa, rl); + + zbuffer_solid(pa, rl, NULL, NULL); + + if (!R.test_break(R.tbh)) { /* NOTE: this if () is not consistent */ + + /* edges only for solid part, ztransp doesn't support it yet anti-aliased */ + if (rl->layflag & SCE_LAY_EDGE) { + if (R.r.mode & R_EDGE) { + edgerect= MEM_callocN(sizeof(float)*pa->rectx*pa->recty, "rectedge"); + edge_enhance_tile(pa, edgerect, pa->rectz); + } + } + + /* initialize scanline updates for main thread */ + rr->renrect.ymin = 0; + rr->renlay= rl; + + if (rl->layflag & SCE_LAY_SOLID) { + 
const float *fcol = rect; + const int *ro= pa->recto, *rp= pa->rectp, *rz= pa->rectz; + int x, y, offs=0, seed; + + /* we set per pixel a fixed seed, for random AO and shadow samples */ + seed= pa->rectx*pa->disprect.ymin; + + /* irregular shadowb buffer creation */ + if (R.r.mode & R_SHADOW) + ISB_create(pa, NULL); + + if (R.occlusiontree) + cache_occ_samples(&R, pa, &ssamp); + + for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) { + for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rz++, rp++, fcol+=4, offs++) { + /* per pixel fixed seed */ + BLI_thread_srandom(pa->thread, seed++); + + if (*rp) { + ps.obi= *ro; + ps.facenr= *rp; + ps.z= *rz; + if (shade_samples(&ssamp, &ps, x, y)) { + /* combined and passes */ + add_passes(rl, offs, ssamp.shi, ssamp.shr); + } + } + } + if (y&1) + if (R.test_break(R.tbh)) break; + } + + if (R.occlusiontree) + free_occ_samples(&R, pa); + + if (R.r.mode & R_SHADOW) + ISB_free(pa); + } + + /* disable scanline updating */ + rr->renlay= NULL; + } + + /* lamphalo after solid, before ztra, looks nicest because ztra does own halo */ + if (R.flag & R_LAMPHALO) + if (rl->layflag & SCE_LAY_HALO) + lamphalo_tile(pa, rl); + + /* halo before ztra, because ztra fills in zbuffer now */ + if (R.flag & R_HALO) + if (rl->layflag & SCE_LAY_HALO) + halo_tile(pa, rl); + + if (R.flag & R_ZTRA || R.totstrand) { + if (rl->layflag & (SCE_LAY_ZTRA|SCE_LAY_STRAND)) { + float *fcol, *acol; + int x; + + /* allocate, but not free here, for asynchronous display of this rect in main thread */ + rl->acolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "alpha layer"); + + /* swap for live updates */ + SWAP(float *, rl->acolrect, rect); + zbuffer_transp_shade(pa, rl, rect, NULL); + SWAP(float *, rl->acolrect, rect); + + fcol= rect; acol= rl->acolrect; + for (x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4) { + addAlphaOverFloat(fcol, acol); + } + } + } + + /* sun/sky */ + if (rl->layflag & SCE_LAY_SKY) + atm_tile(pa, rl); + + /* 
sky before edge */ + if (rl->layflag & SCE_LAY_SKY) + sky_tile(pa, rl); + + if (!R.test_break(R.tbh)) { + if (rl->layflag & SCE_LAY_EDGE) + if (R.r.mode & R_EDGE) + edge_enhance_add(pa, rect, edgerect); + } + + if (rl->passflag & SCE_PASS_VECTOR) + reset_sky_speed(pa, rl); + + if (edgerect) MEM_freeN(edgerect); + edgerect= NULL; + + if (pa->rectmask) { + MEM_freeN(pa->rectmask); + pa->rectmask= NULL; + } + } + + /* display active layer */ + rr->renrect.ymin=rr->renrect.ymax = 0; + rr->renlay= render_get_active_layer(&R, rr); + + MEM_freeN(pa->recto); pa->recto= NULL; + MEM_freeN(pa->rectp); pa->rectp= NULL; + MEM_freeN(pa->rectz); pa->rectz= NULL; +} + +/* SSS preprocess tile render, fully threadable */ +typedef struct ZBufSSSHandle { + RenderPart *pa; + ListBase psmlist; + int totps; +} ZBufSSSHandle; + +static void addps_sss(void *cb_handle, int obi, int facenr, int x, int y, int z) +{ + ZBufSSSHandle *handle = cb_handle; + RenderPart *pa= handle->pa; + + /* extra border for filter gives double samples on part edges, + * don't use those */ + if (x<pa->crop || x>=pa->rectx-pa->crop) + return; + if (y<pa->crop || y>=pa->recty-pa->crop) + return; + + if (pa->rectall) { + intptr_t *rs= pa->rectall + pa->rectx*y + x; + + addps(&handle->psmlist, rs, obi, facenr, z, 0, 0); + handle->totps++; + } + if (pa->rectz) { + int *rz= pa->rectz + pa->rectx*y + x; + int *rp= pa->rectp + pa->rectx*y + x; + int *ro= pa->recto + pa->rectx*y + x; + + if (z < *rz) { + if (*rp == 0) + handle->totps++; + *rz= z; + *rp= facenr; + *ro= obi; + } + } + if (pa->rectbackz) { + int *rz= pa->rectbackz + pa->rectx*y + x; + int *rp= pa->rectbackp + pa->rectx*y + x; + int *ro= pa->rectbacko + pa->rectx*y + x; + + if (z >= *rz) { + if (*rp == 0) + handle->totps++; + *rz= z; + *rp= facenr; + *ro= obi; + } + } +} + +static void shade_sample_sss(ShadeSample *ssamp, Material *mat, ObjectInstanceRen *obi, VlakRen *vlr, int quad, float x, float y, float z, float *co, float color[3], float *area) +{ + 
ShadeInput *shi= ssamp->shi; + ShadeResult shr; + float /* texfac,*/ /* UNUSED */ orthoarea, nor[3], alpha, sx, sy; + + /* cache for shadow */ + shi->samplenr= R.shadowsamplenr[shi->thread]++; + + if (quad) + shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3); + else + shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); + + /* center pixel */ + sx = x + 0.5f; + sy = y + 0.5f; + + /* we estimate the area here using shi->dxco and shi->dyco. we need to + * enabled shi->osatex these are filled. we compute two areas, one with + * the normal pointed at the camera and one with the original normal, and + * then clamp to avoid a too large contribution from a single pixel */ + shi->osatex= 1; + + copy_v3_v3(nor, shi->facenor); + calc_view_vector(shi->facenor, sx, sy); + normalize_v3(shi->facenor); + shade_input_set_viewco(shi, x, y, sx, sy, z); + orthoarea= len_v3(shi->dxco)*len_v3(shi->dyco); + + copy_v3_v3(shi->facenor, nor); + shade_input_set_viewco(shi, x, y, sx, sy, z); + *area = min_ff(len_v3(shi->dxco) * len_v3(shi->dyco), 2.0f * orthoarea); + + shade_input_set_uv(shi); + shade_input_set_normals(shi); + + /* we don't want flipped normals, they screw up back scattering */ + if (shi->flippednor) + shade_input_flip_normals(shi); + + /* not a pretty solution, but fixes common cases */ + if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) { + negate_v3(shi->vn); + negate_v3(shi->vno); + negate_v3(shi->nmapnorm); + } + + /* if nodetree, use the material that we are currently preprocessing + * instead of the node material */ + if (shi->mat->nodetree && shi->mat->use_nodes) + shi->mat= mat; + + /* init material vars */ + shade_input_init_material(shi); + + /* render */ + shade_input_set_shade_texco(shi); + + shade_samples_do_AO(ssamp); + shade_material_loop(shi, &shr); + + copy_v3_v3(co, shi->co); + copy_v3_v3(color, shr.combined); + + /* texture blending */ + /* texfac= shi->mat->sss_texfac; */ /* UNUSED */ + + alpha= shr.combined[3]; + *area *= alpha; +} + +static 
void zbufshade_sss_free(RenderPart *pa) +{ +#if 0 + MEM_freeN(pa->rectall); pa->rectall= NULL; + freeps(&handle.psmlist); +#else + MEM_freeN(pa->rectz); pa->rectz= NULL; + MEM_freeN(pa->rectp); pa->rectp= NULL; + MEM_freeN(pa->recto); pa->recto= NULL; + MEM_freeN(pa->rectbackz); pa->rectbackz= NULL; + MEM_freeN(pa->rectbackp); pa->rectbackp= NULL; + MEM_freeN(pa->rectbacko); pa->rectbacko= NULL; +#endif +} + +void zbufshade_sss_tile(RenderPart *pa) +{ + Render *re= &R; + ShadeSample ssamp; + ZBufSSSHandle handle; + RenderResult *rr= pa->result; + RenderLayer *rl; + VlakRen *vlr; + Material *mat= re->sss_mat; + float (*co)[3], (*color)[3], *area, *fcol; + int x, y, seed, quad, totpoint; + const bool display = (re->r.scemode & (R_BUTS_PREVIEW | R_VIEWPORT_PREVIEW)) == 0; + int *ro, *rz, *rp, *rbo, *rbz, *rbp, lay; +#if 0 + PixStr *ps; + intptr_t *rs; + int z; +#endif + + /* setup pixelstr list and buffer for zbuffering */ + handle.pa= pa; + handle.totps= 0; + +#if 0 + handle.psmlist.first= handle.psmlist.last= NULL; + addpsmain(&handle.psmlist); + + pa->rectall= MEM_callocN(sizeof(intptr_t)*pa->rectx*pa->recty+4, "rectall"); +#else + pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto"); + pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp"); + pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz"); + pa->rectbacko= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbacko"); + pa->rectbackp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbackp"); + pa->rectbackz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbackz"); +#endif + + /* setup shade sample with correct passes */ + memset(&ssamp, 0, sizeof(ssamp)); + shade_sample_initialize(&ssamp, pa, rr->layers.first); + ssamp.tot= 1; + + for (rl=rr->layers.first; rl; rl=rl->next) { + ssamp.shi[0].lay |= rl->lay; + ssamp.shi[0].layflag |= rl->layflag; + ssamp.shi[0].passflag |= rl->passflag; + ssamp.shi[0].combinedflag |= ~rl->pass_xor; + } + + rl= rr->layers.first; + 
ssamp.shi[0].passflag |= SCE_PASS_RGBA|SCE_PASS_COMBINED; + ssamp.shi[0].combinedflag &= ~(SCE_PASS_SPEC); + ssamp.shi[0].mat_override= NULL; + ssamp.shi[0].light_override= NULL; + lay= ssamp.shi[0].lay; + + /* create the pixelstrs to be used later */ + zbuffer_sss(pa, lay, &handle, addps_sss); + + if (handle.totps==0) { + zbufshade_sss_free(pa); + return; + } + + fcol= RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname); + + co= MEM_mallocN(sizeof(float)*3*handle.totps, "SSSCo"); + color= MEM_mallocN(sizeof(float)*3*handle.totps, "SSSColor"); + area= MEM_mallocN(sizeof(float)*handle.totps, "SSSArea"); + +#if 0 + /* create ISB (does not work currently!) */ + if (re->r.mode & R_SHADOW) + ISB_create(pa, NULL); +#endif + + if (display) { + /* initialize scanline updates for main thread */ + rr->renrect.ymin = 0; + rr->renlay= rl; + } + + seed= pa->rectx*pa->disprect.ymin; +#if 0 + rs= pa->rectall; +#else + rz= pa->rectz; + rp= pa->rectp; + ro= pa->recto; + rbz= pa->rectbackz; + rbp= pa->rectbackp; + rbo= pa->rectbacko; +#endif + totpoint= 0; + + for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) { + for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, fcol+=4) { + /* per pixel fixed seed */ + BLI_thread_srandom(pa->thread, seed++); + +#if 0 + if (rs) { + /* for each sample in this pixel, shade it */ + for (ps = (PixStr *)(*rs); ps; ps=ps->next) { + ObjectInstanceRen *obi= &re->objectinstance[ps->obi]; + ObjectRen *obr= obi->obr; + vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK); + quad= (ps->facenr & RE_QUAD_OFFS); + z= ps->z; + + shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, z, + co[totpoint], color[totpoint], &area[totpoint]); + + totpoint++; + + add_v3_v3(fcol, color); + fcol[3]= 1.0f; + } + + rs++; + } +#else + if (rp) { + if (*rp != 0) { + ObjectInstanceRen *obi= &re->objectinstance[*ro]; + ObjectRen *obr= obi->obr; + + /* shade front */ + vlr= RE_findOrAddVlak(obr, (*rp-1) & RE_QUAD_MASK); + quad= ((*rp) & 
RE_QUAD_OFFS); + + shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, *rz, + co[totpoint], color[totpoint], &area[totpoint]); + + add_v3_v3(fcol, color[totpoint]); + fcol[3]= 1.0f; + totpoint++; + } + + rp++; rz++; ro++; + } + + if (rbp) { + if (*rbp != 0 && !(*rbp == *(rp-1) && *rbo == *(ro-1))) { + ObjectInstanceRen *obi= &re->objectinstance[*rbo]; + ObjectRen *obr= obi->obr; + + /* shade back */ + vlr= RE_findOrAddVlak(obr, (*rbp-1) & RE_QUAD_MASK); + quad= ((*rbp) & RE_QUAD_OFFS); + + shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, *rbz, + co[totpoint], color[totpoint], &area[totpoint]); + + /* to indicate this is a back sample */ + area[totpoint]= -area[totpoint]; + + add_v3_v3(fcol, color[totpoint]); + fcol[3]= 1.0f; + totpoint++; + } + + rbz++; rbp++; rbo++; + } +#endif + } + + if (y&1) + if (re->test_break(re->tbh)) break; + } + + /* note: after adding we do not free these arrays, sss keeps them */ + if (totpoint > 0) { + sss_add_points(re, co, color, area, totpoint); + } + else { + MEM_freeN(co); + MEM_freeN(color); + MEM_freeN(area); + } + +#if 0 + if (re->r.mode & R_SHADOW) + ISB_free(pa); +#endif + + if (display) { + /* display active layer */ + rr->renrect.ymin=rr->renrect.ymax = 0; + rr->renlay= render_get_active_layer(&R, rr); + } + + zbufshade_sss_free(pa); +} + +/* ------------------------------------------------------------------------ */ + +static void renderhalo_post(RenderResult *rr, float *rectf, HaloRen *har) /* postprocess version */ +{ + float dist, xsq, ysq, xn, yn, colf[4], *rectft, *rtf; + float haloxs, haloys; + int minx, maxx, miny, maxy, x, y; + + /* calculate the disprect mapped coordinate for halo. 
note: rectx is disprect corrected */ + haloxs= har->xs - R.disprect.xmin; + haloys= har->ys - R.disprect.ymin; + + har->miny= miny= haloys - har->rad/R.ycor; + har->maxy= maxy= haloys + har->rad/R.ycor; + + if (maxy < 0) { + /* pass */ + } + else if (rr->recty < miny) { + /* pass */ + } + else { + minx = floor(haloxs - har->rad); + maxx = ceil(haloxs + har->rad); + + if (maxx < 0) { + /* pass */ + } + else if (rr->rectx < minx) { + /* pass */ + } + else { + if (minx<0) minx= 0; + if (maxx>=rr->rectx) maxx= rr->rectx-1; + if (miny<0) miny= 0; + if (maxy>rr->recty) maxy= rr->recty; + + rectft= rectf+ 4*rr->rectx*miny; + + for (y=miny; y<maxy; y++) { + + rtf= rectft+4*minx; + + yn= (y - haloys)*R.ycor; + ysq= yn*yn; + + for (x=minx; x<=maxx; x++) { + xn= x - haloxs; + xsq= xn*xn; + dist= xsq+ysq; + if (dist<har->radsq) { + + if (shadeHaloFloat(har, colf, 0x7FFFFF, dist, xn, yn, har->flarec)) + addalphaAddfacFloat(rtf, colf, har->add); + } + rtf+=4; + } + + rectft+= 4*rr->rectx; + + if (R.test_break(R.tbh)) break; + } + } + } +} +/* ------------------------------------------------------------------------ */ + +static void renderflare(RenderResult *rr, float *rectf, HaloRen *har) +{ + extern const float hashvectf[]; + HaloRen fla; + Material *ma; + const float *rc; + float rad, alfa, visifac, vec[3]; + int b, type; + + fla= *har; + fla.linec= fla.ringc= fla.flarec= 0; + + rad= har->rad; + alfa= har->alfa; + + visifac= R.ycor*(har->pixels); + /* all radials added / r^3 == 1.0f! 
*/ + visifac /= (har->rad*har->rad*har->rad); + visifac*= visifac; + + ma= har->mat; + + /* first halo: just do */ + + har->rad= rad*ma->flaresize*visifac; + har->radsq= har->rad*har->rad; + har->zs= fla.zs= 0; + + har->alfa= alfa*visifac; + + renderhalo_post(rr, rectf, har); + + /* next halo's: the flares */ + rc= hashvectf + ma->seed2; + + for (b=1; b<har->flarec; b++) { + + fla.r = fabsf(rc[0]); + fla.g = fabsf(rc[1]); + fla.b = fabsf(rc[2]); + fla.alfa= ma->flareboost*fabsf(alfa*visifac*rc[3]); + fla.hard= 20.0f + fabsf(70.0f*rc[7]); + fla.tex= 0; + + type= (int)(fabsf(3.9f*rc[6])); + + fla.rad = ma->subsize * sqrtf(fabsf(2.0f * har->rad * rc[4])); + + if (type==3) { + fla.rad*= 3.0f; + fla.rad+= R.rectx/10; + } + + fla.radsq= fla.rad*fla.rad; + + vec[0]= 1.4f*rc[5]*(har->xs-R.winx/2); + vec[1]= 1.4f*rc[5]*(har->ys-R.winy/2); + vec[2]= 32.0f*sqrtf(vec[0]*vec[0] + vec[1]*vec[1] + 1.0f); + + fla.xs= R.winx/2 + vec[0] + (1.2f+rc[8])*R.rectx*vec[0]/vec[2]; + fla.ys= R.winy/2 + vec[1] + (1.2f+rc[8])*R.rectx*vec[1]/vec[2]; + + if (R.flag & R_SEC_FIELD) { + if (R.r.mode & R_ODDFIELD) fla.ys += 0.5f; + else fla.ys -= 0.5f; + } + if (type & 1) fla.type= HA_FLARECIRC; + else fla.type= 0; + renderhalo_post(rr, rectf, &fla); + + fla.alfa*= 0.5f; + if (type & 2) fla.type= HA_FLARECIRC; + else fla.type= 0; + renderhalo_post(rr, rectf, &fla); + + rc+= 7; + } +} + +/* needs recode... integrate this better! 
*/ +void add_halo_flare(Render *re) +{ + RenderResult *rr= re->result; + RenderLayer *rl; + HaloRen *har; + int a, mode; + float *rect; + + /* for now, we get the first renderlayer in list with halos set */ + for (rl= rr->layers.first; rl; rl= rl->next) { + bool do_draw = false; + + if ((rl->layflag & SCE_LAY_HALO) == 0) + continue; + + rect = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, re->viewname); + + if (rect==NULL) + continue; + + mode= R.r.mode; + R.r.mode &= ~R_PANORAMA; + + project_renderdata(&R, projectverto, 0, 0, 0); + + for (a=0; a<R.tothalo; a++) { + har= R.sortedhalos[a]; + + if (har->flarec && (har->lay & rl->lay)) { + do_draw = true; + renderflare(rr, rect, har); + } + } + + if (do_draw) { + /* weak... the display callback wants an active renderlayer pointer... */ + rr->renlay= rl; + re->display_update(re->duh, rr, NULL); + } + + R.r.mode= mode; + } +} + +void render_internal_update_passes(RenderEngine *engine, Scene *scene, SceneRenderLayer *srl) +{ + int type; + + RE_engine_register_pass(engine, scene, srl, RE_PASSNAME_COMBINED, 4, "RGBA", SOCK_RGBA); + +#define CHECK_PASS(name, channels, chanid) \ + if (srl->passflag & (SCE_PASS_ ## name)) { \ + if (channels == 4) type = SOCK_RGBA; \ + else if (channels == 3) type = SOCK_VECTOR; \ + else type = SOCK_FLOAT; \ + RE_engine_register_pass(engine, scene, srl, RE_PASSNAME_ ## name, channels, chanid, type); \ + } + + CHECK_PASS(Z, 1, "Z"); + CHECK_PASS(VECTOR, 4, "XYZW"); + CHECK_PASS(NORMAL, 3, "XYZ"); + CHECK_PASS(UV, 3, "UVA"); + CHECK_PASS(RGBA, 4, "RGBA"); + CHECK_PASS(EMIT, 3, "RGB"); + CHECK_PASS(DIFFUSE, 3, "RGB"); + CHECK_PASS(SPEC, 3, "RGB"); + CHECK_PASS(AO, 3, "RGB"); + CHECK_PASS(ENVIRONMENT, 3, "RGB"); + CHECK_PASS(INDIRECT, 3, "RGB"); + CHECK_PASS(SHADOW, 3, "RGB"); + CHECK_PASS(REFLECT, 3, "RGB"); + CHECK_PASS(REFRACT, 3, "RGB"); + CHECK_PASS(INDEXOB, 1, "X"); + CHECK_PASS(INDEXMA, 1, "X"); + CHECK_PASS(MIST, 1, "Z"); + +#undef CHECK_PASS +} diff --git 
a/source/blender/render/intern/source/renderdatabase.c b/source/blender/render/intern/source/renderdatabase.c new file mode 100644 index 00000000000..67bfd1bfdc7 --- /dev/null +++ b/source/blender/render/intern/source/renderdatabase.c @@ -0,0 +1,1603 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * Contributor(s): 2004-2006, Blender Foundation, full recode + * + * ***** END GPL/BL DUAL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/renderdatabase.c + * \ingroup render + */ + + +/* + * Storage, retrieval and query of render specific data. + * + * All data from a Blender scene is converted by the renderconverter/ + * into a special format that is used by the render module to make + * images out of. These functions interface to the render-specific + * database. + * + * The blo{ha/ve/vl} arrays store pointers to blocks of 256 data + * entries each. + * + * The index of an entry is >>8 (the highest 24 * bits), to find an + * offset in a 256-entry block. + * + * - If the 256-entry block entry has an entry in the + * vertnodes/vlaknodes/bloha array of the current block, the i-th entry in + * that block is allocated to this entry. 
+ * + * - If the entry has no block allocated for it yet, memory is + * allocated. + * + * The pointer to the correct entry is returned. Memory is guaranteed + * to exist (as long as the malloc does not break). Since guarded + * allocation is used, memory _must_ be available. Otherwise, an + * exit(0) would occur. + * + */ + +#include <limits.h> +#include <math.h> +#include <string.h> + +#include "MEM_guardedalloc.h" + + +#include "BLI_math.h" +#include "BLI_blenlib.h" +#include "BLI_utildefines.h" +#include "BLI_hash.h" + +#include "DNA_material_types.h" +#include "DNA_meshdata_types.h" +#include "DNA_texture_types.h" +#include "DNA_listBase.h" +#include "DNA_particle_types.h" + +#include "BKE_customdata.h" +#include "BKE_DerivedMesh.h" + +#include "RE_render_ext.h" /* externtex */ + +#include "rayintersection.h" +#include "rayobject.h" +#include "render_types.h" +#include "renderdatabase.h" +#include "zbuf.h" + +/* ------------------------------------------------------------------------- */ + +/* More dynamic allocation of options for render vertices and faces, so we don't + * have to reserve this space inside vertices. + * Important; vertices and faces, should have been created already (to get tables + * checked) that's a reason why the calls demand VertRen/VlakRen * as arg, not + * the index */ + +/* NOTE! 
the hardcoded table size 256 is used still in code for going quickly over vertices/faces */ +#define RE_STRESS_ELEMS 1 +#define RE_RAD_ELEMS 4 +#define RE_STRAND_ELEMS 1 +#define RE_TANGENT_ELEMS 3 +#define RE_WINSPEED_ELEMS 4 +#define RE_MTFACE_ELEMS 1 +#define RE_MCOL_ELEMS 4 +#define RE_UV_ELEMS 2 +#define RE_VLAK_ORIGINDEX_ELEMS 1 +#define RE_VERT_ORIGINDEX_ELEMS 1 +#define RE_SURFNOR_ELEMS 3 +#define RE_RADFACE_ELEMS 1 +#define RE_SIMPLIFY_ELEMS 2 +#define RE_FACE_ELEMS 1 +#define RE_NMAP_TANGENT_ELEMS 16 + +float *RE_vertren_get_stress(ObjectRen *obr, VertRen *ver, int verify) +{ + float *stress; + int nr= ver->index>>8; + + stress= obr->vertnodes[nr].stress; + if (stress==NULL) { + if (verify) + stress= obr->vertnodes[nr].stress= MEM_mallocN(256*RE_STRESS_ELEMS*sizeof(float), "stress table"); + else + return NULL; + } + return stress + (ver->index & 255)*RE_STRESS_ELEMS; +} + +/* this one callocs! */ +float *RE_vertren_get_rad(ObjectRen *obr, VertRen *ver, int verify) +{ + float *rad; + int nr= ver->index>>8; + + rad= obr->vertnodes[nr].rad; + if (rad==NULL) { + if (verify) + rad= obr->vertnodes[nr].rad= MEM_callocN(256*RE_RAD_ELEMS*sizeof(float), "rad table"); + else + return NULL; + } + return rad + (ver->index & 255)*RE_RAD_ELEMS; +} + +float *RE_vertren_get_strand(ObjectRen *obr, VertRen *ver, int verify) +{ + float *strand; + int nr= ver->index>>8; + + strand= obr->vertnodes[nr].strand; + if (strand==NULL) { + if (verify) + strand= obr->vertnodes[nr].strand= MEM_mallocN(256*RE_STRAND_ELEMS*sizeof(float), "strand table"); + else + return NULL; + } + return strand + (ver->index & 255)*RE_STRAND_ELEMS; +} + +/* needs calloc */ +float *RE_vertren_get_tangent(ObjectRen *obr, VertRen *ver, int verify) +{ + float *tangent; + int nr= ver->index>>8; + + tangent= obr->vertnodes[nr].tangent; + if (tangent==NULL) { + if (verify) + tangent= obr->vertnodes[nr].tangent= MEM_callocN(256*RE_TANGENT_ELEMS*sizeof(float), "tangent table"); + else + return NULL; + } + 
return tangent + (ver->index & 255)*RE_TANGENT_ELEMS; +} + +/* needs calloc! not all renderverts have them */ +/* also winspeed is exception, it is stored per instance */ +float *RE_vertren_get_winspeed(ObjectInstanceRen *obi, VertRen *ver, int verify) +{ + float *winspeed; + int totvector; + + winspeed= obi->vectors; + if (winspeed==NULL) { + if (verify) { + totvector= obi->obr->totvert + obi->obr->totstrand; + winspeed= obi->vectors= MEM_callocN(totvector*RE_WINSPEED_ELEMS*sizeof(float), "winspeed table"); + } + else + return NULL; + } + return winspeed + ver->index*RE_WINSPEED_ELEMS; +} + +int *RE_vertren_get_origindex(ObjectRen *obr, VertRen *ver, int verify) +{ + int *origindex; + int nr= ver->index>>8; + + origindex= obr->vertnodes[nr].origindex; + if (origindex==NULL) { + if (verify) + origindex= obr->vertnodes[nr].origindex= MEM_mallocN(256*RE_VERT_ORIGINDEX_ELEMS*sizeof(int), "origindex table"); + else + return NULL; + } + return origindex + (ver->index & 255)*RE_VERT_ORIGINDEX_ELEMS; +} + +VertRen *RE_vertren_copy(ObjectRen *obr, VertRen *ver) +{ + VertRen *v1= RE_findOrAddVert(obr, obr->totvert++); + float *fp1, *fp2; + int *int1, *int2; + int index= v1->index; + + *v1= *ver; + v1->index= index; + + fp1= RE_vertren_get_stress(obr, ver, 0); + if (fp1) { + fp2= RE_vertren_get_stress(obr, v1, 1); + memcpy(fp2, fp1, RE_STRESS_ELEMS*sizeof(float)); + } + fp1= RE_vertren_get_rad(obr, ver, 0); + if (fp1) { + fp2= RE_vertren_get_rad(obr, v1, 1); + memcpy(fp2, fp1, RE_RAD_ELEMS*sizeof(float)); + } + fp1= RE_vertren_get_strand(obr, ver, 0); + if (fp1) { + fp2= RE_vertren_get_strand(obr, v1, 1); + memcpy(fp2, fp1, RE_STRAND_ELEMS*sizeof(float)); + } + fp1= RE_vertren_get_tangent(obr, ver, 0); + if (fp1) { + fp2= RE_vertren_get_tangent(obr, v1, 1); + memcpy(fp2, fp1, RE_TANGENT_ELEMS*sizeof(float)); + } + int1= RE_vertren_get_origindex(obr, ver, 0); + if (int1) { + int2= RE_vertren_get_origindex(obr, v1, 1); + memcpy(int2, int1, RE_VERT_ORIGINDEX_ELEMS*sizeof(int)); 
+ } + return v1; +} + +VertRen *RE_findOrAddVert(ObjectRen *obr, int nr) +{ + VertTableNode *temp; + VertRen *v; + int a; + + if (nr<0) { + printf("error in findOrAddVert: %d\n", nr); + return NULL; + } + a= nr>>8; + + if (a>=obr->vertnodeslen-1) { /* Need to allocate more columns..., and keep last element NULL for free loop */ + temp= obr->vertnodes; + + obr->vertnodes= MEM_mallocN(sizeof(VertTableNode)*(obr->vertnodeslen+TABLEINITSIZE), "vertnodes"); + if (temp) memcpy(obr->vertnodes, temp, obr->vertnodeslen*sizeof(VertTableNode)); + memset(obr->vertnodes+obr->vertnodeslen, 0, TABLEINITSIZE*sizeof(VertTableNode)); + + obr->vertnodeslen+=TABLEINITSIZE; + if (temp) MEM_freeN(temp); + } + + v= obr->vertnodes[a].vert; + if (v==NULL) { + int i; + + v= (VertRen *)MEM_callocN(256*sizeof(VertRen), "findOrAddVert"); + obr->vertnodes[a].vert= v; + + for (i= (nr & 0xFFFFFF00), a=0; a<256; a++, i++) { + v[a].index= i; + } + } + v+= (nr & 255); + return v; +} + +/* ------------------------------------------------------------------------ */ + +MTFace *RE_vlakren_get_tface(ObjectRen *obr, VlakRen *vlr, int n, char **name, int verify) +{ + VlakTableNode *node; + int nr= vlr->index>>8, vlakindex= (vlr->index&255); + int index= (n<<8) + vlakindex; + + node= &obr->vlaknodes[nr]; + + if (verify) { + if (n>=node->totmtface) { + MTFace *mtface= node->mtface; + int size= (n+1)*256; + + node->mtface= MEM_callocN(size*sizeof(MTFace), "Vlak mtface"); + + if (mtface) { + size= node->totmtface*256; + memcpy(node->mtface, mtface, size*sizeof(MTFace)); + MEM_freeN(mtface); + } + + node->totmtface= n+1; + } + } + else { + if (n>=node->totmtface) + return NULL; + + if (name) *name= obr->mtface[n]; + } + + return node->mtface + index; +} + +MCol *RE_vlakren_get_mcol(ObjectRen *obr, VlakRen *vlr, int n, char **name, int verify) +{ + VlakTableNode *node; + int nr= vlr->index>>8, vlakindex= (vlr->index&255); + int index= (n<<8) + vlakindex; + + node= &obr->vlaknodes[nr]; + + if (verify) { + if 
(n>=node->totmcol) { + MCol *mcol= node->mcol; + int size= (n+1)*256; + + node->mcol= MEM_callocN(size*sizeof(MCol)*RE_MCOL_ELEMS, "Vlak mcol"); + + if (mcol) { + size= node->totmcol*256; + memcpy(node->mcol, mcol, size*sizeof(MCol)*RE_MCOL_ELEMS); + MEM_freeN(mcol); + } + + node->totmcol= n+1; + } + } + else { + if (n>=node->totmcol) + return NULL; + + if (name) *name= obr->mcol[n]; + } + + return node->mcol + index*RE_MCOL_ELEMS; +} + +int *RE_vlakren_get_origindex(ObjectRen *obr, VlakRen *vlak, int verify) +{ + int *origindex; + int nr= vlak->index>>8; + + origindex= obr->vlaknodes[nr].origindex; + if (origindex==NULL) { + if (verify) + origindex= obr->vlaknodes[nr].origindex= MEM_callocN(256*RE_VLAK_ORIGINDEX_ELEMS*sizeof(int), "origindex table"); + else + return NULL; + } + return origindex + (vlak->index & 255)*RE_VLAK_ORIGINDEX_ELEMS; +} + +float *RE_vlakren_get_surfnor(ObjectRen *obr, VlakRen *vlak, int verify) +{ + float *surfnor; + int nr= vlak->index>>8; + + surfnor= obr->vlaknodes[nr].surfnor; + if (surfnor==NULL) { + if (verify) + surfnor= obr->vlaknodes[nr].surfnor= MEM_callocN(256*RE_SURFNOR_ELEMS*sizeof(float), "surfnor table"); + else + return NULL; + } + return surfnor + (vlak->index & 255)*RE_SURFNOR_ELEMS; +} + +float *RE_vlakren_get_nmap_tangent(ObjectRen *obr, VlakRen *vlak, int index, bool verify) +{ + float **tangents; + int nr= vlak->index>>8; + + tangents = obr->vlaknodes[nr].tangent_arrays; + + if (index + 1 > 8) { + return NULL; + } + + index = index < 0 ? 
0: index; + + if (tangents[index] == NULL) { + if (verify) { + tangents[index] = MEM_callocN(256*RE_NMAP_TANGENT_ELEMS*sizeof(float), "tangent table"); + } + else + return NULL; + } + + return tangents[index] + (vlak->index & 255)*RE_NMAP_TANGENT_ELEMS; +} + +RadFace **RE_vlakren_get_radface(ObjectRen *obr, VlakRen *vlak, int verify) +{ + RadFace **radface; + int nr= vlak->index>>8; + + radface= obr->vlaknodes[nr].radface; + if (radface==NULL) { + if (verify) + radface = obr->vlaknodes[nr].radface= MEM_callocN(256 * RE_RADFACE_ELEMS * sizeof(void *), "radface table"); + else + return NULL; + } + return radface + (vlak->index & 255)*RE_RADFACE_ELEMS; +} + +VlakRen *RE_vlakren_copy(ObjectRen *obr, VlakRen *vlr) +{ + VlakRen *vlr1 = RE_findOrAddVlak(obr, obr->totvlak++); + MTFace *mtface, *mtface1; + MCol *mcol, *mcol1; + float *surfnor, *surfnor1; + float *tangent, *tangent1; + int *origindex, *origindex1; + RadFace **radface, **radface1; + int i, index = vlr1->index; + char *name; + + *vlr1= *vlr; + vlr1->index= index; + + for (i=0; (mtface=RE_vlakren_get_tface(obr, vlr, i, &name, 0)) != NULL; i++) { + mtface1= RE_vlakren_get_tface(obr, vlr1, i, &name, 1); + memcpy(mtface1, mtface, sizeof(MTFace)*RE_MTFACE_ELEMS); + } + + for (i=0; (mcol=RE_vlakren_get_mcol(obr, vlr, i, &name, 0)) != NULL; i++) { + mcol1= RE_vlakren_get_mcol(obr, vlr1, i, &name, 1); + memcpy(mcol1, mcol, sizeof(MCol)*RE_MCOL_ELEMS); + } + + origindex= RE_vlakren_get_origindex(obr, vlr, 0); + if (origindex) { + origindex1= RE_vlakren_get_origindex(obr, vlr1, 1); + /* Just an int, but memcpy for consistency. 
*/ + memcpy(origindex1, origindex, sizeof(int)*RE_VLAK_ORIGINDEX_ELEMS); + } + + surfnor= RE_vlakren_get_surfnor(obr, vlr, 0); + if (surfnor) { + surfnor1= RE_vlakren_get_surfnor(obr, vlr1, 1); + copy_v3_v3(surfnor1, surfnor); + } + + for (i=0; i < MAX_MTFACE; i++) { + tangent = RE_vlakren_get_nmap_tangent(obr, vlr, i, false); + if (!tangent) + continue; + tangent1 = RE_vlakren_get_nmap_tangent(obr, vlr1, i, true); + memcpy(tangent1, tangent, sizeof(float)*RE_NMAP_TANGENT_ELEMS); + } + + radface= RE_vlakren_get_radface(obr, vlr, 0); + if (radface) { + radface1= RE_vlakren_get_radface(obr, vlr1, 1); + *radface1= *radface; + } + + return vlr1; +} + +void RE_vlakren_get_normal(Render *UNUSED(re), ObjectInstanceRen *obi, VlakRen *vlr, float r_nor[3]) +{ + float (*nmat)[3]= obi->nmat; + + if (obi->flag & R_TRANSFORMED) { + mul_v3_m3v3(r_nor, nmat, vlr->n); + normalize_v3(r_nor); + } + else { + copy_v3_v3(r_nor, vlr->n); + } +} + +void RE_set_customdata_names(ObjectRen *obr, CustomData *data) +{ + /* CustomData layer names are stored per object here, because the + * DerivedMesh which stores the layers is freed */ + + CustomDataLayer *layer; + int numtf = 0, numcol = 0, i, mtfn, mcn; + + if (CustomData_has_layer(data, CD_MTFACE)) { + numtf= CustomData_number_of_layers(data, CD_MTFACE); + obr->mtface= MEM_callocN(sizeof(*obr->mtface)*numtf, "mtfacenames"); + } + + if (CustomData_has_layer(data, CD_MCOL)) { + numcol= CustomData_number_of_layers(data, CD_MCOL); + obr->mcol= MEM_callocN(sizeof(*obr->mcol)*numcol, "mcolnames"); + } + + for (i=0, mtfn=0, mcn=0; i < data->totlayer; i++) { + layer= &data->layers[i]; + + if (layer->type == CD_MTFACE) { + BLI_strncpy(obr->mtface[mtfn++], layer->name, sizeof(layer->name)); + obr->actmtface= CLAMPIS(layer->active_rnd, 0, numtf); + obr->bakemtface= layer->active; + } + else if (layer->type == CD_MCOL) { + BLI_strncpy(obr->mcol[mcn++], layer->name, sizeof(layer->name)); + obr->actmcol= CLAMPIS(layer->active_rnd, 0, numcol); + } + } +} 
+ +VlakRen *RE_findOrAddVlak(ObjectRen *obr, int nr) +{ + VlakTableNode *temp; + VlakRen *v; + int a; + + if (nr<0) { + printf("error in findOrAddVlak: %d\n", nr); + return obr->vlaknodes[0].vlak; + } + a= nr>>8; + + if (a>=obr->vlaknodeslen-1) { /* Need to allocate more columns..., and keep last element NULL for free loop */ + temp= obr->vlaknodes; + + obr->vlaknodes= MEM_mallocN(sizeof(VlakTableNode)*(obr->vlaknodeslen+TABLEINITSIZE), "vlaknodes"); + if (temp) memcpy(obr->vlaknodes, temp, obr->vlaknodeslen*sizeof(VlakTableNode)); + memset(obr->vlaknodes+obr->vlaknodeslen, 0, TABLEINITSIZE*sizeof(VlakTableNode)); + + obr->vlaknodeslen+=TABLEINITSIZE; /*Does this really need to be power of 2?*/ + if (temp) MEM_freeN(temp); + } + + v= obr->vlaknodes[a].vlak; + + if (v==NULL) { + int i; + + v= (VlakRen *)MEM_callocN(256*sizeof(VlakRen), "findOrAddVlak"); + obr->vlaknodes[a].vlak= v; + + for (i= (nr & 0xFFFFFF00), a=0; a<256; a++, i++) + v[a].index= i; + } + v+= (nr & 255); + return v; +} + +/* ------------------------------------------------------------------------ */ + +float *RE_strandren_get_surfnor(ObjectRen *obr, StrandRen *strand, int verify) +{ + float *surfnor; + int nr= strand->index>>8; + + surfnor= obr->strandnodes[nr].surfnor; + if (surfnor==NULL) { + if (verify) + surfnor= obr->strandnodes[nr].surfnor= MEM_callocN(256*RE_SURFNOR_ELEMS*sizeof(float), "surfnor strand table"); + else + return NULL; + } + return surfnor + (strand->index & 255)*RE_SURFNOR_ELEMS; +} + +float *RE_strandren_get_uv(ObjectRen *obr, StrandRen *strand, int n, char **name, int verify) +{ + StrandTableNode *node; + int nr= strand->index>>8, strandindex= (strand->index&255); + int index= (n<<8) + strandindex; + + node= &obr->strandnodes[nr]; + + if (verify) { + if (n>=node->totuv) { + float *uv= node->uv; + int size= (n+1)*256; + + node->uv= MEM_callocN(size*sizeof(float)*RE_UV_ELEMS, "strand uv table"); + + if (uv) { + size= node->totuv*256; + memcpy(node->uv, uv, 
size*sizeof(float)*RE_UV_ELEMS); + MEM_freeN(uv); + } + + node->totuv= n+1; + } + } + else { + if (n>=node->totuv) + return NULL; + + if (name) *name= obr->mtface[n]; + } + + return node->uv + index*RE_UV_ELEMS; +} + +MCol *RE_strandren_get_mcol(ObjectRen *obr, StrandRen *strand, int n, char **name, int verify) +{ + StrandTableNode *node; + int nr= strand->index>>8, strandindex= (strand->index&255); + int index= (n<<8) + strandindex; + + node= &obr->strandnodes[nr]; + + if (verify) { + if (n>=node->totmcol) { + MCol *mcol= node->mcol; + int size= (n+1)*256; + + node->mcol= MEM_callocN(size*sizeof(MCol)*RE_MCOL_ELEMS, "strand mcol table"); + + if (mcol) { + size= node->totmcol*256; + memcpy(node->mcol, mcol, size*sizeof(MCol)*RE_MCOL_ELEMS); + MEM_freeN(mcol); + } + + node->totmcol= n+1; + } + } + else { + if (n>=node->totmcol) + return NULL; + + if (name) *name= obr->mcol[n]; + } + + return node->mcol + index*RE_MCOL_ELEMS; +} + +float *RE_strandren_get_simplify(struct ObjectRen *obr, struct StrandRen *strand, int verify) +{ + float *simplify; + int nr= strand->index>>8; + + simplify= obr->strandnodes[nr].simplify; + if (simplify==NULL) { + if (verify) + simplify= obr->strandnodes[nr].simplify= MEM_callocN(256*RE_SIMPLIFY_ELEMS*sizeof(float), "simplify strand table"); + else + return NULL; + } + return simplify + (strand->index & 255)*RE_SIMPLIFY_ELEMS; +} + +int *RE_strandren_get_face(ObjectRen *obr, StrandRen *strand, int verify) +{ + int *face; + int nr= strand->index>>8; + + face= obr->strandnodes[nr].face; + if (face==NULL) { + if (verify) + face= obr->strandnodes[nr].face= MEM_callocN(256*RE_FACE_ELEMS*sizeof(int), "face strand table"); + else + return NULL; + } + return face + (strand->index & 255)*RE_FACE_ELEMS; +} + +/* winspeed is exception, it is stored per instance */ +float *RE_strandren_get_winspeed(ObjectInstanceRen *obi, StrandRen *strand, int verify) +{ + float *winspeed; + int totvector; + + winspeed= obi->vectors; + if (winspeed==NULL) { + if 
(verify) { + totvector= obi->obr->totvert + obi->obr->totstrand; + winspeed= obi->vectors= MEM_callocN(totvector*RE_WINSPEED_ELEMS*sizeof(float), "winspeed strand table"); + } + else + return NULL; + } + return winspeed + (obi->obr->totvert + strand->index)*RE_WINSPEED_ELEMS; +} + +StrandRen *RE_findOrAddStrand(ObjectRen *obr, int nr) +{ + StrandTableNode *temp; + StrandRen *v; + int a; + + if (nr<0) { + printf("error in findOrAddStrand: %d\n", nr); + return obr->strandnodes[0].strand; + } + a= nr>>8; + + if (a>=obr->strandnodeslen-1) { /* Need to allocate more columns..., and keep last element NULL for free loop */ + temp= obr->strandnodes; + + obr->strandnodes= MEM_mallocN(sizeof(StrandTableNode)*(obr->strandnodeslen+TABLEINITSIZE), "strandnodes"); + if (temp) memcpy(obr->strandnodes, temp, obr->strandnodeslen*sizeof(StrandTableNode)); + memset(obr->strandnodes+obr->strandnodeslen, 0, TABLEINITSIZE*sizeof(StrandTableNode)); + + obr->strandnodeslen+=TABLEINITSIZE; /*Does this really need to be power of 2?*/ + if (temp) MEM_freeN(temp); + } + + v= obr->strandnodes[a].strand; + + if (v==NULL) { + int i; + + v= (StrandRen *)MEM_callocN(256*sizeof(StrandRen), "findOrAddStrand"); + obr->strandnodes[a].strand= v; + + for (i= (nr & 0xFFFFFF00), a=0; a<256; a++, i++) + v[a].index= i; + } + v+= (nr & 255); + return v; +} + +StrandBuffer *RE_addStrandBuffer(ObjectRen *obr, int totvert) +{ + StrandBuffer *strandbuf; + + strandbuf= MEM_callocN(sizeof(StrandBuffer), "StrandBuffer"); + strandbuf->vert= MEM_callocN(sizeof(StrandVert)*totvert, "StrandVert"); + strandbuf->totvert= totvert; + strandbuf->obr= obr; + + obr->strandbuf= strandbuf; + + return strandbuf; +} + +/* ------------------------------------------------------------------------ */ + +ObjectRen *RE_addRenderObject(Render *re, Object *ob, Object *par, int index, int psysindex, int lay) +{ + ObjectRen *obr= MEM_callocN(sizeof(ObjectRen), "object render struct"); + + BLI_addtail(&re->objecttable, obr); + obr->ob= ob; 
+ obr->par= par; + obr->index= index; + obr->psysindex= psysindex; + obr->lay= lay; + + return obr; +} + +void free_renderdata_vertnodes(VertTableNode *vertnodes) +{ + int a; + + if (vertnodes==NULL) return; + + for (a=0; vertnodes[a].vert; a++) { + MEM_freeN(vertnodes[a].vert); + + if (vertnodes[a].rad) + MEM_freeN(vertnodes[a].rad); + if (vertnodes[a].strand) + MEM_freeN(vertnodes[a].strand); + if (vertnodes[a].tangent) + MEM_freeN(vertnodes[a].tangent); + if (vertnodes[a].stress) + MEM_freeN(vertnodes[a].stress); + if (vertnodes[a].winspeed) + MEM_freeN(vertnodes[a].winspeed); + if (vertnodes[a].origindex) + MEM_freeN(vertnodes[a].origindex); + } + + MEM_freeN(vertnodes); +} + +void free_renderdata_vlaknodes(VlakTableNode *vlaknodes) +{ + int a; + + if (vlaknodes==NULL) return; + + for (a=0; vlaknodes[a].vlak; a++) { + MEM_freeN(vlaknodes[a].vlak); + + if (vlaknodes[a].mtface) + MEM_freeN(vlaknodes[a].mtface); + if (vlaknodes[a].mcol) + MEM_freeN(vlaknodes[a].mcol); + if (vlaknodes[a].origindex) + MEM_freeN(vlaknodes[a].origindex); + if (vlaknodes[a].surfnor) + MEM_freeN(vlaknodes[a].surfnor); + for (int b = 0; b < MAX_MTFACE; b++) { + if (vlaknodes[a].tangent_arrays[b]) + MEM_freeN(vlaknodes[a].tangent_arrays[b]); + } + if (vlaknodes[a].radface) + MEM_freeN(vlaknodes[a].radface); + } + + MEM_freeN(vlaknodes); +} + +static void free_renderdata_strandnodes(StrandTableNode *strandnodes) +{ + int a; + + if (strandnodes==NULL) return; + + for (a=0; strandnodes[a].strand; a++) { + MEM_freeN(strandnodes[a].strand); + + if (strandnodes[a].uv) + MEM_freeN(strandnodes[a].uv); + if (strandnodes[a].mcol) + MEM_freeN(strandnodes[a].mcol); + if (strandnodes[a].winspeed) + MEM_freeN(strandnodes[a].winspeed); + if (strandnodes[a].surfnor) + MEM_freeN(strandnodes[a].surfnor); + if (strandnodes[a].simplify) + MEM_freeN(strandnodes[a].simplify); + if (strandnodes[a].face) + MEM_freeN(strandnodes[a].face); + } + + MEM_freeN(strandnodes); +} + +void free_renderdata_tables(Render 
*re) +{ + ObjectInstanceRen *obi; + ObjectRen *obr; + StrandBuffer *strandbuf; + int a=0; + + for (obr=re->objecttable.first; obr; obr=obr->next) { + if (obr->vertnodes) { + free_renderdata_vertnodes(obr->vertnodes); + obr->vertnodes= NULL; + obr->vertnodeslen= 0; + } + + if (obr->vlaknodes) { + free_renderdata_vlaknodes(obr->vlaknodes); + obr->vlaknodes= NULL; + obr->vlaknodeslen= 0; + obr->totvlak= 0; + } + + if (obr->bloha) { + for (a=0; obr->bloha[a]; a++) + MEM_freeN(obr->bloha[a]); + + MEM_freeN(obr->bloha); + obr->bloha= NULL; + obr->blohalen= 0; + } + + if (obr->strandnodes) { + free_renderdata_strandnodes(obr->strandnodes); + obr->strandnodes= NULL; + obr->strandnodeslen= 0; + } + + strandbuf= obr->strandbuf; + if (strandbuf) { + if (strandbuf->vert) MEM_freeN(strandbuf->vert); + if (strandbuf->bound) MEM_freeN(strandbuf->bound); + MEM_freeN(strandbuf); + } + + if (obr->mtface) + MEM_freeN(obr->mtface); + + if (obr->mcol) + MEM_freeN(obr->mcol); + + if (obr->rayfaces) { + MEM_freeN(obr->rayfaces); + obr->rayfaces = NULL; + } + + if (obr->rayprimitives) { + MEM_freeN(obr->rayprimitives); + obr->rayprimitives = NULL; + } + + if (obr->raytree) { + RE_rayobject_free(obr->raytree); + obr->raytree = NULL; + } + } + + if (re->objectinstance) { + for (obi=re->instancetable.first; obi; obi=obi->next) { + if (obi->vectors) + MEM_freeN(obi->vectors); + + if (obi->raytree) + RE_rayobject_free(obi->raytree); + } + + MEM_freeN(re->objectinstance); + re->objectinstance= NULL; + re->totinstance= 0; + re->instancetable.first= re->instancetable.last= NULL; + } + + if (re->sortedhalos) { + MEM_freeN(re->sortedhalos); + re->sortedhalos= NULL; + } + + BLI_freelistN(&re->customdata_names); + BLI_freelistN(&re->objecttable); + BLI_freelistN(&re->instancetable); +} + +/* ------------------------------------------------------------------------ */ + +HaloRen *RE_findOrAddHalo(ObjectRen *obr, int nr) +{ + HaloRen *h, **temp; + int a; + + if (nr<0) { + printf("error in findOrAddHalo: 
%d\n", nr); + return NULL; + } + a= nr>>8; + + if (a>=obr->blohalen-1) { /* Need to allocate more columns..., and keep last element NULL for free loop */ + //printf("Allocating %i more halo groups. %i total.\n", + // TABLEINITSIZE, obr->blohalen+TABLEINITSIZE ); + temp=obr->bloha; + + obr->bloha = (HaloRen **)MEM_callocN(sizeof(void *) * (obr->blohalen + TABLEINITSIZE), "Bloha"); + if (temp) memcpy(obr->bloha, temp, obr->blohalen*sizeof(void *)); + memset(&(obr->bloha[obr->blohalen]), 0, TABLEINITSIZE * sizeof(void *)); + obr->blohalen+=TABLEINITSIZE; /*Does this really need to be power of 2?*/ + if (temp) MEM_freeN(temp); + } + + h= obr->bloha[a]; + if (h==NULL) { + h= (HaloRen *)MEM_callocN(256*sizeof(HaloRen), "findOrAdHalo"); + obr->bloha[a]= h; + } + h+= (nr & 255); + return h; +} + +/* ------------------------------------------------------------------------- */ + +HaloRen *RE_inithalo(Render *re, ObjectRen *obr, Material *ma, + const float vec[3], const float vec1[3], + const float *orco, float hasize, float vectsize, int seed) +{ + const bool skip_load_image = (re->r.scemode & R_NO_IMAGE_LOAD) != 0; + const bool texnode_preview = (re->r.scemode & R_TEXNODE_PREVIEW) != 0; + HaloRen *har; + MTex *mtex; + float tin, tr, tg, tb, ta; + float xn, yn, zn, texvec[3], hoco[4], hoco1[4]; + + if (hasize==0.0f) return NULL; + + projectverto(vec, re->winmat, hoco); + if (hoco[3]==0.0f) return NULL; + if (vec1) { + projectverto(vec1, re->winmat, hoco1); + if (hoco1[3]==0.0f) return NULL; + } + + har= RE_findOrAddHalo(obr, obr->tothalo++); + copy_v3_v3(har->co, vec); + har->hasize= hasize; + + /* actual projectvert is done in function project_renderdata() because of parts/border/pano */ + /* we do it here for sorting of halos */ + zn= hoco[3]; + har->xs= 0.5f*re->winx*(hoco[0]/zn); + har->ys= 0.5f*re->winy*(hoco[1]/zn); + har->zs= 0x7FFFFF*(hoco[2]/zn); + + har->zBufDist = 0x7FFFFFFF*(hoco[2]/zn); + + /* halovect */ + if (vec1) { + + har->type |= HA_VECT; + + xn= har->xs - 
0.5f*re->winx*(hoco1[0]/hoco1[3]); + yn= har->ys - 0.5f*re->winy*(hoco1[1]/hoco1[3]); + if (yn == 0.0f && xn >= 0.0f) zn = 0.0f; + else zn = atan2f(yn, xn); + + har->sin = sinf(zn); + har->cos = cosf(zn); + zn= len_v3v3(vec1, vec); + + har->hasize= vectsize*zn + (1.0f-vectsize)*hasize; + + sub_v3_v3v3(har->no, vec, vec1); + normalize_v3(har->no); + } + + if (ma->mode & MA_HALO_XALPHA) har->type |= HA_XALPHA; + + har->alfa= ma->alpha; + har->r= ma->r; + har->g= ma->g; + har->b= ma->b; + har->add= (255.0f*ma->add); + har->mat= ma; + har->hard= ma->har; + har->seed= seed % 256; + + if (ma->mode & MA_STAR) har->starpoints= ma->starc; + if (ma->mode & MA_HALO_LINES) har->linec= ma->linec; + if (ma->mode & MA_HALO_RINGS) har->ringc= ma->ringc; + if (ma->mode & MA_HALO_FLARE) har->flarec= ma->flarec; + + + if (ma->mtex[0]) { + + if (ma->mode & MA_HALOTEX) { + har->tex = 1; + } + else if (har->mat->septex & (1 << 0)) { + /* only 1 level textures */ + } + else { + mtex= ma->mtex[0]; + copy_v3_v3(texvec, vec); + + if (mtex->texco & TEXCO_NORM) { + ; + } + else if (mtex->texco & TEXCO_OBJECT) { + /* texvec[0]+= imatbase->ivec[0]; */ + /* texvec[1]+= imatbase->ivec[1]; */ + /* texvec[2]+= imatbase->ivec[2]; */ + /* mul_m3_v3(imatbase->imat, texvec); */ + } + else { + if (orco) { + copy_v3_v3(texvec, orco); + } + } + + externtex(mtex, + texvec, + &tin, &tr, &tg, &tb, &ta, + 0, + re->pool, + skip_load_image, + texnode_preview); + + yn= tin*mtex->colfac; + //zn= tin*mtex->alphafac; + + if (mtex->mapto & MAP_COL) { + zn= 1.0f-yn; + har->r= (yn*tr+ zn*ma->r); + har->g= (yn*tg+ zn*ma->g); + har->b= (yn*tb+ zn*ma->b); + } + if (mtex->texco & TEXCO_UV) { + har->alfa= tin; + } + if (mtex->mapto & MAP_ALPHA) + har->alfa= tin; + } + } + + har->pool = re->pool; + har->skip_load_image = skip_load_image; + har->texnode_preview = texnode_preview; + + return har; +} + +HaloRen *RE_inithalo_particle(Render *re, ObjectRen *obr, DerivedMesh *dm, Material *ma, + const float vec[3], const float 
vec1[3], + const float *orco, const float *uvco, float hasize, float vectsize, int seed, const float pa_co[3]) +{ + const bool skip_load_image = (re->r.scemode & R_NO_IMAGE_LOAD) != 0; + const bool texnode_preview = (re->r.scemode & R_TEXNODE_PREVIEW) != 0; + HaloRen *har; + MTex *mtex; + float tin, tr, tg, tb, ta; + float xn, yn, zn, texvec[3], hoco[4], hoco1[4], in[3], tex[3], out[3]; + int i, hasrgb; + + if (hasize==0.0f) return NULL; + + projectverto(vec, re->winmat, hoco); + if (hoco[3]==0.0f) return NULL; + if (vec1) { + projectverto(vec1, re->winmat, hoco1); + if (hoco1[3]==0.0f) return NULL; + } + + har= RE_findOrAddHalo(obr, obr->tothalo++); + copy_v3_v3(har->co, vec); + har->hasize= hasize; + + /* actual projectvert is done in function project_renderdata() because of parts/border/pano */ + /* we do it here for sorting of halos */ + zn= hoco[3]; + har->xs= 0.5f*re->winx*(hoco[0]/zn); + har->ys= 0.5f*re->winy*(hoco[1]/zn); + har->zs= 0x7FFFFF*(hoco[2]/zn); + + har->zBufDist = 0x7FFFFFFF*(hoco[2]/zn); + + /* halovect */ + if (vec1) { + + har->type |= HA_VECT; + + xn= har->xs - 0.5f*re->winx*(hoco1[0]/hoco1[3]); + yn= har->ys - 0.5f*re->winy*(hoco1[1]/hoco1[3]); + if (yn == 0.0f && xn >= 0.0f) zn = 0.0f; + else zn = atan2f(yn, xn); + + har->sin = sinf(zn); + har->cos = cosf(zn); + zn= len_v3v3(vec1, vec)*0.5f; + + har->hasize= vectsize*zn + (1.0f-vectsize)*hasize; + + sub_v3_v3v3(har->no, vec, vec1); + normalize_v3(har->no); + } + + if (ma->mode & MA_HALO_XALPHA) har->type |= HA_XALPHA; + + har->alfa= ma->alpha; + har->r= ma->r; + har->g= ma->g; + har->b= ma->b; + har->add= (255.0f*ma->add); + har->mat= ma; + har->hard= ma->har; + har->seed= seed % 256; + + if (ma->mode & MA_STAR) har->starpoints= ma->starc; + if (ma->mode & MA_HALO_LINES) har->linec= ma->linec; + if (ma->mode & MA_HALO_RINGS) har->ringc= ma->ringc; + if (ma->mode & MA_HALO_FLARE) har->flarec= ma->flarec; + + if ((ma->mode & MA_HALOTEX) && ma->mtex[0]) + har->tex= 1; + + for (i=0; i<MAX_MTEX; 
i++) + if (ma->mtex[i] && (ma->septex & (1<<i))==0) { + mtex= ma->mtex[i]; + copy_v3_v3(texvec, vec); + + if (mtex->texco & TEXCO_NORM) { + ; + } + else if (mtex->texco & TEXCO_OBJECT) { + if (mtex->object) + mul_m4_v3(mtex->object->imat_ren, texvec); + } + else if (mtex->texco & TEXCO_GLOB) { + copy_v3_v3(texvec, vec); + } + else if (mtex->texco & TEXCO_UV && uvco) { + int uv_index=CustomData_get_named_layer_index(&dm->faceData, CD_MTFACE, mtex->uvname); + if (uv_index<0) + uv_index=CustomData_get_active_layer_index(&dm->faceData, CD_MTFACE); + + uv_index-=CustomData_get_layer_index(&dm->faceData, CD_MTFACE); + + texvec[0]=2.0f*uvco[2*uv_index]-1.0f; + texvec[1]=2.0f*uvco[2*uv_index+1]-1.0f; + texvec[2]=0.0f; + } + else if (mtex->texco & TEXCO_PARTICLE) { + /* particle coordinates in range [0, 1] */ + texvec[0] = 2.f * pa_co[0] - 1.f; + texvec[1] = 2.f * pa_co[1] - 1.f; + texvec[2] = pa_co[2]; + } + else if (orco) { + copy_v3_v3(texvec, orco); + } + + hasrgb = externtex(mtex, + texvec, + &tin, &tr, &tg, &tb, &ta, + 0, + re->pool, + skip_load_image, + texnode_preview); + + //yn= tin*mtex->colfac; + //zn= tin*mtex->alphafac; + if (mtex->mapto & MAP_COL) { + tex[0]=tr; + tex[1]=tg; + tex[2]=tb; + out[0]=har->r; + out[1]=har->g; + out[2]=har->b; + + texture_rgb_blend(in, tex, out, tin, mtex->colfac, mtex->blendtype); + // zn= 1.0-yn; + //har->r= (yn*tr+ zn*ma->r); + //har->g= (yn*tg+ zn*ma->g); + //har->b= (yn*tb+ zn*ma->b); + har->r= in[0]; + har->g= in[1]; + har->b= in[2]; + } + + /* alpha returned, so let's use it instead of intensity */ + if (hasrgb) + tin = ta; + + if (mtex->mapto & MAP_ALPHA) + har->alfa = texture_value_blend(mtex->def_var, har->alfa, tin, mtex->alphafac, mtex->blendtype); + if (mtex->mapto & MAP_HAR) + har->hard = 1.0f+126.0f*texture_value_blend(mtex->def_var, ((float)har->hard)/127.0f, tin, mtex->hardfac, mtex->blendtype); + if (mtex->mapto & MAP_RAYMIRR) + har->hasize = 100.0f*texture_value_blend(mtex->def_var, har->hasize/100.0f, tin, 
mtex->raymirrfac, mtex->blendtype); + if (mtex->mapto & MAP_TRANSLU) { + float add = texture_value_blend(mtex->def_var, (float)har->add/255.0f, tin, mtex->translfac, mtex->blendtype); + CLAMP(add, 0.f, 1.f); + har->add = 255.0f*add; + } + /* now what on earth is this good for?? */ + //if (mtex->texco & 16) { + // har->alfa= tin; + //} + } + + har->pool = re->pool; + har->skip_load_image = (re->r.scemode & R_NO_IMAGE_LOAD) != 0; + har->texnode_preview = (re->r.scemode & R_TEXNODE_PREVIEW) != 0; + + return har; +} + +/* -------------------------- operations on entire database ----------------------- */ + +/* ugly function for halos in panorama */ +static int panotestclip(Render *re, bool do_pano, float v[4]) +{ + /* part size (ensure we run RE_parts_clamp first) */ + BLI_assert(re->partx == min_ii(re->r.tilex, re->rectx)); + BLI_assert(re->party == min_ii(re->r.tiley, re->recty)); + + if (do_pano == false) { + return testclip(v); + } + else { + /* to be used for halos en infos */ + float abs4; + short c = 0; + + int xparts = (re->rectx + re->partx - 1) / re->partx; + + abs4= fabsf(v[3]); + + if (v[2]< -abs4) c=16; /* this used to be " if (v[2]<0) ", see clippz() */ + else if (v[2]> abs4) c+= 32; + + if ( v[1]>abs4) c+=4; + else if ( v[1]< -abs4) c+=8; + + abs4*= xparts; + if ( v[0]>abs4) c+=2; + else if ( v[0]< -abs4) c+=1; + + return c; + } +} + +/** + * This adds the hcs coordinates to vertices. It iterates over all + * vertices, halos and faces. After the conversion, we clip in hcs. + * + * Elsewhere, all primites are converted to vertices. 
+ * Called in + * - envmapping (envmap.c) + * - shadow buffering (shadbuf.c) + */ + +void project_renderdata(Render *re, + void (*projectfunc)(const float *, float mat[4][4], float *), + bool do_pano, float xoffs, bool UNUSED(do_buckets)) +{ + ObjectRen *obr; + HaloRen *har = NULL; + float zn, vec[3], hoco[4]; + int a; + + if (do_pano) { + float panophi= xoffs; + + re->panosi = sinf(panophi); + re->panoco = cosf(panophi); + } + + for (obr=re->objecttable.first; obr; obr=obr->next) { + /* calculate view coordinates (and zbuffer value) */ + for (a=0; a<obr->tothalo; a++) { + if ((a & 255)==0) har= obr->bloha[a>>8]; + else har++; + + if (do_pano) { + vec[0]= re->panoco*har->co[0] + re->panosi*har->co[2]; + vec[1]= har->co[1]; + vec[2]= -re->panosi*har->co[0] + re->panoco*har->co[2]; + } + else { + copy_v3_v3(vec, har->co); + } + + projectfunc(vec, re->winmat, hoco); + + /* we clip halos less critical, but not for the Z */ + hoco[0]*= 0.5f; + hoco[1]*= 0.5f; + + if ( panotestclip(re, do_pano, hoco) ) { + har->miny= har->maxy= -10000; /* that way render clips it */ + } + else if (hoco[3]<0.0f) { + har->miny= har->maxy= -10000; /* render clips it */ + } + else { /* do the projection...*/ + /* bring back hocos */ + hoco[0]*= 2.0f; + hoco[1]*= 2.0f; + + zn= hoco[3]; + har->xs= 0.5f*re->winx*(1.0f+hoco[0]/zn); /* the 0.5 negates the previous 2...*/ + har->ys= 0.5f*re->winy*(1.0f+hoco[1]/zn); + + /* this should be the zbuffer coordinate */ + har->zs= 0x7FFFFF*(hoco[2]/zn); + /* taking this from the face clip functions? seems ok... 
*/
+				har->zBufDist = 0x7FFFFFFF*(hoco[2]/zn);
+
+				vec[0]+= har->hasize;
+				projectfunc(vec, re->winmat, hoco);
+				vec[0]-= har->hasize;
+				zn= hoco[3];
+				har->rad= fabsf(har->xs- 0.5f*re->winx*(1.0f+hoco[0]/zn));
+
+				/* this clip is not really OK, to prevent stars to become too large */
+				if (har->type & HA_ONLYSKY) {
+					if (har->rad>3.0f) har->rad= 3.0f;
+				}
+
+				har->radsq= har->rad*har->rad;
+
+				har->miny= har->ys - har->rad/re->ycor;
+				har->maxy= har->ys + har->rad/re->ycor;
+
+				/* the Zd value is still not really correct for pano */
+
+				vec[2] -= har->hasize;	/* z negative, otherwise it's clipped */
+				projectfunc(vec, re->winmat, hoco);
+				zn = hoco[3];
+				zn = fabsf((float)har->zs - 0x7FFFFF * (hoco[2] / zn));
+				har->zd = CLAMPIS(zn, 0, INT_MAX);
+
+			}
+
+		}
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/**
+ * Refresh the cached matrices of one object instance.
+ *
+ * `flag` is a bit mask of what changed:
+ * - RE_OBJECT_INSTANCES_UPDATE_OBMAT: copy the matrix of `obi->ob` into
+ *   `obi->obmat` and recompute its inverse `obi->obinvmat`.
+ * - RE_OBJECT_INSTANCES_UPDATE_VIEW: rebuild `obi->localtoviewmat` from
+ *   `re->viewmat` and `obi->obmat`, and `obi->localtoviewinvmat` from
+ *   `obi->obinvmat` and `re->viewinv`.
+ *
+ * The object-matrix step runs first, so passing both flags uses the fresh
+ * object matrix for the view matrices.
+ */
+void RE_updateRenderInstance(Render *re, ObjectInstanceRen *obi, int flag)
+{
+	/* flag specifies what things have changed. */
+	if (flag & RE_OBJECT_INSTANCES_UPDATE_OBMAT) {
+		copy_m4_m4(obi->obmat, obi->ob->obmat);
+		invert_m4_m4(obi->obinvmat, obi->obmat);
+	}
+	if (flag & RE_OBJECT_INSTANCES_UPDATE_VIEW) {
+		mul_m4_m4m4(obi->localtoviewmat, re->viewmat, obi->obmat);
+		mul_m4_m4m4(obi->localtoviewinvmat, obi->obinvmat, re->viewinv);
+	}
+}
+/**
+ * Apply RE_updateRenderInstance() with the same `flag` to each of the
+ * `re->totinstance` entries of the `re->objectinstance` array.
+ */
+void RE_updateRenderInstances(Render *re, int flag)
+{
+	int i = 0;
+	for (i = 0; i < re->totinstance; i++)
+		RE_updateRenderInstance(re, &re->objectinstance[i], flag);
+}
+/**
+ * Create an instance of render object `obr` and append it to
+ * `re->instancetable`.
+ *
+ * The instance is allocated zeroed and filled from the arguments. When both
+ * `par` and `dob` are given and `dob` refers to a particle system, the data of
+ * the matching particle is cached on the instance. `random_id` is taken from
+ * `dob`, or hashed from the object name when `dob` is NULL. The object and view
+ * matrices are then initialized, and when `mat` is non-NULL it is stored
+ * together with the inverse-transpose of its 3x3 part and the instance is
+ * flagged R_DUPLI_TRANSFORMED.
+ *
+ * Returns the new instance.
+ */
+ObjectInstanceRen *RE_addRenderInstance(
+        Render *re, ObjectRen *obr, Object *ob, Object *par,
+        int index, int psysindex, float mat[4][4], int lay, const DupliObject *dob)
+{
+	ObjectInstanceRen *obi;
+	float mat3[3][3];
+
+	obi= MEM_callocN(sizeof(ObjectInstanceRen), "ObjectInstanceRen");
+	obi->obr= obr;
+	obi->ob= ob;
+	obi->par= par;
+	obi->index= index;
+	obi->psysindex= psysindex;
+	obi->lay= lay;
+
+	/* Fill particle info */
+	if (par && dob) {
+		const ParticleSystem *psys = dob->particle_system;
+
if (psys) {
+			int part_index;
+			if (obi->index < psys->totpart) {
+				part_index = obi->index;
+			}
+			else if (psys->child) {
+				/* indices past totpart address child particles: use the child's parent particle */
+				part_index = psys->child[obi->index - psys->totpart].parent;
+			}
+			else {
+				part_index = -1;
+			}
+
+			if (part_index >= 0) {
+				const ParticleData *p = &psys->particles[part_index];
+				obi->part_index = part_index;
+				obi->part_size = p->size;
+				obi->part_age = RE_GetStats(re)->cfra - p->time;
+				obi->part_lifetime = p->lifetime;
+
+				copy_v3_v3(obi->part_co, p->state.co);
+				copy_v3_v3(obi->part_vel, p->state.vel);
+				copy_v3_v3(obi->part_avel, p->state.ave);
+			}
+		}
+	}
+
+	/* Fill object info */
+	if (dob) {
+		obi->random_id = dob->random_id;
+	}
+	else {
+		obi->random_id = BLI_hash_int_2d(BLI_hash_string(obi->ob->id.name + 2), 0);
+	}
+
+	RE_updateRenderInstance(re, obi, RE_OBJECT_INSTANCES_UPDATE_OBMAT | RE_OBJECT_INSTANCES_UPDATE_VIEW);
+
+	if (mat) {
+		copy_m4_m4(obi->mat, mat);
+		copy_m3_m4(mat3, mat);
+		invert_m3_m3(obi->nmat, mat3);
+		transpose_m3(obi->nmat);
+		obi->flag |= R_DUPLI_TRANSFORMED;
+	}
+
+	BLI_addtail(&re->instancetable, obi);
+
+	return obi;
+}
+/**
+ * Copy the particle attributes cached on `obi` into the caller's outputs.
+ *
+ * `index` receives the particle index converted to float and `random` a value
+ * hashed from that index; `age`, `lifetime`, `size`, `co`, `vel` and `angvel`
+ * are copied from the matching `part_*` members. All output pointers are
+ * written unconditionally, so none may be NULL.
+ */
+void RE_instance_get_particle_info(struct ObjectInstanceRen *obi, float *index, float *random, float *age, float *lifetime, float co[3], float *size, float vel[3], float angvel[3])
+{
+	*index = obi->part_index;
+	*random = BLI_hash_int_01(obi->part_index);
+	*age = obi->part_age;
+	*lifetime = obi->part_lifetime;
+	copy_v3_v3(co, obi->part_co);
+	*size = obi->part_size;
+	copy_v3_v3(vel, obi->part_vel);
+	copy_v3_v3(angvel, obi->part_avel);
+}
+
+/**
+ * Convert the linked list `re->instancetable` into the contiguous array
+ * `re->objectinstance` so instances can be looked up by index.
+ *
+ * Each list element is copied by value into the array. Instances without a
+ * render object (`obr` NULL) are dropped: their slot is reused by the next
+ * element and `re->totinstance` is decremented. The old list nodes are freed
+ * and `re->instancetable` is rebuilt to link the array elements.
+ */
+void RE_makeRenderInstances(Render *re)
+{
+	ObjectInstanceRen *obi, *oldobi;
+	ListBase newlist;
+	int tot;
+
+	/* convert list of object instances to an array for index based lookup */
+	tot= BLI_listbase_count(&re->instancetable);
+	re->objectinstance= MEM_callocN(sizeof(ObjectInstanceRen)*tot, "ObjectInstance");
+	re->totinstance= tot;
+	newlist.first= newlist.last= NULL;
+
+	obi= re->objectinstance;
+	for
(oldobi=re->instancetable.first; oldobi; oldobi=oldobi->next) {
+		*obi= *oldobi;
+
+		if (obi->obr) {
+			obi->prev= obi->next= NULL;
+			BLI_addtail(&newlist, obi);
+			obi++;
+		}
+		else
+			re->totinstance--;
+	}
+
+	BLI_freelistN(&re->instancetable);
+	re->instancetable= newlist;
+}
+
+/* four functions to facilitate envmap rotation for raytrace
+ *
+ * All four do nothing unless `obi` is non-NULL and has R_ENV_TRANSFORMED set.
+ *
+ * RE_instance_rotate_ray_start: saves `is->start` in `is->origstart`, then
+ * transforms `is->start` by `obi->imat`.
+ */
+void RE_instance_rotate_ray_start(ObjectInstanceRen *obi, Isect *is)
+{
+	if (obi && (obi->flag & R_ENV_TRANSFORMED)) {
+		copy_v3_v3(is->origstart, is->start);
+		mul_m4_v3(obi->imat, is->start);
+	}
+}
+/**
+ * Saves `is->dir` in `is->origdir`, then recomputes `is->dir` as the difference
+ * between the transformed end point (`origstart + dir`, taken through
+ * `obi->imat`) and `is->start`.
+ *
+ * NOTE(review): this reads `is->origstart` and the already transformed
+ * `is->start`, so it looks like RE_instance_rotate_ray_start() must have run
+ * first - confirm for callers that use it on its own.
+ */
+void RE_instance_rotate_ray_dir(ObjectInstanceRen *obi, Isect *is)
+{
+	if (obi && (obi->flag & R_ENV_TRANSFORMED)) {
+		float end[3];
+
+		copy_v3_v3(is->origdir, is->dir);
+		add_v3_v3v3(end, is->origstart, is->dir);
+
+		mul_m4_v3(obi->imat, end);
+		sub_v3_v3v3(is->dir, end, is->start);
+	}
+}
+/** Transform both the start point and the direction of the ray, in that order. */
+void RE_instance_rotate_ray(ObjectInstanceRen *obi, Isect *is)
+{
+	RE_instance_rotate_ray_start(obi, is);
+	RE_instance_rotate_ray_dir(obi, is);
+}
+/** Put back the start point and direction saved in `origstart` / `origdir`. */
+void RE_instance_rotate_ray_restore(ObjectInstanceRen *obi, Isect *is)
+{
+	if (obi && (obi->flag & R_ENV_TRANSFORMED)) {
+		copy_v3_v3(is->start, is->origstart);
+		copy_v3_v3(is->dir, is->origdir);
+	}
+}
+/**
+ * Test the eight corners of `boundbox` against the clip planes after
+ * transforming them with `winmat`.
+ *
+ * Each corner gets a bit mask of the planes it lies outside of (1/2 for x, 4/8
+ * for y, 16/32 for z). The x/y limits come from `bounds` scaled by w when
+ * `bounds` is non-NULL, otherwise from -w..w. The masks of all corners are
+ * AND-ed together.
+ *
+ * Returns 0 as soon as no single plane has every tested corner outside it,
+ * otherwise the mask of planes that all eight corners are outside of.
+ */
+int clip_render_object(float boundbox[2][3], float bounds[4], float winmat[4][4])
+{
+	float mat[4][4], vec[4];
+	int a, fl, flag = -1;
+
+	copy_m4_m4(mat, winmat);
+
+	for (a=0; a < 8; a++) {
+		vec[0]= (a & 1)? boundbox[0][0]: boundbox[1][0];
+		vec[1]= (a & 2)? boundbox[0][1]: boundbox[1][1];
+		vec[2]= (a & 4)?
boundbox[0][2]: boundbox[1][2]; + vec[3]= 1.0; + mul_m4_v4(mat, vec); + + fl = 0; + if (bounds) { + if (vec[0] < bounds[0] * vec[3]) fl |= 1; + else if (vec[0] > bounds[1] * vec[3]) fl |= 2; + + if (vec[1] > bounds[3] * vec[3]) fl |= 4; + else if (vec[1] < bounds[2] * vec[3]) fl |= 8; + } + else { + if (vec[0] < -vec[3]) fl |= 1; + else if (vec[0] > vec[3]) fl |= 2; + + if (vec[1] > vec[3]) fl |= 4; + else if (vec[1] < -vec[3]) fl |= 8; + } + if (vec[2] < -vec[3]) fl |= 16; + else if (vec[2] > vec[3]) fl |= 32; + + flag &= fl; + if (flag == 0) { + return 0; + } + } + + return flag; +} + diff --git a/source/blender/render/intern/source/shadbuf.c b/source/blender/render/intern/source/shadbuf.c new file mode 100644 index 00000000000..04e9177241b --- /dev/null +++ b/source/blender/render/intern/source/shadbuf.c @@ -0,0 +1,2647 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. 
+ * + * Contributor(s): 2004-2006, Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/shadbuf.c + * \ingroup render + */ + + +#include <math.h> +#include <string.h> + + +#include "MEM_guardedalloc.h" + +#include "DNA_group_types.h" +#include "DNA_lamp_types.h" +#include "DNA_material_types.h" + +#include "BLI_math.h" +#include "BLI_blenlib.h" +#include "BLI_jitter_2d.h" +#include "BLI_memarena.h" +#include "BLI_rand.h" +#include "BLI_utildefines.h" + +#include "BKE_global.h" +#include "BKE_scene.h" + +#include "PIL_time.h" + +#include "render_types.h" +#include "renderdatabase.h" +#include "rendercore.h" +#include "shadbuf.h" +#include "shading.h" +#include "zbuf.h" + +/* XXX, could be better implemented... this is for endian issues */ +#ifdef __BIG_ENDIAN__ +//# define RCOMP 3 +# define GCOMP 2 +# define BCOMP 1 +# define ACOMP 0 +#else +//# define RCOMP 0 +# define GCOMP 1 +# define BCOMP 2 +# define ACOMP 3 +#endif + +#define RCT_SIZE_X(rct) ((rct)->xmax - (rct)->xmin) +#define RCT_SIZE_Y(rct) ((rct)->ymax - (rct)->ymin) + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +/* ------------------------------------------------------------------------- */ + +/* initshadowbuf() in convertBlenderScene.c */ + +/* ------------------------------------------------------------------------- */ + +static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1) +{ + int len4, *rz; + int x2, y2; + + x2= x1+tile; + y2= y1+tile; + if (x2>=size) x2= size-1; + if (y2>=size) y2= size-1; + + if (x1>=x2 || y1>=y2) return; + + len4= 4*(x2- x1); + rz= rectz + size*y1 + x1; + for (; y1<y2; y1++) { + memcpy(r1, rz, len4); + rz+= size; + r1+= len4; + } 
+}
+
+#if 0
+static int sizeoflampbuf(ShadBuf *shb)
+{
+	int num, count=0;
+	char *cp;
+
+	cp= shb->cbuf;
+	num= (shb->size*shb->size)/256;
+
+	while (num--) count+= *(cp++);
+
+	return 256*count;
+}
+#endif
+
+/* not threadsafe...
+ *
+ * Return the jitter table for `samp` x `samp` samples.
+ *
+ * `samp` is clamped to 2..16. The tables for all sample counts are packed one
+ * after another in one static array: the table for `samp` starts after the
+ * 1 + 4 + ... + (samp-1)^2 entries of the smaller tables, and the 1496 rows
+ * are exactly 1 + 4 + ... + 256. A table is filled by BLI_jitter_init() the
+ * first time it is requested and reused afterwards; the "initialized" flags
+ * are plain statics without any locking.
+ */
+static float *give_jitter_tab(int samp)
+{
+	/* these are all possible jitter tables, takes up some
+	 * 12k, not really bad!
+	 * For soft shadows, it saves memory and render time
+	 */
+	static int tab[17]={1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
+	static float jit[1496][2];
+	static char ctab[17]=   {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+	int a, offset=0;
+
+	if (samp<2) samp= 2;
+	else if (samp>16) samp= 16;
+
+	for (a=0; a<samp-1; a++) offset+= tab[a];
+
+	if (ctab[samp]==0) {
+		ctab[samp]= 1;
+		BLI_jitter_init((float (*)[2])jit[offset], samp*samp);
+	}
+
+	return jit[offset];
+
+}
+/**
+ * Build `shb->weight`, one filter weight per jitter sample.
+ *
+ * The sample count per axis comes from get_render_shadow_samples(); the table
+ * holds its square. Each weight is computed from the length of the matching
+ * 2D offset in `shb->jit`: `0.71 - length` for LA_SHADBUF_TENT, a gauss filter
+ * value of `1.8 * length` for LA_SHADBUF_GAUSS, and 1 for any other filter
+ * type. The weights are then scaled so that they sum to 1.
+ *
+ * The table is newly allocated; a previous `shb->weight` is not freed here.
+ */
+static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype)
+{
+	float *jit, totw= 0.0f;
+	int samp= get_render_shadow_samples(&re->r, shb->samp);
+	int a, tot=samp*samp;
+
+	shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
+
+	for (jit= shb->jit, a=0; a<tot; a++, jit+=2) {
+		if (filtertype==LA_SHADBUF_TENT)
+			shb->weight[a] = 0.71f - sqrtf(jit[0] * jit[0] + jit[1] * jit[1]);
+		else if (filtertype==LA_SHADBUF_GAUSS)
+			shb->weight[a] = RE_filter_value(R_FILTER_GAUSS, 1.8f * sqrtf(jit[0] * jit[0] + jit[1] * jit[1]));
+		else
+			shb->weight[a]= 1.0f;
+
+		totw+= shb->weight[a];
+	}
+
+	totw= 1.0f/totw;
+	for (a=0; a<tot; a++) {
+		shb->weight[a]*= totw;
+	}
+}
+/** qsort() comparator ordering DeepSample entries by increasing `z`. */
+static int verg_deepsample(const void *poin1, const void *poin2)
+{
+	const DeepSample *ds1= (const DeepSample*)poin1;
+	const DeepSample *ds2= (const DeepSample*)poin2;
+
+	if (ds1->z < ds2->z) return -1;
+	else if (ds1->z == ds2->z) return 0;
+	else return 1;
+}
+/**
+ * Reduce the `tot` samples in `dsample` in place and return the number of
+ * samples kept.
+ *
+ * Consecutive samples are merged for as long as a single line segment can
+ * stay within `epsilon` of their visibility values.
+ */
+static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
+{
+	/* uses doubles to avoid overflows and other numerical issues,
+	 * could be
improved */ + DeepSample *ds, *newds; + float v; + double slope, slopemin, slopemax, min, max, div, newmin, newmax; + int a, first, z, newtot= 0; + +#if 0 + if (print) { + for (a=0, ds=dsample; a<tot; a++, ds++) + printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v); + printf("\n"); + } +#endif + + /* read from and write into same array */ + ds= dsample; + newds= dsample; + a= 0; + + /* as long as we are not at the end of the array */ + for (a++, ds++; a<tot; a++, ds++) { + slopemin= 0.0f; + slopemax= 0.0f; + first= 1; + + for (; a<tot; a++, ds++) { + //dz= ds->z - newds->z; + if (ds->z == newds->z) { + /* still in same z position, simply check + * visibility difference against epsilon */ + if (!(fabsf(newds->v - ds->v) <= epsilon)) { + break; + } + } + else { + /* compute slopes */ + div= (double)0x7FFFFFFF / ((double)ds->z - (double)newds->z); + min= (double)((ds->v - epsilon) - newds->v) * div; + max= (double)((ds->v + epsilon) - newds->v) * div; + + /* adapt existing slopes */ + if (first) { + newmin= min; + newmax= max; + first= 0; + } + else { + newmin= MAX2(slopemin, min); + newmax= MIN2(slopemax, max); + + /* verify if there is still space between the slopes */ + if (newmin > newmax) { + ds--; + a--; + break; + } + } + + slopemin= newmin; + slopemax= newmax; + } + } + + if (a == tot) { + ds--; + a--; + } + + /* always previous z */ + z= ds->z; + + if (first || a==tot-1) { + /* if slopes were not initialized, use last visibility */ + v= ds->v; + } + else { + /* compute visibility at center between slopes at z */ + slope = (slopemin + slopemax) * 0.5; + v = (double)newds->v + slope * ((double)(z - newds->z) / (double)0x7FFFFFFF); + } + + newds++; + newtot++; + + newds->z= z; + newds->v= v; + } + + if (newtot == 0 || (newds->v != (newds-1)->v)) + newtot++; + +#if 0 + if (print) { + for (a=0, ds=dsample; a<newtot; a++, ds++) + printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v); + printf("\n"); + } +#endif + + return newtot; +} + +static float deep_alpha(Render 
*re, int obinr, int facenr, bool use_strand) +{ + ObjectInstanceRen *obi= &re->objectinstance[obinr]; + Material *ma; + + if (use_strand) { + StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1); + ma= strand->buffer->ma; + } + else { + VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK); + ma= vlr->mat; + } + + return ma->shad_alpha; +} + +static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand) +{ + ShadSampleBuf *shsample; + DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf; + APixstr *ap, *apn; + APixstrand *aps, *apns; + float visibility; + + const int totbuf= shb->totbuf; + const float totbuf_f= (float)shb->totbuf; + const float totbuf_f_inv= 1.0f/totbuf_f; + const int size= shb->size; + + int a, b, c, tot, minz, found, prevtot, newtot; + int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0; + + shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf"); + BLI_addtail(&shb->buffers, shsample); + + shsample->totbuf = MEM_callocN(sizeof(int) * size * size, "deeptotbuf"); + shsample->deepbuf = MEM_callocN(sizeof(DeepSample *) * size * size, "deepbuf"); + + ap= apixbuf; + aps= apixbufstrand; + for (a=0; a<size*size; a++, ap++, aps++) { + /* count number of samples */ + for (c=0; c<totbuf; c++) + sampletot[c]= 0; + + tot= 0; + for (apn=ap; apn; apn=apn->next) + for (b=0; b<4; b++) + if (apn->p[b]) + for (c=0; c<totbuf; c++) + if (apn->mask[b] & (1<<c)) + sampletot[c]++; + + if (apixbufstrand) { + for (apns=aps; apns; apns=apns->next) + for (b=0; b<4; b++) + if (apns->p[b]) + for (c=0; c<totbuf; c++) + if (apns->mask[b] & (1<<c)) + sampletot[c]++; + } + + for (c=0; c<totbuf; c++) + tot += sampletot[c]; + + if (tot == 0) { + shsample->deepbuf[a]= NULL; + shsample->totbuf[a]= 0; + continue; + } + + /* fill samples */ + ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample"); + for (c=1; c<totbuf; c++) + ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2; + 
+ for (apn=ap; apn; apn=apn->next) { + for (b=0; b<4; b++) { + if (apn->p[b]) { + for (c=0; c<totbuf; c++) { + if (apn->mask[b] & (1<<c)) { + /* two entries to create step profile */ + ds[c]->z= apn->z[b]; + ds[c]->v= 1.0f; /* not used */ + ds[c]++; + ds[c]->z= apn->z[b]; + ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0); + ds[c]++; + } + } + } + } + } + + if (apixbufstrand) { + for (apns=aps; apns; apns=apns->next) { + for (b=0; b<4; b++) { + if (apns->p[b]) { + for (c=0; c<totbuf; c++) { + if (apns->mask[b] & (1<<c)) { + /* two entries to create step profile */ + ds[c]->z= apns->z[b]; + ds[c]->v= 1.0f; /* not used */ + ds[c]++; + ds[c]->z= apns->z[b]; + ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1); + ds[c]++; + } + } + } + } + } + } + + for (c=0; c<totbuf; c++) { + /* sort by increasing z */ + qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample); + + /* sum visibility, replacing alpha values */ + visibility= 1.0f; + ds[c]= sampleds[c]; + + for (b=0; b<sampletot[c]; b++) { + /* two entries creating step profile */ + ds[c]->v= visibility; + ds[c]++; + + visibility *= 1.0f-ds[c]->v; + ds[c]->v= visibility; + ds[c]++; + } + + /* halfway trick, probably won't work well for volumes? 
*/ + ds[c]= sampleds[c]; + for (b=0; b<sampletot[c]; b++) { + if (b+1 < sampletot[c]) { + ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1); + ds[c]++; + ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1); + ds[c]++; + } + else { + ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1); + ds[c]++; + ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1); + ds[c]++; + } + } + + /* init for merge loop */ + ds[c]= sampleds[c]; + sampletot[c] *= 2; + } + + shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample"); + shsample->totbuf[a]= 0; + + /* merge buffers */ + dsb= shsample->deepbuf[a]; + while (1) { + minz= 0; + found= 0; + + for (c=0; c<totbuf; c++) { + if (sampletot[c] && (!found || ds[c]->z < minz)) { + minz= ds[c]->z; + found= 1; + } + } + + if (!found) + break; + + dsb->z= minz; + dsb->v= 0.0f; + + visibility= 0.0f; + for (c=0; c<totbuf; c++) { + if (sampletot[c] && ds[c]->z == minz) { + ds[c]++; + sampletot[c]--; + } + + if (sampleds[c] == ds[c]) + visibility += totbuf_f_inv; + else + visibility += (ds[c]-1)->v / totbuf_f; + } + + dsb->v= visibility; + dsb++; + shsample->totbuf[a]++; + } + + prevtot= shsample->totbuf[a]; + totsample += prevtot; + + newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh); + shsample->totbuf[a]= newtot; + totsamplec += newtot; + + if (newtot < prevtot) { + newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample"); + memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot); + MEM_freeN(shsample->deepbuf[a]); + shsample->deepbuf[a]= newbuf; + } + + MEM_freeN(sampleds[0]); + } + + //printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample); +} + +/* create Z tiles (for compression): this system is 24 bits!!! 
*/
+static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
+{	/* Builds one ShadSampleBuf from the size*size integer z-buffer 'rectz' and
+	 * appends it to shb->buffers. The buffer is handled in tiles of 16x16 pixels;
+	 * per tile, cbuf[] stores a code and zbuf[] the matching payload:
+	 *   0: all 256 values are equal after masking with 0xFFFFFF00; zbuf holds the
+	 *      z value of the last pixel itself, nothing is allocated
+	 *   1: zbuf points to the first pixel's int, followed by 256 GCOMP bytes
+	 *   2: zbuf points to the first pixel's int, followed by 256 BCOMP/GCOMP pairs
+	 *   3: zbuf points to 256 ACOMP/BCOMP/GCOMP byte triples
+	 * 'square': when 0, tiles whose corner nearest to the buffer center is more
+	 * than size/2 + 12 pixels away are not read from 'rectz' and get code 0 with
+	 * value 0. */
+	ShadSampleBuf *shsample;
+	float dist;
+	uintptr_t *ztile;
+	int *rz, *rz1, verg, verg1, size= shb->size;
+	int a, x, y, minx, miny, byt1, byt2;
+	char *rc, *rcline, *ctile, *zt;
+	
+	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
+	BLI_addtail(&shb->buffers, shsample);
+	/* one zbuf and one cbuf entry per tile, (size*size)/256 tiles in total */
+	shsample->zbuf= MEM_mallocN(sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
+	shsample->cbuf= MEM_callocN((size*size)/256, "initshadbuf3");
+	
+	ztile= (uintptr_t *)shsample->zbuf;
+	ctile= shsample->cbuf;
+	
+	/* help buffer: handed to copy_to_ztile() and then read back as the 256 z values of one tile */
+	rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
+	
+	for (y=0; y<size; y+=16) {
+		if (y< size/2) miny= y+15-size/2;
+		else miny= y-size/2;
+		
+		for (x=0; x<size; x+=16) {
+			
+			/* is tile within spotbundle? minx/miny are the tile edges nearest to the center */
+			a= size/2;
+			if (x< a) minx= x+15-a;
+			else minx= x-a;
+			
+			dist = sqrtf((float)(minx * minx + miny * miny));
+			
+			if (square==0 && dist>(float)(a+12)) {	/* 12, tested with a onlyshadow lamp */
+				a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
+				rz1= (&verg)+1;	/* so that *(rz1-1) below reads verg */
+			}
+			else {
+				copy_to_ztile(rectz, size, x, y, 16, rcline);
+				rz1= (int *)rcline;
+				
+				verg= (*rz1 & 0xFFFFFF00);
+				/* a reaches 256 only when no value differs in its upper 24 bits */
+				for (a=0;a<256;a++, rz1++) {
+					if ( (*rz1 & 0xFFFFFF00) !=verg) break;
+				}
+			}
+			if (a==256) { /* complete empty tile */
+				*ctile= 0;
+				*ztile= *(rz1-1);
+			}
+			else {
+				
+				/* ACOMP etc. are defined to work L/B endian */
+				/* byt1: ACOMP byte equal in all pixels seen so far, byt2: same for BCOMP */
+				rc= rcline;
+				rz1= (int *)rcline;
+				verg= rc[ACOMP];
+				verg1= rc[BCOMP];
+				rc+= 4;
+				byt1= 1; byt2= 1;
+				for (a=1;a<256;a++, rc+=4) {
+					byt1 &= (verg==rc[ACOMP]);
+					byt2 &= (verg1==rc[BCOMP]);
+					/* once ACOMP differs all three bytes get stored, no need to look further */
+					if (byt1==0) break;
+				}
+				if (byt1 && byt2) { /* only store byte */
+					*ctile= 1;
+					*ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
+					rz= (int *)*ztile;
+					*rz= *rz1;	/* first pixel, supplies the shared bytes when reading */
+					
+					zt= (char *)(rz+1);
+					rc= rcline;
+					for (a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
+				}
+				else if (byt1) { /* only store short */
+					*ctile= 2;
+					*ztile= (uintptr_t)MEM_mallocN(2*256+4, "Tile2");
+					rz= (int *)*ztile;
+					*rz= *rz1;	/* first pixel, supplies the shared byte when reading */
+					
+					zt= (char *)(rz+1);
+					rc= rcline;
+					for (a=0; a<256; a++, zt+=2, rc+=4) {
+						zt[0]= rc[BCOMP];
+						zt[1]= rc[GCOMP];
+					}
+				}
+				else { /* store triple */
+					*ctile= 3;
+					*ztile= (uintptr_t)MEM_mallocN(3*256, "Tile3");
+					
+					zt= (char *)*ztile;
+					rc= rcline;
+					for (a=0; a<256; a++, zt+=3, rc+=4) {
+						zt[0]= rc[ACOMP];
+						zt[1]= rc[BCOMP];
+						zt[2]= rc[GCOMP];
+					}
+				}
+			}
+			ztile++;
+			ctile++;
+		}
+	}
+	
+	MEM_freeN(rcline);
+}
+
+/* sets start/end clipping. 
lar->shb should be initialized */ +static void shadowbuf_autoclip(Render *re, LampRen *lar) +{ + ObjectInstanceRen *obi; + ObjectRen *obr; + VlakRen *vlr= NULL; + VertRen *ver= NULL; + Material *ma= NULL; + float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4]; + unsigned int lay = -1; + int i, a, maxtotvert, ok= 1; + char *clipflag; + + minz= 1.0e30f; maxz= -1.0e30f; + copy_m4_m4(viewmat, lar->shb->viewmat); + + if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay; + + maxtotvert= 0; + for (obr=re->objecttable.first; obr; obr=obr->next) + maxtotvert = max_ii(obr->totvert, maxtotvert); + + clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag"); + + /* set clip in vertices when face visible */ + for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) { + obr= obi->obr; + + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(obviewmat, viewmat, obi->mat); + else + copy_m4_m4(obviewmat, viewmat); + + memset(clipflag, 0, sizeof(char)*obr->totvert); + + /* clear clip, is being set if face is visible (clip is calculated for real later) */ + for (a=0; a<obr->totvlak; a++) { + if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; + else vlr++; + + /* note; these conditions are copied from zbuffer_shadow() */ + if (vlr->mat!= ma) { + ma= vlr->mat; + ok= 1; + if ((ma->mode2 & MA_CASTSHADOW)==0 || (ma->mode & MA_SHADBUF)==0) ok= 0; + } + + if (ok && (obi->lay & lay)) { + clipflag[vlr->v1->index]= 1; + clipflag[vlr->v2->index]= 1; + clipflag[vlr->v3->index]= 1; + if (vlr->v4) clipflag[vlr->v4->index]= 1; + } + } + + /* calculate min and max */ + for (a=0; a< obr->totvert;a++) { + if ((a & 255)==0) ver= RE_findOrAddVert(obr, a); + else ver++; + + if (clipflag[a]) { + copy_v3_v3(vec, ver->co); + mul_m4_v3(obviewmat, vec); + /* Z on visible side of lamp space */ + if (vec[2] < 0.0f) { + float inpr, z= -vec[2]; + + /* since vec is rotated in lampspace, this is how to get the cosine of angle */ + /* precision is set 20% larger */ + vec[2]*= 1.2f; + normalize_v3(vec); + 
inpr= - vec[2]; + + if (inpr>=lar->spotsi) { + if (z<minz) minz= z; + if (z>maxz) maxz= z; + } + } + } + } + } + + MEM_freeN(clipflag); + + /* set clipping min and max */ + if (minz < maxz) { + float delta= (maxz - minz); /* threshold to prevent precision issues */ + + //printf("minz %f maxz %f delta %f\n", minz, maxz, delta); + if (lar->bufflag & LA_SHADBUF_AUTO_START) + lar->shb->d= minz - delta*0.02f; /* 0.02 is arbitrary... needs more thinking! */ + if (lar->bufflag & LA_SHADBUF_AUTO_END) + lar->shb->clipend= maxz + delta*0.1f; + + /* bias was calculated as percentage, we scale it to prevent animation issues */ + delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d); + //printf("bias delta %f\n", delta); + lar->shb->bias= (int) (delta*(float)lar->shb->bias); + } +} + +static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf) +{ + ShadBuf *shb= lar->shb; + int *rectz, samples; + + /* zbuffering */ + rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf"); + + for (samples=0; samples<shb->totbuf; samples++) { + zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]); + /* create Z tiles (for compression): this system is 24 bits!!! */ + compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE); + + if (re->test_break(re->tbh)) + break; + } + + MEM_freeN(rectz); +} + +static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf) +{ + ShadBuf *shb= lar->shb; + APixstr *apixbuf; + APixstrand *apixbufstrand= NULL; + ListBase apsmbase= {NULL, NULL}; + + /* zbuffering */ + apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf"); + if (re->totstrand) + apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand"); + + zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size, + shb->totbuf, (float(*)[2])jitbuf); + + /* create Z tiles (for compression): this system is 24 bits!!! 
*/ + compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand); + + MEM_freeN(apixbuf); + if (apixbufstrand) + MEM_freeN(apixbufstrand); + freepsA(&apsmbase); +} + +void makeshadowbuf(Render *re, LampRen *lar) +{ + ShadBuf *shb= lar->shb; + float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp; + + if (lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END)) + shadowbuf_autoclip(re, lar); + + /* just to enforce identical behavior of all irregular buffers */ + if (lar->buftype==LA_SHADBUF_IRREGULAR) + shb->size= 1024; + + /* matrices and window: in winmat the transformation is being put, + * transforming from observer view to lamp view, including lamp window matrix */ + + angle= saacos(lar->spotsi); + temp = 0.5f * shb->size * cosf(angle) / sinf(angle); + shb->pixsize= (shb->d)/temp; + wsize= shb->pixsize*(shb->size/2.0f); + + perspective_m4(shb->winmat, -wsize, wsize, -wsize, wsize, shb->d, shb->clipend); + mul_m4_m4m4(shb->persmat, shb->winmat, shb->viewmat); + + if (ELEM(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) { + shb->totbuf= lar->buffers; + + /* jitter, weights - not threadsafe! 
*/ + BLI_thread_lock(LOCK_CUSTOM1); + shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp)); + make_jitter_weight_tab(re, shb, lar->filtertype); + BLI_thread_unlock(LOCK_CUSTOM1); + + if (shb->totbuf==4) jitbuf= give_jitter_tab(2); + else if (shb->totbuf==9) jitbuf= give_jitter_tab(3); + else jitbuf= twozero; + + /* zbuffering */ + if (lar->buftype == LA_SHADBUF_DEEP) { + makedeepshadowbuf(re, lar, jitbuf); + shb->totbuf= 1; + } + else + makeflatshadowbuf(re, lar, jitbuf); + + /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */ + } +} + +static void *do_shadow_thread(void *re_v) +{ + Render *re = (Render *)re_v; + LampRen *lar; + + do { + BLI_thread_lock(LOCK_CUSTOM1); + for (lar=re->lampren.first; lar; lar=lar->next) { + if (lar->shb && !lar->thread_assigned) { + lar->thread_assigned= 1; + break; + } + } + BLI_thread_unlock(LOCK_CUSTOM1); + + /* if type is irregular, this only sets the perspective matrix and autoclips */ + if (lar) { + makeshadowbuf(re, lar); + BLI_thread_lock(LOCK_CUSTOM1); + lar->thread_ready= 1; + BLI_thread_unlock(LOCK_CUSTOM1); + } + } while (lar && !re->test_break(re->tbh)); + + return NULL; +} + +static volatile int g_break= 0; +static int thread_break(void *UNUSED(arg)) +{ + return g_break; +} + +void threaded_makeshadowbufs(Render *re) +{ + ListBase threads; + LampRen *lar; + int a, totthread= 0; + int (*test_break)(void *); + + /* count number of threads to use */ + if (G.is_rendering) { + for (lar=re->lampren.first; lar; lar= lar->next) + if (lar->shb) + totthread++; + + totthread = min_ii(totthread, re->r.threads); + } + else + totthread = 1; /* preview render */ + + if (totthread <= 1) { + for (lar=re->lampren.first; lar; lar= lar->next) { + if (re->test_break(re->tbh)) break; + if (lar->shb) { + /* if type is irregular, this only sets the perspective matrix and autoclips */ + makeshadowbuf(re, lar); + } + } + } + else { + /* swap test break function */ + test_break= re->test_break; + re->test_break= thread_break; + + 
for (lar=re->lampren.first; lar; lar= lar->next) { + lar->thread_assigned= 0; + lar->thread_ready= 0; + } + + BLI_threadpool_init(&threads, do_shadow_thread, totthread); + + for (a=0; a<totthread; a++) + BLI_threadpool_insert(&threads, re); + + /* keep rendering as long as there are shadow buffers not ready */ + do { + if ((g_break=test_break(re->tbh))) + break; + + PIL_sleep_ms(50); + + BLI_thread_lock(LOCK_CUSTOM1); + for (lar=re->lampren.first; lar; lar= lar->next) + if (lar->shb && !lar->thread_ready) + break; + BLI_thread_unlock(LOCK_CUSTOM1); + } while (lar); + + BLI_threadpool_end(&threads); + + /* unset threadsafety */ + re->test_break= test_break; + g_break= 0; + } +} + +void freeshadowbuf(LampRen *lar) +{ + if (lar->shb) { + ShadBuf *shb= lar->shb; + ShadSampleBuf *shsample; + int b, v; + + for (shsample= shb->buffers.first; shsample; shsample= shsample->next) { + if (shsample->deepbuf) { + v= shb->size*shb->size; + for (b=0; b<v; b++) + if (shsample->deepbuf[b]) + MEM_freeN(shsample->deepbuf[b]); + + MEM_freeN(shsample->deepbuf); + MEM_freeN(shsample->totbuf); + } + else { + intptr_t *ztile= shsample->zbuf; + const char *ctile= shsample->cbuf; + + v= (shb->size*shb->size)/256; + for (b=0; b<v; b++, ztile++, ctile++) + if (*ctile) MEM_freeN((void *) *ztile); + + MEM_freeN(shsample->zbuf); + MEM_freeN(shsample->cbuf); + } + } + BLI_freelistN(&shb->buffers); + + if (shb->weight) MEM_freeN(shb->weight); + MEM_freeN(lar->shb); + + lar->shb= NULL; + } +} + + +static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr) +{ + /* return a 1 if fully compressed shadbuf-tile && z==const */ + int ofs; + const char *ct; + + if (shsample->deepbuf) + return 0; + + /* always test borders of shadowbuffer */ + if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1; + if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1; + + /* calc z */ + ofs= (ys>>4)*(shb->size>>4) + (xs>>4); + ct= shsample->cbuf+ofs; + if (*ct==0) { + if 
(nr==0) {
+			*rz= *( (int **)(shsample->zbuf+ofs) );
+			return 1;
+		}
+		else if (*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
+		
+		return 1;
+	}
+	
+	return 0;
+}
+
+/* Visibility stored in one pixel's deep sample list, read at depth 'z'.
+ *
+ * dsample, tot: the samples of the pixel, searched from the first one on.
+ * bias: subtracted from z for the search and the interpolation.
+ * biast: optional output. Set to (z - ds->z)/bias when the unbiased z lies
+ *        behind the sample found for the biased z, else to 0.0. The caller
+ *        uses it to blend in a second, unbiased read.
+ *
+ * Returns 1.0 in front of all samples (and for an empty list), the last
+ * sample's visibility behind all of them, a linear interpolation otherwise. */
+static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
+{
+	DeepSample *ds, *prevds;
+	double span;
+	float t;
+	int a;
+
+	/* nothing stored means nothing occludes; also keeps (ds-1) below in bounds */
+	if (tot <= 0) {
+		if (biast)
+			*biast= 0.0f;
+		return 1.0f;
+	}
+
+	/* tricky stuff here; we use ints which can overflow easily with bias values */
+
+	ds= dsample;
+	for (a=0; a<tot && (z-bias > ds->z); a++, ds++) {}
+
+	if (a == tot) {
+		if (biast)
+			*biast= 0.0f;
+		return (ds-1)->v; /* completely behind all samples */
+	}
+
+	/* check if this read needs bias blending.
+	 * with bias==0 the loop above stops at z <= ds->z, so no division by zero here */
+	if (biast) {
+		if (z > ds->z)
+			*biast= (float)(z - ds->z)/(float)bias;
+		else
+			*biast= 0.0f;
+	}
+
+	if (a == 0)
+		return 1.0f; /* completely in front of all samples */
+
+	/* ds->z - prevds->z can overflow as int, and as float two different large
+	 * ints can round to the same value, giving a zero divisor. A 32 bit int is
+	 * exact as double, and the loop above leaves prevds->z < z-bias <= ds->z,
+	 * so 'span' is never zero. */
+	prevds= ds-1;
+	span= (double)ds->z - (double)prevds->z;
+	t= (float)(((double)z - (double)bias - (double)prevds->z)/span);
+	return t*ds->v + (1.0f-t)*prevds->v;
+}
+
+/* Deep shadow buffer lookup for pixel (xs, ys) at depth zs; 1.0 is fully lit.
+ * xs/ys are used unclamped as index, readshadowbuf() clamps them before calling. */
+static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
+{
+	float v, biasv, biast;
+	int ofs, tot;
+
+	if (zs < - 0x7FFFFE00 + bias)
+		return 1.0;	/* extreme close to clipstart */
+
+	/* calc z: one sample list per pixel, an empty list means fully lit */
+	ofs= ys*shb->size + xs;
+	tot= shsample->totbuf[ofs];
+	if (tot == 0)
+		return 1.0f;
+
+	v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
+
+	if (biast != 0.0f) {
+		/* in soft bias area: blend with the unbiased read, weighted by biast squared */
+		biasv = readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, NULL);
+
+		biast= biast*biast;
+		return (1.0f-biast)*v + biast*biasv;
+	}
+
+	return v;
+}
+
+/* Reads one sample buffer at pixel (xs, ys), clamped to the buffer, for depth zs.
+ * return 1.0 : fully in light, 0.0 : fully in shadow. When zs is at most 'bias'
+ * behind the stored z the result falls off as 1 - ((zs-z)/bias)^2. */
+static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
+{
+	float temp;
+	int *rz, ofs;
+	int zsamp=0;
+	char *ct, *cz;
+
+	/* simpleclip */
+	/* if (xs<0 || ys<0) return 1.0; */
+	/* if (xs>=shb->size || ys>=shb->size) return 1.0; */
+
+	/* always test borders of shadowbuffer */
+	if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
+	if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
+
+	if (shsample->deepbuf)
+		return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
+
+	/* calc z: find the 16x16 tile, cbuf tells how the tile was stored */
+	ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
+	ct= shsample->cbuf+ofs;
+	rz= *( (int **)(shsample->zbuf+ofs) );
+
+	if (*ct==3) {
+		/* three bytes per pixel, no reference int */
+		ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
+		cz= (char *)&zsamp;
+		cz[ACOMP]= ct[0];
+		cz[BCOMP]= ct[1];
+		cz[GCOMP]= ct[2];
+	}
+	else if (*ct==2) {
+		/* reference int (4 bytes), then two bytes per pixel */
+		ct= ((char *)rz);
+		ct+= 4+2*16*(ys & 15)+2*(xs & 15);
+		zsamp= *rz;
+
+		cz= (char *)&zsamp;
+		cz[BCOMP]= ct[0];
+		cz[GCOMP]= ct[1];
+	}
+	else if (*ct==1) {
+		/* reference int (4 bytes), then one byte per pixel */
+		ct= ((char *)rz);
+		ct+= 4+16*(ys & 15)+(xs & 15);
+		zsamp= *rz;
+
+		cz= (char *)&zsamp;
+		cz[GCOMP]= ct[0];
+
+	}
+	else {
+		/* got warning on this for 64 bits.... */
+		/* but it's working code! in this case rz is not a pointer but zvalue (ton) */
+		zsamp= GET_INT_FROM_POINTER(rz);
+	}
+
+	/* tricky stuff here; we use ints which can overflow easily with bias values */
+
+	if (zsamp > zs) return 1.0;	/* absolute no shadow */
+	else if (zs < - 0x7FFFFE00 + bias) return 1.0;	/* extreme close to clipstart */
+	else if (zsamp < zs-bias) return 0.0;	/* absolute in shadow */
+	else {	/* soft area */
+		/* bias==0 only gets here with zsamp==zs; 0/0 would give NaN, while any
+		 * non-zero bias gives 1.0 for that case */
+		if (bias == 0) return 1.0f;
+
+		temp= ( (float)(zs- zsamp) )/(float)bias;
+		return 1.0f - temp*temp;
+
+	}
+}
+
+/* Projects 'co' with shb->persmat. x and y come out scaled to 0..shb->size for
+ * coordinates inside the projection window, z (optional, may be NULL) is the
+ * projected depth after the homogeneous divide. */
+static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, const float co[3])
+{
+	float hco[4], size= 0.5f*(float)shb->size;
+
+	copy_v3_v3(hco, co);
+	hco[3]= 1.0f;
+
+	mul_m4_v4(shb->persmat, hco);
+
+	*x= size*(1.0f+hco[0]/hco[3]);
+	*y= size*(1.0f+hco[1]/hco[3]);
+	if (z) *z= (hco[2]/hco[3]);
+}
+
+/* the externally called shadow testing (reading) function */
+/* return 1.0: no shadow at all */
+float testshadowbuf(Render *re, ShadBuf *shb, const float co[3], const float dxco[3], const float dyco[3], float inp, float mat_bias)
+{
+	ShadSampleBuf *shsample;
+	float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
+	float xs1, 
ys1, zs1, *jit, *weight, xres, yres, biasf; + int xs, ys, zs, bias, *rz; + short a, num; + + /* crash preventer */ + if (shb->buffers.first==NULL) + return 1.0f; + + /* when facing away, assume fully in shadow */ + if (inp <= 0.0f) + return 0.0f; + + /* project coordinate to pixel space */ + shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co); + + /* clip z coordinate, z is projected so that (-1.0, 1.0) matches + * (clipstart, clipend), so we can do this simple test */ + if (zs1>=1.0f) + return 0.0f; + else if (zs1<= -1.0f) + return 1.0f; + + zs= ((float)0x7FFFFFFF)*zs1; + + /* take num*num samples, increase area with fac */ + num= get_render_shadow_samples(&re->r, shb->samp); + num= num*num; + fac= shb->soft; + + /* compute z bias */ + if (mat_bias!=0.0f) biasf= shb->bias*mat_bias; + else biasf= shb->bias; + /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors + * on cube edges, with one side being almost frontal lighted (ton) */ + bias= (1.5f-inp*inp)*biasf; + + /* in case of no filtering we can do things simpler */ + if (num==1) { + for (shsample= shb->buffers.first; shsample; shsample= shsample->next) + shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs); + + return shadfac/(float)shb->totbuf; + } + + /* calculate filter size */ + add_v3_v3v3(dco, co, dxco); + shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco); + dx[0]= xs1 - dx[0]; + dx[1]= ys1 - dx[1]; + + add_v3_v3v3(dco, co, dyco); + shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco); + dy[0]= xs1 - dy[0]; + dy[1]= ys1 - dy[1]; + + xres = fac * (fabsf(dx[0]) + fabsf(dy[0])); + yres = fac * (fabsf(dx[1]) + fabsf(dy[1])); + if (xres<1.0f) xres= 1.0f; + if (yres<1.0f) yres= 1.0f; + + /* make xs1/xs1 corner of sample area */ + xs1 -= xres*0.5f; + ys1 -= yres*0.5f; + + /* in case we have a constant value in a tile, we can do quicker lookup */ + if (xres<16.0f && yres<16.0f) { + shsample= shb->buffers.first; + if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, 
(int)ys1, 0)) { + if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) { + if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) { + if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) { + return readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs); + } + } + } + } + } + + /* full jittered shadow buffer lookup */ + for (shsample= shb->buffers.first; shsample; shsample= shsample->next) { + jit= shb->jit; + weight= shb->weight; + + for (a=num; a>0; a--, jit+=2, weight++) { + /* instead of jit i tried random: ugly! */ + /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */ + /* xs1 and ys1 are already corrected to be corner of sample area */ + xs= xs1 + xres*(jit[0] + 0.5f); + ys= ys1 + yres*(jit[1] + 0.5f); + + shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs); + } + } + + /* Renormalizes for the sample number: */ + return shadfac/(float)shb->totbuf; +} + +/* different function... sampling behind clipend can be LIGHT, bias is negative! */ +/* return: light */ +static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs) +{ + float temp; + int *rz, ofs; + int bias, zbias, zsamp; + char *ct, *cz; + + /* negative! 
The other side is more important */ + bias= -shb->bias; + + /* simpleclip */ + if (xs<0 || ys<0) return 0.0; + if (xs>=shb->size || ys>=shb->size) return 0.0; + + /* calc z */ + ofs= (ys>>4)*(shb->size>>4) + (xs>>4); + ct= shsample->cbuf+ofs; + rz= *( (int **)(shsample->zbuf+ofs) ); + + if (*ct==3) { + ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15); + cz= (char *)&zsamp; + zsamp= 0; + cz[ACOMP]= ct[0]; + cz[BCOMP]= ct[1]; + cz[GCOMP]= ct[2]; + } + else if (*ct==2) { + ct= ((char *)rz); + ct+= 4+2*16*(ys & 15)+2*(xs & 15); + zsamp= *rz; + + cz= (char *)&zsamp; + cz[BCOMP]= ct[0]; + cz[GCOMP]= ct[1]; + } + else if (*ct==1) { + ct= ((char *)rz); + ct+= 4+16*(ys & 15)+(xs & 15); + zsamp= *rz; + + cz= (char *)&zsamp; + cz[GCOMP]= ct[0]; + + } + else { + /* same as before */ + /* still working code! (ton) */ + zsamp= GET_INT_FROM_POINTER(rz); + } + + /* NO schadow when sampled at 'eternal' distance */ + + if (zsamp >= 0x7FFFFE00) return 1.0; + + if (zsamp > zs) return 1.0; /* absolute no shadww */ + else { + /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */ + zbias= 0x7fffffff - zs; + if (zbias > -bias) { + if ( zsamp < zs-bias) return 0.0; /* absolute in shadow */ + } + else return 0.0; /* absolute shadow */ + } + + /* soft area */ + + temp= ( (float)(zs- zsamp) )/(float)bias; + return 1.0f - temp*temp; +} + + +float shadow_halo(LampRen *lar, const float p1[3], const float p2[3]) +{ + /* p1 p2 already are rotated in spot-space */ + ShadBuf *shb= lar->shb; + ShadSampleBuf *shsample; + float co[4], siz; + float lambda, lambda_o, lambda_x, lambda_y, ldx, ldy; + float zf, xf1, yf1, zf1, xf2, yf2, zf2; + float count, lightcount; + int x, y, z, xs1, ys1; + int dx = 0, dy = 0; + + siz= 0.5f*(float)shb->size; + + co[0]= p1[0]; + co[1]= p1[1]; + co[2]= p1[2]/lar->sh_zfac; + co[3]= 1.0; + mul_m4_v4(shb->winmat, co); /* rational hom co */ + xf1= siz*(1.0f+co[0]/co[3]); + yf1= siz*(1.0f+co[1]/co[3]); + zf1= (co[2]/co[3]); + + + co[0]= p2[0]; + co[1]= p2[1]; + co[2]= 
p2[2]/lar->sh_zfac; + co[3]= 1.0; + mul_m4_v4(shb->winmat, co); /* rational hom co */ + xf2= siz*(1.0f+co[0]/co[3]); + yf2= siz*(1.0f+co[1]/co[3]); + zf2= (co[2]/co[3]); + + /* the 2dda (a pixel line formula) */ + + xs1= (int)xf1; + ys1= (int)yf1; + + if (xf1 != xf2) { + if (xf2-xf1 > 0.0f) { + lambda_x= (xf1-xs1-1.0f)/(xf1-xf2); + ldx= -shb->shadhalostep/(xf1-xf2); + dx= shb->shadhalostep; + } + else { + lambda_x= (xf1-xs1)/(xf1-xf2); + ldx= shb->shadhalostep/(xf1-xf2); + dx= -shb->shadhalostep; + } + } + else { + lambda_x= 1.0; + ldx= 0.0; + } + + if (yf1 != yf2) { + if (yf2-yf1 > 0.0f) { + lambda_y= (yf1-ys1-1.0f)/(yf1-yf2); + ldy= -shb->shadhalostep/(yf1-yf2); + dy= shb->shadhalostep; + } + else { + lambda_y= (yf1-ys1)/(yf1-yf2); + ldy= shb->shadhalostep/(yf1-yf2); + dy= -shb->shadhalostep; + } + } + else { + lambda_y= 1.0; + ldy= 0.0; + } + + x= xs1; + y= ys1; + lambda= count= lightcount= 0.0; + +/* printf("start %x %x \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */ + + do { + lambda_o= lambda; + + if (lambda_x==lambda_y) { + lambda_x+= ldx; + x+= dx; + lambda_y+= ldy; + y+= dy; + } + else { + if (lambda_x<lambda_y) { + lambda_x+= ldx; + x+= dx; + } + else { + lambda_y+= ldy; + y+= dy; + } + } + + lambda = min_ff(lambda_x, lambda_y); + + /* not making any progress? */ + if (lambda==lambda_o) break; + + /* clip to end of volume */ + lambda = min_ff(lambda, 1.0f); + + zf= zf1 + lambda*(zf2-zf1); + count+= (float)shb->totbuf; + + if (zf<= -1.0f) lightcount += 1.0f; /* close to the spot */ + else { + + /* make sure, behind the clipend we extend halolines. 
*/
+			if (zf>=1.0f) z= 0x7FFFF000;
+			else z= (int)(0x7FFFF000*zf);
+			
+			for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
+				lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
+			
+		}
+	}
+	while (lambda < 1.0f);
+	
+	if (count!=0.0f) return (lightcount/count);
+	return 0.0f;
+	
+}
+
+
+/* ********************* Irregular Shadow Buffer (ISB) ************* */
+/* ********** storage of all view samples in a raster of lists ***** */
+
+/* based on several articles describing this method, like:
+ * The Irregular Z-Buffer and its Application to Shadow Mapping
+ * Gregory S. Johnson - William R. Mark - Christopher A. Burns
+ * and
+ * Alias-Free Shadow Maps
+ * Timo Aila and Samuli Laine
+ */
+
+/* bsp structure (actually kd tree) */
+	/* BSPMAX_SAMPLE: sample capacity of a leaf, a leaf reaching it gets split.
+	 * BSPMAX_DEPTH: number of branch levels after which a full leaf is not split anymore */
+#define BSPMAX_SAMPLE	128
+#define BSPMAX_DEPTH		32
+
+/* aligned with struct rctf: the first four floats are cast to rctf for the 2d tests */
+typedef struct Boxf {
+	float xmin, xmax;
+	float ymin, ymax;
+	float zmin, zmax;
+} Boxf;
+	/* tree node; a node with 'left' set is a branch, otherwise a leaf holding samples */
+typedef struct ISBBranch {
+	struct ISBBranch *left, *right;
+	float divider[2];	/* split position, stored for both x and y */
+	Boxf box;
+	short totsamp, index, full, unused;	/* index: split axis, 0 is x and 1 is y; full: samples already fully in shadow */
+	ISBSample **samples;	/* leaf only, room for BSPMAX_SAMPLE pointers */
+} ISBBranch;
+
+typedef struct BSPFace {
+	Boxf box;
+	const float *v1, *v2, *v3, *v4;
+	int obi;		/* object for face lookup */
+	int facenr;		/* index to retrieve VlakRen */
+	int type;		/* only for strand now */
+	short shad_alpha, is_full;	/* shad_alpha is added to a sample per hit, 4096 is full shadow */
+	
+	/* strand caching data, optimize for point_behind_strand() */
+	float radline, radline_end, len;
+	float vec1[3], vec2[3], rc[3];
+} BSPFace;
+
+/* boxes are in lamp projection
+ *
+ * Resets 'box' so that following bound_boxf() calls grow it. Note that xmax and
+ * ymax start at 0 and not at a large negative value, so the result is only a
+ * tight fit for non-negative x and y. */
+static void init_box(Boxf *box)
+{
+	box->xmin = 1000000.0f;
+	box->xmax = 0;
+	box->ymin = 1000000.0f;
+	box->ymax = 0;
+	box->zmin= 0x7FFFFFFF;
+	box->zmax= - 0x7FFFFFFF;
+}
+
+/* use v1 to calculate boundbox: extends 'box' in x, y and z to include v1 */
+static void bound_boxf(Boxf *box, const float v1[3])
+{
+	if (v1[0] < box->xmin) box->xmin = v1[0];
+	if (v1[0] > box->xmax) box->xmax = v1[0];
+	if (v1[1] < box->ymin) box->ymin = v1[1];
+	if (v1[1] > box->ymax) box->ymax = v1[1];
+	if (v1[2] < box->zmin) box->zmin= v1[2];
+	if (v1[2] > box->zmax) box->zmax= v1[2];
+}
+
+/* use v1 to calculate boundbox: 2d variant, extends 'box' in x and y to include v1 */
+static void bound_rectf(rctf *box, const float v1[2])
+{
+	if (v1[0] < box->xmin) box->xmin = v1[0];
+	if (v1[0] > box->xmax) box->xmax = v1[0];
+	if (v1[1] < box->ymin) box->ymin = v1[1];
+	if (v1[1] > box->ymax) box->ymax = v1[1];
+}
+
+
+/* halfway splitting, for initializing a more regular tree
+ *
+ * Splits 'root' at the center of its box along the longer side and recurses
+ * 'level' times; the nodes reached with level 0 become leaves and get their
+ * sample array. All memory comes from 'mem'.
+ * NOTE(review): left/right/totsamp/full of the new nodes are never cleared
+ * here, so this presumably relies on the arena returning zeroed memory - confirm. */
+static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
+{
+	
+	/* if level > 0 we create new branches and go deeper */
+	if (level > 0) {
+		ISBBranch *left, *right;
+		int i;
+		
+		/* splitpoint */
+		root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
+		root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
+		
+		/* find best splitpoint */
+		if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
+			i = root->index = 0;
+		else
+			i = root->index = 1;
+		
+		left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+		right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+		
+		/* box info */
+		left->box= root->box;
+		right->box= root->box;
+		if (i==0) {
+			left->box.xmax = root->divider[0];
+			right->box.xmin = root->divider[0];
+		}
+		else {
+			left->box.ymax = root->divider[1];
+			right->box.ymin = root->divider[1];
+		}
+		isb_bsp_split_init(left, mem, level-1);
+		isb_bsp_split_init(right, mem, level-1);
+	}
+	else {
+		/* we add sample array */
+		root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
+	}
+}
+
+/* note; if all samples on same location we just spread them over 2 new branches
+ *
+ * Turns the full leaf 'root' (exactly BSPMAX_SAMPLE samples are read) into a
+ * branch with two new leaves. The divider is the average sample position, the
+ * split axis follows the longer side of the box. */
+static void isb_bsp_split(ISBBranch *root, MemArena *mem)
+{
+	ISBBranch *left, *right;
+	ISBSample *samples[BSPMAX_SAMPLE];
+	int a, i;
+	
+	/* splitpoint */
+	root->divider[0]= root->divider[1]= 0.0f;
+	for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
+		root->divider[0]+= root->samples[a]->zco[0];
+		root->divider[1]+= root->samples[a]->zco[1];
+	}
+	root->divider[0]/= BSPMAX_SAMPLE;
+	root->divider[1]/= BSPMAX_SAMPLE;
+	
+	/* find best splitpoint */
+	if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
+		i = root->index = 0;
+	else
+		i = root->index = 1;
+	
+	/* new branches */
+	left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+	right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+	
+	/* new sample array: only the left leaf gets a new one, the right leaf collects
+	 * in the local array first and takes over the array of root further below */
+	left->samples = BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
+	right->samples = samples; /* tmp */
+	
+	/* split samples */
+	for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
+		int comp= 0;
+		/* this prevents adding samples all to 1 branch when divider is equal to samples */
+		if (root->samples[a]->zco[i] == root->divider[i])
+			comp= a & 1;
+		else if (root->samples[a]->zco[i] < root->divider[i])
+			comp= 1;
+		
+		if (comp==1) {
+			left->samples[left->totsamp]= root->samples[a];
+			left->totsamp++;
+		}
+		else {
+			right->samples[right->totsamp]= root->samples[a];
+			right->totsamp++;
+		}
+	}
+	
+	/* copy samples from tmp */
+	memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
+	right->samples= root->samples;
+	root->samples= NULL;
+	
+	/* box info */
+	left->box= root->box;
+	right->box= root->box;
+	if (i==0) {
+		left->box.xmax = root->divider[0];
+		right->box.xmin = root->divider[0];
+	}
+	else {
+		left->box.ymax = root->divider[1];
+		right->box.ymin = root->divider[1];
+	}
+}
+
+/* inserts sample in main tree, also splits on threshold */
+/* returns 1 if error */
+static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
+{
+	ISBBranch *bspn= root;
+	const float *zco= sample->zco;
+	int i= 0;
+	
+	/* debug counter, also used to check if something was filled in ever */
+	root->totsamp++;
+	
+	/* going over branches until last one found; i counts the levels passed */
+	while (bspn->left) {
+		if (zco[bspn->index] <= bspn->divider[bspn->index])
+			bspn= bspn->left;
+		else
+			bspn= bspn->right;
+		i++;
+	}
+	/* bspn now is the last branch */
+	
+	if (bspn->totsamp==BSPMAX_SAMPLE) {
+		printf("error in bsp branch\n");	/* only for debug, cannot happen */
+		return 1;
+	}
+	
+	/* insert */
+	
bspn->samples[bspn->totsamp]= sample; + bspn->totsamp++; + + /* split if allowed and needed */ + if (bspn->totsamp==BSPMAX_SAMPLE) { + if (i==BSPMAX_DEPTH) { + bspn->totsamp--; /* stop filling in... will give errors */ + return 1; + } + isb_bsp_split(bspn, memarena); + } + return 0; +} + +/* initialize vars in face, for optimal point-in-face test */ +static void bspface_init_strand(BSPFace *face) +{ + + face->radline= 0.5f* len_v2v2(face->v1, face->v2); + + mid_v3_v3v3(face->vec1, face->v1, face->v2); + if (face->v4) + mid_v3_v3v3(face->vec2, face->v3, face->v4); + else + copy_v3_v3(face->vec2, face->v3); + + face->rc[0]= face->vec2[0]-face->vec1[0]; + face->rc[1]= face->vec2[1]-face->vec1[1]; + face->rc[2]= face->vec2[2]-face->vec1[2]; + + face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1]; + + if (face->len != 0.0f) { + face->radline_end = face->radline / sqrtf(face->len); + face->len = 1.0f / face->len; + } +} + +/* brought back to a simple 2d case */ +static int point_behind_strand(const float p[3], BSPFace *face) +{ + /* v1 - v2 is radius, v1 - v3 length */ + float dist, rc[2], pt[2]; + + /* using code from dist_to_line_segment_v2(), distance vec to line-piece */ + + if (face->len==0.0f) { + rc[0]= p[0]-face->vec1[0]; + rc[1]= p[1]-face->vec1[1]; + dist = len_v2(rc); + + if (dist < face->radline) + return 1; + } + else { + float lambda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len; + + if (lambda > -face->radline_end && lambda < 1.0f+face->radline_end) { + /* hesse for dist: */ + //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len); + + pt[0]= lambda*face->rc[0]+face->vec1[0]; + pt[1]= lambda*face->rc[1]+face->vec1[1]; + + rc[0]= pt[0]-p[0]; + rc[1]= pt[1]-p[1]; + dist = len_v2(rc); + + if (dist < face->radline) { + float zval= face->vec1[2] + lambda*face->rc[2]; + if (p[2] > zval) + return 1; + } + } + } + return 0; +} + + +/* return 1 if inside. 
code derived from src/parametrizer.c */ +static int point_behind_tria2d(const float p[3], const float v1[3], const float v2[3], const float v3[3]) +{ + float a[2], c[2], h[2], div; + float u, v; + + a[0] = v2[0] - v1[0]; + a[1] = v2[1] - v1[1]; + c[0] = v3[0] - v1[0]; + c[1] = v3[1] - v1[1]; + + div = a[0]*c[1] - a[1]*c[0]; + if (div==0.0f) + return 0; + + h[0] = p[0] - v1[0]; + h[1] = p[1] - v1[1]; + + div = 1.0f/div; + + u = (h[0]*c[1] - h[1]*c[0])*div; + if (u >= 0.0f) { + v = (a[0]*h[1] - a[1]*h[0])*div; + if (v >= 0.0f) { + if ( u + v <= 1.0f) { + /* inside, now check if point p is behind */ + float z= (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2]; + if (z <= p[2]) + return 1; + } + } + } + + return 0; +} + +#if 0 +/* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */ + +/* check if line v1-v2 has all rect points on other side of point v3 */ +static int rect_outside_line(rctf *rect, const float v1[3], const float v2[3], const float v3[3]) +{ + float a, b, c; + int side; + + /* line formula for v1-v2 */ + a= v2[1]-v1[1]; + b= v1[0]-v2[0]; + c= -a*v1[0] - b*v1[1]; + side= a*v3[0] + b*v3[1] + c < 0.0f; + + /* the four quad points */ + if ( side==(rect->xmin*a + rect->ymin*b + c >= 0.0f) ) + if ( side==(rect->xmax*a + rect->ymin*b + c >= 0.0f) ) + if ( side==(rect->xmax*a + rect->ymax*b + c >= 0.0f) ) + if ( side==(rect->xmin*a + rect->ymax*b + c >= 0.0f) ) + return 1; + return 0; +} + +/* check if one of the triangle edges separates all rect points on 1 side */ +static int rect_isect_tria(rctf *rect, const float v1[3], const float v2[3], const float v3[3]) +{ + if (rect_outside_line(rect, v1, v2, v3)) + return 0; + if (rect_outside_line(rect, v2, v3, v1)) + return 0; + if (rect_outside_line(rect, v3, v1, v2)) + return 0; + return 1; +} +#endif + +/* if face overlaps a branch, it executes func. recursive */ +static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face) +{ + + /* are we descending? 
*/ + if (bspn->left) { + /* hrmf, the box struct cannot be addressed with index */ + if (bspn->index==0) { + if (face->box.xmin <= bspn->divider[0]) + isb_bsp_face_inside(bspn->left, face); + if (face->box.xmax > bspn->divider[0]) + isb_bsp_face_inside(bspn->right, face); + } + else { + if (face->box.ymin <= bspn->divider[1]) + isb_bsp_face_inside(bspn->left, face); + if (face->box.ymax > bspn->divider[1]) + isb_bsp_face_inside(bspn->right, face); + } + } + else { + /* else: end branch reached */ + int a; + + if (bspn->totsamp==0) return; + + /* check for nodes entirely in shadow, can be skipped */ + if (bspn->totsamp==bspn->full) + return; + + /* if bsp node is entirely in front of face, give up */ + if (bspn->box.zmax < face->box.zmin) + return; + + /* if face boundbox is outside of branch rect, give up */ + if (0==BLI_rctf_isect((rctf *)&face->box, (rctf *)&bspn->box, NULL)) + return; + + /* test all points inside branch */ + for (a=bspn->totsamp-1; a>=0; a--) { + ISBSample *samp= bspn->samples[a]; + + if ((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) { + if (face->box.zmin < samp->zco[2]) { + if (BLI_rctf_isect_pt_v((rctf *)&face->box, samp->zco)) { + int inshadow= 0; + + if (face->type) { + if (point_behind_strand(samp->zco, face)) + inshadow= 1; + } + else if ( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3)) + inshadow= 1; + else if (face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4)) + inshadow= 1; + + if (inshadow) { + *(samp->shadfac) += face->shad_alpha; + /* optimize; is_full means shad_alpha==4096 */ + if (*(samp->shadfac) >= 4096 || face->is_full) { + bspn->full++; + samp->shadfac= NULL; + } + } + } + } + } + } + } +} + +/* based on available samples, recalculate the bounding box for bsp nodes, recursive */ +static void isb_bsp_recalc_box(ISBBranch *root) +{ + if (root->left) { + isb_bsp_recalc_box(root->left); + isb_bsp_recalc_box(root->right); + } + else if (root->totsamp) { + int a; + + 
init_box(&root->box); + for (a=root->totsamp-1; a>=0; a--) + bound_boxf(&root->box, root->samples[a]->zco); + } +} + +/* callback function for zbuf clip */ +static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr, + const float *v1, const float *v2, const float *v3, const float *v4) +{ + BSPFace face; + + face.v1= v1; + face.v2= v2; + face.v3= v3; + face.v4= v4; + face.obi= obi; + face.facenr= zvlnr & ~RE_QUAD_OFFS; + face.type= R_STRAND; + if (R.osa) + face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa); + else + face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha); + + face.is_full= (zspan->shad_alpha==1.0f); + + /* setup boundbox */ + init_box(&face.box); + bound_boxf(&face.box, v1); + bound_boxf(&face.box, v2); + bound_boxf(&face.box, v3); + if (v4) + bound_boxf(&face.box, v4); + + /* optimize values */ + bspface_init_strand(&face); + + isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face); + +} + +/* callback function for zbuf clip */ +static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr, + const float *v1, const float *v2, const float *v3, const float *v4) +{ + BSPFace face; + + face.v1= v1; + face.v2= v2; + face.v3= v3; + face.v4= v4; + face.obi= obi; + face.facenr= zvlnr & ~RE_QUAD_OFFS; + face.type= 0; + if (R.osa) + face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa); + else + face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha); + + face.is_full= (zspan->shad_alpha==1.0f); + + /* setup boundbox */ + init_box(&face.box); + bound_boxf(&face.box, v1); + bound_boxf(&face.box, v2); + bound_boxf(&face.box, v3); + if (v4) + bound_boxf(&face.box, v4); + + isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face); +} + +static int testclip_minmax(const float ho[4], const float minmax[4]) +{ + float wco= ho[3]; + int flag= 0; + + if ( ho[0] > minmax[1]*wco) flag = 1; + else if ( ho[0]< minmax[0]*wco) flag = 2; + + if ( ho[1] > minmax[3]*wco) flag |= 4; + else if ( ho[1]< minmax[2]*wco) flag |= 8; + + return 
flag; +} + +/* main loop going over all faces and check in bsp overlaps, fill in shadfac values */ +static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root) +{ + ObjectInstanceRen *obi; + ObjectRen *obr; + ShadBuf *shb= lar->shb; + ZSpan zspan, zspanstrand; + VlakRen *vlr= NULL; + Material *ma= NULL; + float minmaxf[4], winmat[4][4]; + int size= shb->size; + int i, a, ok=1, lay= -1; + + /* further optimize, also sets minz maxz */ + isb_bsp_recalc_box(root); + + /* extra clipping for minmax */ + minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size; + minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size; + minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size; + minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size; + + if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay; + + /* (ab)use zspan, since we use zbuffer clipping code */ + zbuf_alloc_span(&zspan, size, size, re->clipcrop); + + zspan.zmulx= ((float)size)/2.0f; + zspan.zmuly= ((float)size)/2.0f; + zspan.zofsx= -0.5f; + zspan.zofsy= -0.5f; + + /* pass on bsp root to zspan */ + zspan.rectz= (int *)root; + + /* filling methods */ + zspanstrand= zspan; + // zspan.zbuflinefunc= zbufline_onlyZ; + zspan.zbuffunc= isb_bsp_test_face; + zspanstrand.zbuffunc= isb_bsp_test_strand; + + for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) { + obr= obi->obr; + + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(winmat, shb->persmat, obi->mat); + else + copy_m4_m4(winmat, shb->persmat); + + for (a=0; a<obr->totvlak; a++) { + + if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; + else vlr++; + + /* note, these conditions are copied in shadowbuf_autoclip() */ + if (vlr->mat!= ma) { + ma= vlr->mat; + ok= 1; + if ((ma->mode2 & MA_CASTSHADOW)==0 || (ma->mode & MA_SHADBUF)==0) ok= 0; + if (ma->material_type == MA_TYPE_WIRE) ok= 0; + zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha; + } + + if (ok && (obi->lay & lay)) { + float hoco[4][4]; + int c1, c2, c3, c4=0; + int d1, d2, d3, d4=0; + int partclip; + + 
/* create hocos per face, it is while render */ + projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf); + projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf); + projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf); + if (vlr->v4) { + projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf); + } + + /* minmax clipping */ + if (vlr->v4) partclip= d1 & d2 & d3 & d4; + else partclip= d1 & d2 & d3; + + if (partclip==0) { + + /* window clipping */ + c1= testclip(hoco[0]); + c2= testclip(hoco[1]); + c3= testclip(hoco[2]); + if (vlr->v4) + c4= testclip(hoco[3]); + + /* ***** NO WIRE YET */ + if (ma->material_type == MA_TYPE_WIRE) { + if (vlr->v4) + zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4); + else + zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], NULL, c1, c2, c3, 0); + } + else if (vlr->v4) { + if (vlr->flag & R_STRAND) + zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4); + else + zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4); + } + else + zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3); + + } + } + } + } + + zbuf_free_span(&zspan); +} + +/* returns 1 when the viewpixel is visible in lampbuffer */ +static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float co_r[3]) +{ + float hoco[4], v1[3], nor[3]; + float dface, fac, siz; + + RE_vlakren_get_normal(&R, obi, vlr, nor); + copy_v3_v3(v1, vlr->v1->co); + if (obi->flag & R_TRANSFORMED) + mul_m4_v3(obi->mat, v1); + + /* from shadepixel() */ + dface = dot_v3v3(v1, nor); + hoco[3]= 1.0f; + + /* ortho viewplane cannot intersect using view vector originating in (0, 0, 0) */ + if (R.r.mode & R_ORTHO) { + /* x and y 3d coordinate can be derived from pixel coord and winmat */ + float fx= 2.0f/(R.winx*R.winmat[0][0]); + float fy= 
2.0f/(R.winy*R.winmat[1][1]); + + hoco[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0]; + hoco[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1]; + + /* using a*x + b*y + c*z = d equation, (a b c) is normal */ + if (nor[2]!=0.0f) + hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2]; + else + hoco[2]= 0.0f; + } + else { + float div, view[3]; + + calc_view_vector(view, x, y); + + div = dot_v3v3(nor, view); + if (div==0.0f) + return 0; + + fac= dface/div; + + hoco[0]= fac*view[0]; + hoco[1]= fac*view[1]; + hoco[2]= fac*view[2]; + } + + /* move 3d vector to lampbuf */ + mul_m4_v4(shb->persmat, hoco); /* rational hom co */ + + /* clip We can test for -1.0/1.0 because of the properties of the + * coordinate transformations. */ + fac = fabsf(hoco[3]); + if (hoco[0]<-fac || hoco[0]>fac) + return 0; + if (hoco[1]<-fac || hoco[1]>fac) + return 0; + if (hoco[2]<-fac || hoco[2]>fac) + return 0; + + siz= 0.5f*(float)shb->size; + co_r[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f; + co_r[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f; + co_r[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]); + + /* XXXX bias, much less than normal shadbuf, or do we need a constant? */ + co_r[2] -= 0.05f*shb->bias; + + return 1; +} + +/* storage of shadow results, solid osa and transp case */ +static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples) +{ + ISBShadfacA *new; + float shadfacf; + + /* in osa case, the samples were filled in with factor 1.0/R.osa. 
if fewer samples we have to correct */ + if (R.osa) + shadfacf= ((float)shadfac*R.osa)/(4096.0f*samples); + else + shadfacf= ((float)shadfac)/(4096.0f); + + new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA)); + new->obi= obi; + new->facenr= facenr & ~RE_QUAD_OFFS; + new->shadfac= shadfacf; + if (*isbsapp) + new->next= (*isbsapp); + else + new->next= NULL; + + *isbsapp= new; +} + +/* adding samples, solid case */ +static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf) +{ + int xi, yi, *xcos, *ycos; + int sample, bsp_err= 0; + + /* bsp split doesn't like to handle regular sequences */ + xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos"); + ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos"); + for (xi=0; xi<pa->rectx; xi++) + xcos[xi]= xi; + for (yi=0; yi<pa->recty; yi++) + ycos[yi]= yi; + BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345); + BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321); + + for (sample=0; sample<(R.osa?R.osa:1); sample++) { + ISBSample *samp= samplebuf[sample], *samp1; + + for (yi=0; yi<pa->recty; yi++) { + int y= ycos[yi]; + for (xi=0; xi<pa->rectx; xi++) { + int x= xcos[xi]; + samp1= samp + y*pa->rectx + x; + if (samp1->facenr) + bsp_err |= isb_bsp_insert(root, memarena, samp1); + } + if (bsp_err) break; + } + } + + MEM_freeN(xcos); + MEM_freeN(ycos); + + return bsp_err; +} + +/* solid version */ +/* lar->shb, pa->rectz and pa->rectp should exist */ +static void isb_make_buffer(RenderPart *pa, LampRen *lar) +{ + ShadBuf *shb= lar->shb; + ISBData *isbdata; + ISBSample *samp, *samplebuf[16]; /* should be RE_MAX_OSA */ + ISBBranch root; + MemArena *memarena; + intptr_t *rd; + int *recto, *rectp, x, y, sindex, sample, bsp_err=0; + + /* storage for shadow, per thread */ + isbdata= shb->isb_result[pa->thread]; + + /* to map the shi->xs and ys coordinate */ + isbdata->minx= pa->disprect.xmin; + isbdata->miny= pa->disprect.ymin; + isbdata->rectx= pa->rectx; + isbdata->recty= pa->recty; + + /* 
branches are added using memarena (32k branches) */ + memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena"); + BLI_memarena_use_calloc(memarena); + + /* samplebuf is in camera view space (pixels) */ + for (sample=0; sample<(R.osa?R.osa:1); sample++) + samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf"); + + /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */ + if (R.osa==0) + isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs"); + + /* setup bsp root */ + memset(&root, 0, sizeof(ISBBranch)); + root.box.xmin = (float)shb->size; + root.box.ymin = (float)shb->size; + + /* create the sample buffers */ + for (sindex=0, y=0; y<pa->recty; y++) { + for (x=0; x<pa->rectx; x++, sindex++) { + + /* this makes it a long function, but splitting it out would mean 10+ arguments */ + /* first check OSA case */ + if (R.osa) { + rd= pa->rectdaps + sindex; + if (*rd) { + float xs= (float)(x + pa->disprect.xmin); + float ys= (float)(y + pa->disprect.ymin); + + for (sample=0; sample<R.osa; sample++) { + PixStr *ps= (PixStr *)(*rd); + int mask= (1<<sample); + + while (ps) { + if (ps->mask & mask) + break; + ps= ps->next; + } + if (ps && ps->facenr>0) { + ObjectInstanceRen *obi= &R.objectinstance[ps->obi]; + ObjectRen *obr= obi->obr; + VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK); + + samp= samplebuf[sample] + sindex; + /* convert image plane pixel location to lamp buffer space */ + if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) { + samp->obi= ps->obi; + samp->facenr= ps->facenr & ~RE_QUAD_OFFS; + ps->shadfac= 0; + samp->shadfac= &ps->shadfac; + bound_rectf((rctf *)&root.box, samp->zco); + } + } + } + } + } + else { + rectp= pa->rectp + sindex; + recto= pa->recto + sindex; + if (*rectp>0) { + ObjectInstanceRen *obi= &R.objectinstance[*recto]; + ObjectRen *obr= obi->obr; + VlakRen *vlr= 
RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK); + float xs= (float)(x + pa->disprect.xmin); + float ys= (float)(y + pa->disprect.ymin); + + samp= samplebuf[0] + sindex; + /* convert image plane pixel location to lamp buffer space */ + if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) { + samp->obi= *recto; + samp->facenr= *rectp & ~RE_QUAD_OFFS; + samp->shadfac= isbdata->shadfacs + sindex; + bound_rectf((rctf *)&root.box, samp->zco); + } + } + } + } + } + + /* simple method to see if we have samples */ + if (root.box.xmin != (float)shb->size) { + /* now create a regular split, root.box has the initial bounding box of all pixels */ + /* split bsp 8 levels deep, in regular grid (16 x 16) */ + isb_bsp_split_init(&root, memarena, 8); + + /* insert all samples in BSP now */ + bsp_err= isb_add_samples(pa, &root, memarena, samplebuf); + + if (bsp_err==0) { + /* go over all faces and fill in shadow values */ + + isb_bsp_fillfaces(&R, lar, &root); /* shb->persmat should have been calculated */ + + /* copy shadow samples to persistent buffer, reduce memory overhead */ + if (R.osa) { + ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs"); + + isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena"); + BLI_memarena_use_calloc(isbdata->memarena); + + for (rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) { + + if (*rd) { + PixStr *ps= (PixStr *)(*rd); + while (ps) { + if (ps->shadfac) + isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask)); + ps= ps->next; + } + } + } + } + } + } + else { + if (isbdata->shadfacs) { + MEM_freeN(isbdata->shadfacs); + isbdata->shadfacs= NULL; + } + } + + /* free BSP */ + BLI_memarena_free(memarena); + + /* free samples */ + for (x=0; x<(R.osa?R.osa:1); x++) + MEM_freeN(samplebuf[x]); + + if (bsp_err) printf("error in filling bsp\n"); +} + +/* add sample to buffer, isbsa is the root sample in a buffer */ 
+static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem) +{ + ISBSampleA *new; + + new= BLI_memarena_alloc(mem, sizeof(ISBSampleA)); + if (*isbsa) + new->next= (*isbsa); + else + new->next= NULL; + + *isbsa= new; + return new; +} + +/* adding samples in BSP, transparent case */ +static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf) +{ + int xi, yi, *xcos, *ycos; + int sample, bsp_err= 0; + + /* bsp split doesn't like to handle regular sequences */ + xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos"); + ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos"); + for (xi=0; xi<pa->rectx; xi++) + xcos[xi]= xi; + for (yi=0; yi<pa->recty; yi++) + ycos[yi]= yi; + BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345); + BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321); + + for (sample=0; sample<(R.osa?R.osa:1); sample++) { + ISBSampleA **samp= samplebuf[sample], *samp1; + + for (yi=0; yi<pa->recty; yi++) { + int y= ycos[yi]; + for (xi=0; xi<pa->rectx; xi++) { + int x= xcos[xi]; + + samp1= *(samp + y*pa->rectx + x); + while (samp1) { + bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1); + samp1= samp1->next; + } + } + if (bsp_err) break; + } + } + + MEM_freeN(xcos); + MEM_freeN(ycos); + + return bsp_err; +} + + +/* Ztransp version */ +/* lar->shb, pa->rectz and pa->rectp should exist */ +static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar) +{ + ShadBuf *shb= lar->shb; + ISBData *isbdata; + ISBSampleA *samp, **samplebuf[16]; /* MAX_OSA */ + ISBBranch root; + MemArena *memarena; + APixstr *ap; + int x, y, sindex, sample, bsp_err=0; + + /* storage for shadow, per thread */ + isbdata= shb->isb_result[pa->thread]; + + /* to map the shi->xs and ys coordinate */ + isbdata->minx= pa->disprect.xmin; + isbdata->miny= pa->disprect.ymin; + isbdata->rectx= pa->rectx; + isbdata->recty= pa->recty; + + /* branches are added using memarena (32k branches) */ + memarena = 
BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena"); + BLI_memarena_use_calloc(memarena); + + /* samplebuf is in camera view space (pixels) */ + for (sample=0; sample<(R.osa?R.osa:1); sample++) + samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf"); + + /* setup bsp root */ + memset(&root, 0, sizeof(ISBBranch)); + root.box.xmin = (float)shb->size; + root.box.ymin = (float)shb->size; + + /* create the sample buffers */ + for (ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) { + for (x=0; x<pa->rectx; x++, sindex++, ap++) { + + if (ap->p[0]) { + APixstr *apn; + float xs= (float)(x + pa->disprect.xmin); + float ys= (float)(y + pa->disprect.ymin); + + for (apn=ap; apn; apn= apn->next) { + int a; + for (a=0; a<4; a++) { + if (apn->p[a]) { + ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]]; + ObjectRen *obr= obi->obr; + VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK); + float zco[3]; + + /* here we store shadfac, easier to create the end storage buffer. 
needs zero'ed, multiple shadowbufs use it */ + apn->shadfac[a]= 0; + + if (R.osa) { + for (sample=0; sample<R.osa; sample++) { + int mask= (1<<sample); + + if (apn->mask[a] & mask) { + + /* convert image plane pixel location to lamp buffer space */ + if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) { + samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena); + samp->obi= apn->obi[a]; + samp->facenr= apn->p[a] & ~RE_QUAD_OFFS; + samp->shadfac= &apn->shadfac[a]; + + copy_v3_v3(samp->zco, zco); + bound_rectf((rctf *)&root.box, samp->zco); + } + } + } + } + else { + + /* convert image plane pixel location to lamp buffer space */ + if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) { + + samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena); + samp->obi= apn->obi[a]; + samp->facenr= apn->p[a] & ~RE_QUAD_OFFS; + samp->shadfac= &apn->shadfac[a]; + + copy_v3_v3(samp->zco, zco); + bound_rectf((rctf *)&root.box, samp->zco); + } + } + } + } + } + } + } + } + + /* simple method to see if we have samples */ + if (root.box.xmin != (float)shb->size) { + /* now create a regular split, root.box has the initial bounding box of all pixels */ + /* split bsp 8 levels deep, in regular grid (16 x 16) */ + isb_bsp_split_init(&root, memarena, 8); + + /* insert all samples in BSP now */ + bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf); + + if (bsp_err==0) { + ISBShadfacA **isbsa; + + /* go over all faces and fill in shadow values */ + isb_bsp_fillfaces(&R, lar, &root); /* shb->persmat should have been calculated */ + + /* copy shadow samples to persistent buffer, reduce memory overhead */ + isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs"); + + isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena"); + + for (ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) { + + if (ap->p[0]) { + APixstr *apn; + for (apn=ap; apn; apn= apn->next) { + 
int a; + for (a=0; a<4; a++) { + if (apn->p[a] && apn->shadfac[a]) { + if (R.osa) + isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a])); + else + isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0); + } + } + } + } + } + } + } + + /* free BSP */ + BLI_memarena_free(memarena); + + /* free samples */ + for (x=0; x<(R.osa?R.osa:1); x++) + MEM_freeN(samplebuf[x]); + + if (bsp_err) printf("error in filling bsp\n"); +} + + + +/* exported */ + +/* returns amount of light (1.0 = no shadow) */ +/* note, shadepixel() rounds the coordinate, not the real sample info */ +float ISB_getshadow(ShadeInput *shi, ShadBuf *shb) +{ + /* if raytracing, we can't accept irregular shadow */ + if (shi->depth==0) { + ISBData *isbdata= shb->isb_result[shi->thread]; + + if (isbdata) { + if (isbdata->shadfacs || isbdata->shadfaca) { + int x= shi->xs - isbdata->minx; + + if (x >= 0 && x < isbdata->rectx) { + int y= shi->ys - isbdata->miny; + + if (y >= 0 && y < isbdata->recty) { + if (isbdata->shadfacs) { + const short *sp= isbdata->shadfacs + y*isbdata->rectx + x; + return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f; + } + else { + int sindex= y*isbdata->rectx + x; + int obi= shi->obi - R.objectinstance; + ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex); + + while (isbsa) { + if (isbsa->facenr==shi->facenr+1 && isbsa->obi==obi) + return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac; + isbsa= isbsa->next; + } + } + } + } + } + } + } + return 1.0f; +} + +/* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */ +void ISB_create(RenderPart *pa, APixstr *apixbuf) +{ + GroupObject *go; + + /* go over all lamps, and make the irregular buffers */ + for (go=R.lights.first; go; go= go->next) { + LampRen *lar= go->lampren; + + if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) { + + /* create storage for shadow, per thread */ + lar->shb->isb_result[pa->thread]= 
MEM_callocN(sizeof(ISBData), "isb data"); + + if (apixbuf) + isb_make_buffer_transp(pa, apixbuf, lar); + else + isb_make_buffer(pa, lar); + } + } +} + + +/* end of part rendering, free stored shadow data for this thread from all lamps */ +void ISB_free(RenderPart *pa) +{ + GroupObject *go; + + /* go over all lamps, and free the irregular buffers */ + for (go=R.lights.first; go; go= go->next) { + LampRen *lar= go->lampren; + + if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) { + ISBData *isbdata= lar->shb->isb_result[pa->thread]; + + if (isbdata) { + if (isbdata->shadfacs) + MEM_freeN(isbdata->shadfacs); + if (isbdata->shadfaca) + MEM_freeN(isbdata->shadfaca); + + if (isbdata->memarena) + BLI_memarena_free(isbdata->memarena); + + MEM_freeN(isbdata); + lar->shb->isb_result[pa->thread]= NULL; + } + } + } +} diff --git a/source/blender/render/intern/source/shadeinput.c b/source/blender/render/intern/source/shadeinput.c new file mode 100644 index 00000000000..d79749871c3 --- /dev/null +++ b/source/blender/render/intern/source/shadeinput.c @@ -0,0 +1,1490 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2006 Blender Foundation + * All rights reserved. 
+ * + * Contributors: Hos, Robert Wenzlaff. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/shadeinput.c + * \ingroup render + */ + + +#include <stdio.h> +#include <math.h> +#include <string.h> + + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "DNA_lamp_types.h" +#include "DNA_meshdata_types.h" +#include "DNA_material_types.h" +#include "DNA_particle_types.h" + +#include "BKE_scene.h" + +#include "BKE_node.h" + +/* local include */ +#include "raycounter.h" +#include "render_types.h" +#include "renderdatabase.h" +#include "rendercore.h" +#include "shading.h" +#include "strand.h" +#include "texture.h" +#include "volumetric.h" +#include "zbuf.h" + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +/* Shade Sample order: + * + * - shade_samples_fill_with_ps() + * - for each sample + * - shade_input_set_triangle() <- if prev sample-face is same, use shade_input_copy_triangle() + * - if vlr + * - shade_input_set_viewco() <- not for ray or bake + * - shade_input_set_uv() <- not for ray or bake + * - shade_input_set_normals() + * - shade_samples() + * - if AO + * - shade_samples_do_AO() + * - if shading happens + * - for each sample + * - shade_input_set_shade_texco() + * - shade_samples_do_shade() + * - OSA: distribute sample result with filter masking + * + */ + +/* initialize material variables in shadeinput, + * doing inverse gamma correction where applicable */ +void shade_input_init_material(ShadeInput *shi) +{ + /* note, keep this synced with render_types.h */ + memcpy(&shi->r, &shi->mat->r, 23 * sizeof(float)); + shi->har = shi->mat->har; +} + +/* also used as callback for nodes */ +/* delivers a fully filled in ShadeResult, for all passes */ 
+void shade_material_loop(ShadeInput *shi, ShadeResult *shr) +{ + + shade_lamp_loop(shi, shr); /* clears shr */ + + if (shi->translucency != 0.0f) { + ShadeResult shr_t; + float fac = shi->translucency; + + shade_input_init_material(shi); + negate_v3_v3(shi->vn, shi->vno); + negate_v3(shi->facenor); + shi->depth++; /* hack to get real shadow now */ + shade_lamp_loop(shi, &shr_t); + shi->depth--; + + /* a couple of passes */ + madd_v3_v3fl(shr->combined, shr_t.combined, fac); + if (shi->passflag & SCE_PASS_SPEC) + madd_v3_v3fl(shr->spec, shr_t.spec, fac); + if (shi->passflag & SCE_PASS_DIFFUSE) { + madd_v3_v3fl(shr->diff, shr_t.diff, fac); + madd_v3_v3fl(shr->diffshad, shr_t.diffshad, fac); + } + if (shi->passflag & SCE_PASS_SHADOW) + madd_v3_v3fl(shr->shad, shr_t.shad, fac); + + negate_v3(shi->vn); + negate_v3(shi->facenor); + } + + /* depth >= 1 when ray-shading */ + if (shi->depth == 0 || shi->volume_depth > 0) { + if (R.r.mode & R_RAYTRACE) { + if (shi->ray_mirror != 0.0f || ((shi->mode & MA_TRANSP) && (shi->mode & MA_RAYTRANSP) && shr->alpha != 1.0f)) { + /* ray trace works on combined, but gives pass info */ + ray_trace(shi, shr); + } + } + /* disable adding of sky for raytransp */ + if ((shi->mode & MA_TRANSP) && (shi->mode & MA_RAYTRANSP)) + if ((shi->layflag & SCE_LAY_SKY) && (R.r.alphamode == R_ADDSKY)) + shr->alpha = 1.0f; + } + + if (R.r.mode & R_RAYTRACE) { + if (R.render_volumes_inside.first) + shade_volume_inside(shi, shr); + } +} + + +/* do a shade, finish up some passes, apply mist */ +void shade_input_do_shade(ShadeInput *shi, ShadeResult *shr) +{ + bool compat = false; + float alpha; + + /* ------ main shading loop -------- */ +#ifdef RE_RAYCOUNTER + memset(&shi->raycounter, 0, sizeof(shi->raycounter)); +#endif + + if (shi->mat->nodetree && shi->mat->use_nodes) { + compat = ntreeShaderExecTree(shi->mat->nodetree, shi, shr); + } + + /* also run this when node shaders fail, due to incompatible shader nodes */ + if (compat == false) { + /* copy all 
relevant material vars, note, keep this synced with render_types.h */ + shade_input_init_material(shi); + + if (shi->mat->material_type == MA_TYPE_VOLUME) { + if (R.r.mode & R_RAYTRACE) { + shade_volume_outside(shi, shr); + } + } + else { /* MA_TYPE_SURFACE, MA_TYPE_WIRE */ + shade_material_loop(shi, shr); + } + } + + /* copy additional passes */ + if (shi->passflag & (SCE_PASS_VECTOR | SCE_PASS_NORMAL)) { + copy_v4_v4(shr->winspeed, shi->winspeed); + copy_v3_v3(shr->nor, shi->vn); + } + + /* MIST */ + if ((shi->passflag & SCE_PASS_MIST) || ((R.wrld.mode & WO_MIST) && (shi->mat->mode & MA_NOMIST) == 0)) { + if (R.r.mode & R_ORTHO) + shr->mist = mistfactor(-shi->co[2], shi->co); + else + shr->mist = mistfactor(len_v3(shi->co), shi->co); + } + else shr->mist = 0.0f; + + if ((R.wrld.mode & WO_MIST) && (shi->mat->mode & MA_NOMIST) == 0) { + alpha = shr->mist; + } + else alpha = 1.0f; + + /* add mist and premul color */ + if (shr->alpha != 1.0f || alpha != 1.0f) { + float fac = alpha * (shr->alpha); + shr->combined[3] = fac; + + if (shi->mat->material_type != MA_TYPE_VOLUME) + mul_v3_fl(shr->combined, fac); + } + else + shr->combined[3] = 1.0f; + + /* add z */ + shr->z = -shi->co[2]; + + /* RAYHITS */ +#if 0 + if (1 || shi->passflag & SCE_PASS_RAYHITS) { + shr->rayhits[0] = (float)shi->raycounter.faces.test; + shr->rayhits[1] = (float)shi->raycounter.bb.hit; + shr->rayhits[2] = 0.0; + shr->rayhits[3] = 1.0; + } +#endif + + RE_RC_MERGE(&re_rc_counter[shi->thread], &shi->raycounter); +} + +/* **************************************************************************** */ +/* ShadeInput */ +/* **************************************************************************** */ + + +void vlr_set_uv_indices(VlakRen *vlr, int *i1, int *i2, int *i3) +{ + /* to prevent storing new tfaces or vcols, we check a split runtime */ + /* 4---3 4---3 */ + /* |\ 1| or |1 /| */ + /* |0\ | |/ 0| */ + /* 1---2 1---2 0 = orig face, 1 = new face */ + + /* Update vert nums to point to correct verts 
of original face */ + if (vlr->flag & R_DIVIDE_24) { + if (vlr->flag & R_FACE_SPLIT) { + (*i1)++; (*i2)++; (*i3)++; + } + else { + (*i3)++; + } + } + else if (vlr->flag & R_FACE_SPLIT) { + (*i2)++; (*i3)++; + } +} + +/* copy data from face to ShadeInput, general case */ +/* indices 0 1 2 3 only */ +void shade_input_set_triangle_i(ShadeInput *shi, ObjectInstanceRen *obi, VlakRen *vlr, short i1, short i2, short i3) +{ + VertRen **vpp = &vlr->v1; + + shi->vlr = vlr; + shi->obi = obi; + shi->obr = obi->obr; + + shi->v1 = vpp[i1]; + shi->v2 = vpp[i2]; + shi->v3 = vpp[i3]; + + shi->i1 = i1; + shi->i2 = i2; + shi->i3 = i3; + + /* note, shi->mat is set in node shaders */ + shi->mat = shi->mat_override ? shi->mat_override : vlr->mat; + + shi->osatex = (shi->mat->texco & TEXCO_OSA); + shi->mode = shi->mat->mode_l; /* or-ed result for all nodes */ + shi->mode2 = shi->mat->mode2_l; + + /* facenormal copy, can get flipped */ + shi->flippednor = 0; + RE_vlakren_get_normal(&R, obi, vlr, shi->facenor); + + /* calculate vertexnormals */ + if (vlr->flag & R_SMOOTH) { + copy_v3_v3(shi->n1, shi->v1->n); + copy_v3_v3(shi->n2, shi->v2->n); + copy_v3_v3(shi->n3, shi->v3->n); + + if (obi->flag & R_TRANSFORMED) { + mul_m3_v3(obi->nmat, shi->n1); normalize_v3(shi->n1); + mul_m3_v3(obi->nmat, shi->n2); normalize_v3(shi->n2); + mul_m3_v3(obi->nmat, shi->n3); normalize_v3(shi->n3); + } + } +} + +/* copy data from face to ShadeInput, scanline case */ +void shade_input_set_triangle(ShadeInput *shi, int obi, int facenr, int UNUSED(normal_flip)) +{ + if (facenr > 0) { + shi->obi = &R.objectinstance[obi]; + shi->obr = shi->obi->obr; + shi->facenr = (facenr - 1) & RE_QUAD_MASK; + if (shi->facenr < shi->obr->totvlak) { + VlakRen *vlr = RE_findOrAddVlak(shi->obr, shi->facenr); + + if (facenr & RE_QUAD_OFFS) + shade_input_set_triangle_i(shi, shi->obi, vlr, 0, 2, 3); + else + shade_input_set_triangle_i(shi, shi->obi, vlr, 0, 1, 2); + } + else + shi->vlr = NULL; /* general signal we got sky */ + } + else 
+ shi->vlr = NULL; /* general signal we got sky */ +} + +/* full osa case: copy static info */ +void shade_input_copy_triangle(ShadeInput *shi, ShadeInput *from) +{ + /* not so nice, but works... warning is in RE_shader_ext.h */ + memcpy(shi, from, sizeof(struct ShadeInputCopy)); +} + +/* copy data from strand to shadeinput */ +void shade_input_set_strand(ShadeInput *shi, StrandRen *strand, StrandPoint *spoint) +{ + /* note, shi->mat is set in node shaders */ + shi->mat = shi->mat_override ? shi->mat_override : strand->buffer->ma; + + shi->osatex = (shi->mat->texco & TEXCO_OSA); + shi->mode = shi->mat->mode_l; /* or-ed result for all nodes */ + + /* shade_input_set_viewco equivalent */ + copy_v3_v3(shi->co, spoint->co); + copy_v3_v3(shi->view, shi->co); + normalize_v3(shi->view); + + shi->xs = (int)spoint->x; + shi->ys = (int)spoint->y; + + if (shi->osatex || (R.r.mode & R_SHADOW)) { + copy_v3_v3(shi->dxco, spoint->dtco); + copy_v3_v3(shi->dyco, spoint->dsco); + } + + /* dxview, dyview, not supported */ + + /* facenormal, simply viewco flipped */ + copy_v3_v3(shi->facenor, spoint->nor); + + /* shade_input_set_normals equivalent */ + if (shi->mat->mode & MA_TANGENT_STR) { + copy_v3_v3(shi->vn, spoint->tan); + } + else { + float cross[3]; + + cross_v3_v3v3(cross, spoint->co, spoint->tan); + cross_v3_v3v3(shi->vn, cross, spoint->tan); + normalize_v3(shi->vn); + + if (dot_v3v3(shi->vn, shi->view) < 0.0f) + negate_v3(shi->vn); + } + + copy_v3_v3(shi->vno, shi->vn); +} + +void shade_input_set_strand_texco(ShadeInput *shi, StrandRen *strand, StrandVert *svert, StrandPoint *spoint) +{ + StrandBuffer *strandbuf = strand->buffer; + ObjectRen *obr = strandbuf->obr; + StrandVert *sv; + int mode = shi->mode; /* or-ed result for all nodes */ + short texco = shi->mat->texco; + + if ((shi->mat->texco & TEXCO_REFL)) { + /* shi->dxview, shi->dyview, not supported */ + } + + if (shi->osatex && (texco & (TEXCO_NORM | TEXCO_REFL))) { + /* not supported */ + } + + if (mode & 
(MA_TANGENT_V | MA_NORMAP_TANG)) { + copy_v3_v3(shi->tang, spoint->tan); + copy_v3_v3(shi->nmaptang, spoint->tan); + } + + if (mode & MA_STR_SURFDIFF) { + const float *surfnor = RE_strandren_get_surfnor(obr, strand, 0); + + if (surfnor) + copy_v3_v3(shi->surfnor, surfnor); + else + copy_v3_v3(shi->surfnor, shi->vn); + + if (shi->mat->strand_surfnor > 0.0f) { + shi->surfdist = 0.0f; + for (sv = strand->vert; sv != svert; sv++) + shi->surfdist += len_v3v3(sv->co, (sv + 1)->co); + shi->surfdist += spoint->t * len_v3v3(sv->co, (sv + 1)->co); + } + } + + if (R.r.mode & R_SPEED) { + const float *speed; + + speed = RE_strandren_get_winspeed(shi->obi, strand, 0); + if (speed) + copy_v4_v4(shi->winspeed, speed); + else + shi->winspeed[0] = shi->winspeed[1] = shi->winspeed[2] = shi->winspeed[3] = 0.0f; + } + + /* shade_input_set_shade_texco equivalent */ + if (texco & NEED_UV) { + if (texco & TEXCO_ORCO) { + copy_v3_v3(shi->lo, strand->orco); + /* no shi->osatex, orco derivatives are zero */ + } + + if (texco & TEXCO_GLOB) { + mul_v3_m4v3(shi->gl, R.viewinv, shi->co); + + if (shi->osatex) { + mul_v3_mat3_m4v3(shi->dxgl, R.viewinv, shi->dxco); + mul_v3_mat3_m4v3(shi->dygl, R.viewinv, shi->dyco); + } + } + + if (texco & TEXCO_STRAND) { + shi->strandco = spoint->strandco; + + if (shi->osatex) { + shi->dxstrand = spoint->dtstrandco; + shi->dystrand = 0.0f; + } + } + + if ((texco & TEXCO_UV) || (mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE))) { + MCol *mcol; + const float *uv; + char *name; + int i; + + shi->totuv = 0; + shi->totcol = 0; + shi->actuv = obr->actmtface; + shi->actcol = obr->actmcol; + + if (mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) { + for (i = 0; (mcol = RE_strandren_get_mcol(obr, strand, i, &name, 0)); i++) { + ShadeInputCol *scol = &shi->col[i]; + const char *cp = (char *)mcol; + + shi->totcol++; + scol->name = name; + + scol->col[0] = cp[3] / 255.0f; + scol->col[1] = cp[2] / 255.0f; + scol->col[2] = cp[1] / 255.0f; + scol->col[3] = cp[0] / 255.0f; + } + + 
if (shi->totcol) { + shi->vcol[0] = shi->col[shi->actcol].col[0]; + shi->vcol[1] = shi->col[shi->actcol].col[1]; + shi->vcol[2] = shi->col[shi->actcol].col[2]; + shi->vcol[3] = shi->col[shi->actcol].col[3]; + } + else { + shi->vcol[0] = 0.0f; + shi->vcol[1] = 0.0f; + shi->vcol[2] = 0.0f; + shi->vcol[3] = 0.0f; + } + } + + for (i = 0; (uv = RE_strandren_get_uv(obr, strand, i, &name, 0)); i++) { + ShadeInputUV *suv = &shi->uv[i]; + + shi->totuv++; + suv->name = name; + + if (strandbuf->overrideuv == i) { + suv->uv[0] = -1.0f; + suv->uv[1] = spoint->strandco; + suv->uv[2] = 0.0f; + } + else { + suv->uv[0] = -1.0f + 2.0f * uv[0]; + suv->uv[1] = -1.0f + 2.0f * uv[1]; + suv->uv[2] = 0.0f; /* texture.c assumes there are 3 coords */ + } + + if (shi->osatex) { + suv->dxuv[0] = 0.0f; + suv->dxuv[1] = 0.0f; + suv->dyuv[0] = 0.0f; + suv->dyuv[1] = 0.0f; + } + + if ((mode & MA_FACETEXTURE) && i == obr->actmtface) { + if ((mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) == 0) { + shi->vcol[0] = 1.0f; + shi->vcol[1] = 1.0f; + shi->vcol[2] = 1.0f; + shi->vcol[3] = 1.0f; + } + } + } + + if (shi->totuv == 0) { + ShadeInputUV *suv = &shi->uv[0]; + + suv->uv[0] = 0.0f; + suv->uv[1] = spoint->strandco; + suv->uv[2] = 0.0f; /* texture.c assumes there are 3 coords */ + + if (mode & MA_FACETEXTURE) { + /* no tface? 
set at 1.0f */ + shi->vcol[0] = 1.0f; + shi->vcol[1] = 1.0f; + shi->vcol[2] = 1.0f; + shi->vcol[3] = 1.0f; + } + } + + } + + if (texco & TEXCO_NORM) { + shi->orn[0] = -shi->vn[0]; + shi->orn[1] = -shi->vn[1]; + shi->orn[2] = -shi->vn[2]; + } + + if (texco & TEXCO_STRESS) { + /* not supported */ + } + + if (texco & TEXCO_TANGENT) { + if ((mode & MA_TANGENT_V) == 0) { + /* just prevent surprises */ + shi->tang[0] = shi->tang[1] = shi->tang[2] = 0.0f; + shi->nmaptang[0] = shi->nmaptang[1] = shi->nmaptang[2] = 0.0f; + } + } + } + + /* this only avalailable for scanline renders */ + if (shi->depth == 0) { + if (texco & TEXCO_WINDOW) { + shi->winco[0] = -1.0f + 2.0f * spoint->x / (float)R.winx; + shi->winco[1] = -1.0f + 2.0f * spoint->y / (float)R.winy; + shi->winco[2] = 0.0f; + + /* not supported */ + if (shi->osatex) { + shi->dxwin[0] = 0.0f; + shi->dywin[1] = 0.0f; + shi->dxwin[0] = 0.0f; + shi->dywin[1] = 0.0f; + } + } + } + + if (shi->do_manage) { + if (mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE)) { + srgb_to_linearrgb_v3_v3(shi->vcol, shi->vcol); + } + } + +} + +/* from scanline pixel coordinates to 3d coordinates, requires set_triangle */ +void shade_input_calc_viewco(ShadeInput *shi, float x, float y, float z, float view[3], float dxyview[2], float co[3], float dxco[3], float dyco[3]) +{ + /* returns not normalized, so is in viewplane coords */ + calc_view_vector(view, x, y); + + if (shi->mat->material_type == MA_TYPE_WIRE) { + /* wire cannot use normal for calculating shi->co, so + * we reconstruct the coordinate less accurate */ + if (R.r.mode & R_ORTHO) + calc_renderco_ortho(co, x, y, z); + else + calc_renderco_zbuf(co, view, z); + } + else { + /* for non-wire, intersect with the triangle to get the exact coord */ + float fac, dface, v1[3]; + + copy_v3_v3(v1, shi->v1->co); + if (shi->obi->flag & R_TRANSFORMED) + mul_m4_v3(shi->obi->mat, v1); + + dface = dot_v3v3(v1, shi->facenor); + + /* ortho viewplane cannot intersect using view vector originating 
in (0,0,0) */ + if (R.r.mode & R_ORTHO) { + /* x and y 3d coordinate can be derived from pixel coord and winmat */ + float fx = 2.0f / (R.winx * R.winmat[0][0]); + float fy = 2.0f / (R.winy * R.winmat[1][1]); + + co[0] = (x - 0.5f * R.winx) * fx - R.winmat[3][0] / R.winmat[0][0]; + co[1] = (y - 0.5f * R.winy) * fy - R.winmat[3][1] / R.winmat[1][1]; + + /* using a*x + b*y + c*z = d equation, (a b c) is normal */ + if (shi->facenor[2] != 0.0f) + co[2] = (dface - shi->facenor[0] * co[0] - shi->facenor[1] * co[1]) / shi->facenor[2]; + else + co[2] = 0.0f; + + if (dxco && dyco) { + dxco[0] = fx; + dxco[1] = 0.0f; + if (shi->facenor[2] != 0.0f) + dxco[2] = -(shi->facenor[0] * fx) / shi->facenor[2]; + else + dxco[2] = 0.0f; + + dyco[0] = 0.0f; + dyco[1] = fy; + if (shi->facenor[2] != 0.0f) + dyco[2] = -(shi->facenor[1] * fy) / shi->facenor[2]; + else + dyco[2] = 0.0f; + + if (dxyview) { + fac = (co[2] != 0.0f) ? (1.0f / co[2]) : 0.0f; + dxyview[0] = -R.viewdx * fac; + dxyview[1] = -R.viewdy * fac; + } + } + } + else { + float div; + + div = dot_v3v3(shi->facenor, view); + if (div != 0.0f) fac = dface / div; + else fac = 0.0f; + + co[0] = fac * view[0]; + co[1] = fac * view[1]; + co[2] = fac * view[2]; + + /* pixel dx/dy for render coord */ + if (dxco && dyco) { + float u = dface / (div - R.viewdx * shi->facenor[0]); + float v = dface / (div - R.viewdy * shi->facenor[1]); + + dxco[0] = co[0] - (view[0] - R.viewdx) * u; + dxco[1] = co[1] - (view[1]) * u; + dxco[2] = co[2] - (view[2]) * u; + + dyco[0] = co[0] - (view[0]) * v; + dyco[1] = co[1] - (view[1] - R.viewdy) * v; + dyco[2] = co[2] - (view[2]) * v; + + if (dxyview) { + if (fac != 0.0f) fac = 1.0f / fac; + dxyview[0] = -R.viewdx * fac; + dxyview[1] = -R.viewdy * fac; + } + } + } + } + + /* set camera coords - for scanline, it's always 0.0,0.0,0.0 (render is in camera space) + * however for raytrace it can be different - the position of the last intersection */ + shi->camera_co[0] = shi->camera_co[1] = shi->camera_co[2] 
= 0.0f; + + /* cannot normalize earlier, code above needs it at viewplane level */ + normalize_v3(view); +} + +/* from scanline pixel coordinates to 3d coordinates, requires set_triangle */ +void shade_input_set_viewco(ShadeInput *shi, float x, float y, float xs, float ys, float z) +{ + float *dxyview = NULL, *dxco = NULL, *dyco = NULL; + + /* currently in use for dithering (soft shadow), node preview, irregular shad */ + shi->xs = (int)xs; + shi->ys = (int)ys; + + /* original scanline coordinate without jitter */ + shi->scanco[0] = x; + shi->scanco[1] = y; + shi->scanco[2] = z; + + /* check if we need derivatives */ + if (shi->osatex || (R.r.mode & R_SHADOW)) { + dxco = shi->dxco; + dyco = shi->dyco; + + if ((shi->mat->texco & TEXCO_REFL)) + dxyview = &shi->dxview; + } + + shade_input_calc_viewco(shi, xs, ys, z, shi->view, dxyview, shi->co, dxco, dyco); +} + +void barycentric_differentials_from_position( + const float co[3], const float v1[3], const float v2[3], const float v3[3], + const float dxco[3], const float dyco[3], const float facenor[3], const bool differentials, + float *u, float *v, float *dx_u, float *dx_v, float *dy_u, float *dy_v) +{ + /* find most stable axis to project */ + int axis1, axis2; + axis_dominant_v3(&axis1, &axis2, facenor); + + /* compute u,v and derivatives */ + float t00 = v3[axis1] - v1[axis1]; + float t01 = v3[axis2] - v1[axis2]; + float t10 = v3[axis1] - v2[axis1]; + float t11 = v3[axis2] - v2[axis2]; + + float detsh = (t00 * t11 - t10 * t01); + detsh = (detsh != 0.0f) ? 
1.0f / detsh : 0.0f; + t00 *= detsh; t01 *= detsh; + t10 *= detsh; t11 *= detsh; + + *u = (v3[axis1] - co[axis1]) * t11 - (v3[axis2] - co[axis2]) * t10; + *v = (v3[axis2] - co[axis2]) * t00 - (v3[axis1] - co[axis1]) * t01; + if (differentials) { + *dx_u = dxco[axis1] * t11 - dxco[axis2] * t10; + *dx_v = dxco[axis2] * t00 - dxco[axis1] * t01; + *dy_u = dyco[axis1] * t11 - dyco[axis2] * t10; + *dy_v = dyco[axis2] * t00 - dyco[axis1] * t01; + } +} +/* calculate U and V, for scanline (silly render face u and v are in range -1 to 0) */ +void shade_input_set_uv(ShadeInput *shi) +{ + VlakRen *vlr = shi->vlr; + + if ((vlr->flag & R_SMOOTH) || (shi->mat->texco & NEED_UV) || (shi->passflag & SCE_PASS_UV)) { + float v1[3], v2[3], v3[3]; + + copy_v3_v3(v1, shi->v1->co); + copy_v3_v3(v2, shi->v2->co); + copy_v3_v3(v3, shi->v3->co); + + if (shi->obi->flag & R_TRANSFORMED) { + mul_m4_v3(shi->obi->mat, v1); + mul_m4_v3(shi->obi->mat, v2); + mul_m4_v3(shi->obi->mat, v3); + } + + /* exception case for wire render of edge */ + if (vlr->v2 == vlr->v3) { + float lend, lenc; + + lend = len_v3v3(v2, v1); + lenc = len_v3v3(shi->co, v1); + + if (lend == 0.0f) { + shi->u = shi->v = 0.0f; + } + else { + shi->u = -(1.0f - lenc / lend); + shi->v = 0.0f; + } + + if (shi->osatex) { + shi->dx_u = 0.0f; + shi->dx_v = 0.0f; + shi->dy_u = 0.0f; + shi->dy_v = 0.0f; + } + } + else { + barycentric_differentials_from_position( + shi->co, v1, v2, v3, shi->dxco, shi->dyco, shi->facenor, shi->osatex, + &shi->u, &shi->v, &shi->dx_u, &shi->dx_v, &shi->dy_u, &shi->dy_v); + + shi->u = -shi->u; + shi->v = -shi->v; + + /* u and v are in range -1 to 0, we allow a little bit extra but not too much, screws up speedvectors */ + CLAMP(shi->u, -2.0f, 1.0f); + CLAMP(shi->v, -2.0f, 1.0f); + } + } +} + +void shade_input_set_normals(ShadeInput *shi) +{ + float u = shi->u, v = shi->v; + float l = 1.0f + u + v; + + shi->flippednor = 0; + + /* test flip normals to viewing direction */ + if (!(shi->vlr->flag & R_TANGENT)) { + 
if (dot_v3v3(shi->facenor, shi->view) < 0.0f) { + negate_v3(shi->facenor); + shi->flippednor = 1; + } + } + + /* calculate vertexnormals */ + if (shi->vlr->flag & R_SMOOTH) { + float *n1 = shi->n1, *n2 = shi->n2, *n3 = shi->n3; + + if (shi->flippednor) { + negate_v3(n1); + negate_v3(n2); + negate_v3(n3); + } + + shi->vn[0] = l * n3[0] - u * n1[0] - v * n2[0]; + shi->vn[1] = l * n3[1] - u * n1[1] - v * n2[1]; + shi->vn[2] = l * n3[2] - u * n1[2] - v * n2[2]; + + /* use unnormalized normal (closer to games) */ + copy_v3_v3(shi->nmapnorm, shi->vn); + + normalize_v3(shi->vn); + } + else { + copy_v3_v3(shi->vn, shi->facenor); + copy_v3_v3(shi->nmapnorm, shi->vn); + } + + /* used in nodes */ + copy_v3_v3(shi->vno, shi->vn); + + /* flip normals to viewing direction */ + if (!(shi->vlr->flag & R_TANGENT)) + if (dot_v3v3(shi->facenor, shi->view) < 0.0f) + shade_input_flip_normals(shi); +} + +/* XXX shi->flippednor messes up otherwise */ +void shade_input_set_vertex_normals(ShadeInput *shi) +{ + float u = shi->u, v = shi->v; + float l = 1.0f + u + v; + + /* calculate vertexnormals */ + if (shi->vlr->flag & R_SMOOTH) { + const float *n1 = shi->n1, *n2 = shi->n2, *n3 = shi->n3; + + shi->vn[0] = l * n3[0] - u * n1[0] - v * n2[0]; + shi->vn[1] = l * n3[1] - u * n1[1] - v * n2[1]; + shi->vn[2] = l * n3[2] - u * n1[2] - v * n2[2]; + + /* use unnormalized normal (closer to games) */ + copy_v3_v3(shi->nmapnorm, shi->vn); + + normalize_v3(shi->vn); + } + else { + copy_v3_v3(shi->vn, shi->facenor); + copy_v3_v3(shi->nmapnorm, shi->vn); + } + + /* used in nodes */ + copy_v3_v3(shi->vno, shi->vn); +} + + +/* use by raytrace, sss, bake to flip into the right direction */ +void shade_input_flip_normals(ShadeInput *shi) +{ + negate_v3(shi->facenor); + negate_v3(shi->vn); + negate_v3(shi->vno); + negate_v3(shi->nmapnorm); + shi->flippednor = !shi->flippednor; +} + +void shade_input_set_shade_texco(ShadeInput *shi) +{ + ObjectInstanceRen *obi = shi->obi; + ObjectRen *obr = shi->obr; + 
VertRen *v1 = shi->v1, *v2 = shi->v2, *v3 = shi->v3; + float u = shi->u, v = shi->v; + float l = 1.0f + u + v, dl; + int mode = shi->mode; /* or-ed result for all nodes */ + int mode2 = shi->mode2; + short texco = shi->mat->texco; + const bool need_mikk_tangent = (mode & MA_NORMAP_TANG || R.flag & R_NEED_TANGENT); + const bool need_mikk_tangent_concrete = (mode2 & MA_TANGENT_CONCRETE) != 0; + + /* calculate dxno */ + if (shi->vlr->flag & R_SMOOTH) { + + if (shi->osatex && (texco & (TEXCO_NORM | TEXCO_REFL)) ) { + const float *n1 = shi->n1, *n2 = shi->n2, *n3 = shi->n3; + + dl = shi->dx_u + shi->dx_v; + shi->dxno[0] = dl * n3[0] - shi->dx_u * n1[0] - shi->dx_v * n2[0]; + shi->dxno[1] = dl * n3[1] - shi->dx_u * n1[1] - shi->dx_v * n2[1]; + shi->dxno[2] = dl * n3[2] - shi->dx_u * n1[2] - shi->dx_v * n2[2]; + dl = shi->dy_u + shi->dy_v; + shi->dyno[0] = dl * n3[0] - shi->dy_u * n1[0] - shi->dy_v * n2[0]; + shi->dyno[1] = dl * n3[1] - shi->dy_u * n1[1] - shi->dy_v * n2[1]; + shi->dyno[2] = dl * n3[2] - shi->dy_u * n1[2] - shi->dy_v * n2[2]; + + } + } + + /* calc tangents */ + if (mode & (MA_TANGENT_V | MA_NORMAP_TANG) || mode2 & MA_TANGENT_CONCRETE || R.flag & R_NEED_TANGENT) { + const float *s1, *s2, *s3; + float tl, tu, tv; + + if (shi->vlr->flag & R_SMOOTH) { + tl = l; + tu = u; + tv = v; + } + else { + /* qdn: flat faces have tangents too, + * could pick either one, using average here */ + tl = 1.0f / 3.0f; + tu = -1.0f / 3.0f; + tv = -1.0f / 3.0f; + } + + shi->tang[0] = shi->tang[1] = shi->tang[2] = 0.0f; + shi->nmaptang[0] = shi->nmaptang[1] = shi->nmaptang[2] = 0.0f; + + if (mode & MA_TANGENT_V) { + s1 = RE_vertren_get_tangent(obr, v1, 0); + s2 = RE_vertren_get_tangent(obr, v2, 0); + s3 = RE_vertren_get_tangent(obr, v3, 0); + + if (s1 && s2 && s3) { + shi->tang[0] = (tl * s3[0] - tu * s1[0] - tv * s2[0]); + shi->tang[1] = (tl * s3[1] - tu * s1[1] - tv * s2[1]); + shi->tang[2] = (tl * s3[2] - tu * s1[2] - tv * s2[2]); + + if (obi->flag & R_TRANSFORMED) + 
mul_m3_v3(obi->nmat, shi->tang); + + normalize_v3(shi->tang); + copy_v3_v3(shi->nmaptang, shi->tang); + } + } + + if (need_mikk_tangent || need_mikk_tangent_concrete) { + int j1 = shi->i1, j2 = shi->i2, j3 = shi->i3; + float c0[3], c1[3], c2[3]; + int acttang = obr->actmtface; + + vlr_set_uv_indices(shi->vlr, &j1, &j2, &j3); + + /* cycle through all tangent in vlakren */ + for (int i = 0; i < MAX_MTFACE; i++) { + const float *tangent = RE_vlakren_get_nmap_tangent(obr, shi->vlr, i, false); + if (!tangent) + continue; + + copy_v3_v3(c0, &tangent[j1 * 4]); + copy_v3_v3(c1, &tangent[j2 * 4]); + copy_v3_v3(c2, &tangent[j3 * 4]); + + /* keeping tangents normalized at vertex level + * corresponds better to how it's done in game engines */ + if (obi->flag & R_TRANSFORMED) { + mul_mat3_m4_v3(obi->mat, c0); normalize_v3(c0); + mul_mat3_m4_v3(obi->mat, c1); normalize_v3(c1); + mul_mat3_m4_v3(obi->mat, c2); normalize_v3(c2); + } + + /* we don't normalize the interpolated TBN tangent + * corresponds better to how it's done in game engines */ + shi->tangents[i][0] = (tl * c2[0] - tu * c0[0] - tv * c1[0]); + shi->tangents[i][1] = (tl * c2[1] - tu * c0[1] - tv * c1[1]); + shi->tangents[i][2] = (tl * c2[2] - tu * c0[2] - tv * c1[2]); + + /* the sign is the same for all 3 vertices of any + * non degenerate triangle. 
*/ + shi->tangents[i][3] = tangent[j1 * 4 + 3]; + + if (acttang == i && need_mikk_tangent) { + for (int m = 0; m < 4; m++) { + shi->nmaptang[m] = shi->tangents[i][m]; + } + } + } + } + } + + if (mode & MA_STR_SURFDIFF) { + const float *surfnor = RE_vlakren_get_surfnor(obr, shi->vlr, 0); + + if (surfnor) { + copy_v3_v3(shi->surfnor, surfnor); + if (obi->flag & R_TRANSFORMED) + mul_m3_v3(obi->nmat, shi->surfnor); + } + else + copy_v3_v3(shi->surfnor, shi->vn); + + shi->surfdist = 0.0f; + } + + if (R.r.mode & R_SPEED) { + const float *s1, *s2, *s3; + + s1 = RE_vertren_get_winspeed(obi, v1, 0); + s2 = RE_vertren_get_winspeed(obi, v2, 0); + s3 = RE_vertren_get_winspeed(obi, v3, 0); + if (s1 && s2 && s3) { + shi->winspeed[0] = (l * s3[0] - u * s1[0] - v * s2[0]); + shi->winspeed[1] = (l * s3[1] - u * s1[1] - v * s2[1]); + shi->winspeed[2] = (l * s3[2] - u * s1[2] - v * s2[2]); + shi->winspeed[3] = (l * s3[3] - u * s1[3] - v * s2[3]); + } + else { + shi->winspeed[0] = shi->winspeed[1] = shi->winspeed[2] = shi->winspeed[3] = 0.0f; + } + } + + /* pass option forces UV calc */ + if ((shi->passflag & SCE_PASS_UV) || (R.flag & R_NEED_VCOL)) + texco |= (NEED_UV | TEXCO_UV); + + /* texture coordinates. 
shi->dxuv shi->dyuv have been set */ + if (texco & NEED_UV) { + + if (texco & TEXCO_ORCO) { + if (v1->orco) { + const float *o1, *o2, *o3; + + o1 = v1->orco; + o2 = v2->orco; + o3 = v3->orco; + + shi->lo[0] = l * o3[0] - u * o1[0] - v * o2[0]; + shi->lo[1] = l * o3[1] - u * o1[1] - v * o2[1]; + shi->lo[2] = l * o3[2] - u * o1[2] - v * o2[2]; + + if (shi->osatex) { + dl = shi->dx_u + shi->dx_v; + shi->dxlo[0] = dl * o3[0] - shi->dx_u * o1[0] - shi->dx_v * o2[0]; + shi->dxlo[1] = dl * o3[1] - shi->dx_u * o1[1] - shi->dx_v * o2[1]; + shi->dxlo[2] = dl * o3[2] - shi->dx_u * o1[2] - shi->dx_v * o2[2]; + dl = shi->dy_u + shi->dy_v; + shi->dylo[0] = dl * o3[0] - shi->dy_u * o1[0] - shi->dy_v * o2[0]; + shi->dylo[1] = dl * o3[1] - shi->dy_u * o1[1] - shi->dy_v * o2[1]; + shi->dylo[2] = dl * o3[2] - shi->dy_u * o1[2] - shi->dy_v * o2[2]; + } + } + + copy_v3_v3(shi->duplilo, obi->dupliorco); + } + + if (texco & TEXCO_GLOB) { + copy_v3_v3(shi->gl, shi->co); + mul_m4_v3(R.viewinv, shi->gl); + if (shi->osatex) { + copy_v3_v3(shi->dxgl, shi->dxco); + mul_mat3_m4_v3(R.viewinv, shi->dxgl); + copy_v3_v3(shi->dygl, shi->dyco); + mul_mat3_m4_v3(R.viewinv, shi->dygl); + } + } + + if (texco & TEXCO_STRAND) { + shi->strandco = (l * v3->accum - u * v1->accum - v * v2->accum); + if (shi->osatex) { + dl = shi->dx_u + shi->dx_v; + shi->dxstrand = dl * v3->accum - shi->dx_u * v1->accum - shi->dx_v * v2->accum; + dl = shi->dy_u + shi->dy_v; + shi->dystrand = dl * v3->accum - shi->dy_u * v1->accum - shi->dy_v * v2->accum; + } + } + + if ((texco & TEXCO_UV) || (mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE)) || (R.flag & R_NEED_VCOL)) { + VlakRen *vlr = shi->vlr; + MTFace *tface; + MCol *mcol; + char *name; + int i, j1 = shi->i1, j2 = shi->i2, j3 = shi->i3; + + /* uv and vcols are not copied on split, so set them according vlr divide flag */ + vlr_set_uv_indices(vlr, &j1, &j2, &j3); + + shi->totuv = 0; + shi->totcol = 0; + shi->actuv = obr->actmtface; + shi->actcol = obr->actmcol; + + 
if ((mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) || (R.flag & R_NEED_VCOL)) { + for (i = 0; (mcol = RE_vlakren_get_mcol(obr, vlr, i, &name, 0)); i++) { + ShadeInputCol *scol = &shi->col[i]; + const char *cp1, *cp2, *cp3; + float a[3]; + + shi->totcol++; + scol->name = name; + + cp1 = (char *)(mcol + j1); + cp2 = (char *)(mcol + j2); + cp3 = (char *)(mcol + j3); + + /* alpha values */ + a[0] = ((float)cp1[0]) / 255.f; + a[1] = ((float)cp2[0]) / 255.f; + a[2] = ((float)cp3[0]) / 255.f; + scol->col[3] = l * a[2] - u * a[0] - v * a[1]; + + /* sample premultiplied color value */ + scol->col[0] = (l * ((float)cp3[3]) * a[2] - u * ((float)cp1[3]) * a[0] - v * ((float)cp2[3]) * a[1]) / 255.f; + scol->col[1] = (l * ((float)cp3[2]) * a[2] - u * ((float)cp1[2]) * a[0] - v * ((float)cp2[2]) * a[1]) / 255.f; + scol->col[2] = (l * ((float)cp3[1]) * a[2] - u * ((float)cp1[1]) * a[0] - v * ((float)cp2[1]) * a[1]) / 255.f; + + /* if not zero alpha, restore non-multiplied color */ + if (scol->col[3]) { + mul_v3_fl(scol->col, 1.0f / scol->col[3]); + } + } + + if (shi->totcol) { + shi->vcol[0] = shi->col[shi->actcol].col[0]; + shi->vcol[1] = shi->col[shi->actcol].col[1]; + shi->vcol[2] = shi->col[shi->actcol].col[2]; + shi->vcol[3] = shi->col[shi->actcol].col[3]; + } + else { + shi->vcol[0] = 0.0f; + shi->vcol[1] = 0.0f; + shi->vcol[2] = 0.0f; + shi->vcol[3] = 1.0f; + } + } + + for (i = 0; (tface = RE_vlakren_get_tface(obr, vlr, i, &name, 0)); i++) { + ShadeInputUV *suv = &shi->uv[i]; + const float *uv1 = tface->uv[j1]; + const float *uv2 = tface->uv[j2]; + const float *uv3 = tface->uv[j3]; + + shi->totuv++; + suv->name = name; + + if ((shi->mat->mapflag & MA_MAPFLAG_UVPROJECT) && (shi->depth == 0)) { + float x = shi->xs; + float y = shi->ys; + + float s1[2] = {-1.0f + 2.0f * uv1[0], -1.0f + 2.0f * uv1[1]}; + float s2[2] = {-1.0f + 2.0f * uv2[0], -1.0f + 2.0f * uv2[1]}; + float s3[2] = {-1.0f + 2.0f * uv3[0], -1.0f + 2.0f * uv3[1]}; + + + float obwinmat[4][4], winmat[4][4], ho1[4], ho2[4], 
ho3[4]; + float Zmulx, Zmuly; + float hox, hoy, l_proj, dl_proj, u_proj, v_proj; + float s00, s01, s10, s11, detsh; + + /* old globals, localized now */ + Zmulx = ((float)R.winx) / 2.0f; + Zmuly = ((float)R.winy) / 2.0f; + + zbuf_make_winmat(&R, winmat); + if (shi->obi->flag & R_TRANSFORMED) + mul_m4_m4m4(obwinmat, winmat, obi->mat); + else + copy_m4_m4(obwinmat, winmat); + + zbuf_render_project(obwinmat, v1->co, ho1); + zbuf_render_project(obwinmat, v2->co, ho2); + zbuf_render_project(obwinmat, v3->co, ho3); + + s00 = ho3[0] / ho3[3] - ho1[0] / ho1[3]; + s01 = ho3[1] / ho3[3] - ho1[1] / ho1[3]; + s10 = ho3[0] / ho3[3] - ho2[0] / ho2[3]; + s11 = ho3[1] / ho3[3] - ho2[1] / ho2[3]; + + detsh = s00 * s11 - s10 * s01; + detsh = (detsh != 0.0f) ? 1.0f / detsh : 0.0f; + s00 *= detsh; s01 *= detsh; + s10 *= detsh; s11 *= detsh; + + /* recalc u and v again */ + hox = x / Zmulx - 1.0f; + hoy = y / Zmuly - 1.0f; + u_proj = (hox - ho3[0] / ho3[3]) * s11 - (hoy - ho3[1] / ho3[3]) * s10; + v_proj = (hoy - ho3[1] / ho3[3]) * s00 - (hox - ho3[0] / ho3[3]) * s01; + l_proj = 1.0f + u_proj + v_proj; + + suv->uv[0] = l_proj * s3[0] - u_proj * s1[0] - v_proj * s2[0]; + suv->uv[1] = l_proj * s3[1] - u_proj * s1[1] - v_proj * s2[1]; + suv->uv[2] = 0.0f; + + if (shi->osatex) { + float dxuv[2], dyuv[2]; + dxuv[0] = s11 / Zmulx; + dxuv[1] = -s01 / Zmulx; + dyuv[0] = -s10 / Zmuly; + dyuv[1] = s00 / Zmuly; + + dl_proj = dxuv[0] + dxuv[1]; + suv->dxuv[0] = dl_proj * s3[0] - dxuv[0] * s1[0] - dxuv[1] * s2[0]; + suv->dxuv[1] = dl_proj * s3[1] - dxuv[0] * s1[1] - dxuv[1] * s2[1]; + dl_proj = dyuv[0] + dyuv[1]; + suv->dyuv[0] = dl_proj * s3[0] - dyuv[0] * s1[0] - dyuv[1] * s2[0]; + suv->dyuv[1] = dl_proj * s3[1] - dyuv[0] * s1[1] - dyuv[1] * s2[1]; + } + } + else { + + suv->uv[0] = -1.0f + 2.0f * (l * uv3[0] - u * uv1[0] - v * uv2[0]); + suv->uv[1] = -1.0f + 2.0f * (l * uv3[1] - u * uv1[1] - v * uv2[1]); + suv->uv[2] = 0.0f; /* texture.c assumes there are 3 coords */ + + if (shi->osatex) { + 
float duv[2]; + + dl = shi->dx_u + shi->dx_v; + duv[0] = shi->dx_u; + duv[1] = shi->dx_v; + + suv->dxuv[0] = 2.0f * (dl * uv3[0] - duv[0] * uv1[0] - duv[1] * uv2[0]); + suv->dxuv[1] = 2.0f * (dl * uv3[1] - duv[0] * uv1[1] - duv[1] * uv2[1]); + + dl = shi->dy_u + shi->dy_v; + duv[0] = shi->dy_u; + duv[1] = shi->dy_v; + + suv->dyuv[0] = 2.0f * (dl * uv3[0] - duv[0] * uv1[0] - duv[1] * uv2[0]); + suv->dyuv[1] = 2.0f * (dl * uv3[1] - duv[0] * uv1[1] - duv[1] * uv2[1]); + } + + if ((mode & MA_FACETEXTURE) && i == obr->actmtface) { + if (((mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) == 0) && ((R.flag & R_NEED_VCOL) == 0)) { + shi->vcol[0] = 1.0f; + shi->vcol[1] = 1.0f; + shi->vcol[2] = 1.0f; + shi->vcol[3] = 1.0f; + } + if (tface->tpage) { + render_realtime_texture(shi, tface->tpage); + } + } + } + } + + shi->dupliuv[0] = -1.0f + 2.0f * obi->dupliuv[0]; + shi->dupliuv[1] = -1.0f + 2.0f * obi->dupliuv[1]; + shi->dupliuv[2] = 0.0f; + + if (shi->totuv == 0) { + ShadeInputUV *suv = &shi->uv[0]; + + suv->uv[0] = 2.0f * (u + .5f); + suv->uv[1] = 2.0f * (v + .5f); + suv->uv[2] = 0.0f; /* texture.c assumes there are 3 coords */ + + if (mode & MA_FACETEXTURE) { + /* no tface? 
set at 1.0f */ + shi->vcol[0] = 1.0f; + shi->vcol[1] = 1.0f; + shi->vcol[2] = 1.0f; + shi->vcol[3] = 1.0f; + } + } + } + + if (texco & TEXCO_NORM) { + shi->orn[0] = -shi->vn[0]; + shi->orn[1] = -shi->vn[1]; + shi->orn[2] = -shi->vn[2]; + } + + if (texco & TEXCO_STRESS) { + const float *s1, *s2, *s3; + + s1 = RE_vertren_get_stress(obr, v1, 0); + s2 = RE_vertren_get_stress(obr, v2, 0); + s3 = RE_vertren_get_stress(obr, v3, 0); + if (s1 && s2 && s3) { + shi->stress = l * s3[0] - u * s1[0] - v * s2[0]; + if (shi->stress < 1.0f) shi->stress -= 1.0f; + else shi->stress = (shi->stress - 1.0f) / shi->stress; + } + else shi->stress = 0.0f; + } + + if (texco & TEXCO_TANGENT) { + if ((mode & MA_TANGENT_V) == 0) { + /* just prevent surprises */ + shi->tang[0] = shi->tang[1] = shi->tang[2] = 0.0f; + shi->nmaptang[0] = shi->nmaptang[1] = shi->nmaptang[2] = 0.0f; + } + } + } + + /* this only avalailable for scanline renders */ + if (shi->depth == 0) { + float x = shi->xs; + float y = shi->ys; + + if (texco & TEXCO_WINDOW) { + shi->winco[0] = -1.0f + 2.0f * x / (float)R.winx; + shi->winco[1] = -1.0f + 2.0f * y / (float)R.winy; + shi->winco[2] = 0.0f; + if (shi->osatex) { + shi->dxwin[0] = 2.0f / (float)R.winx; + shi->dywin[1] = 2.0f / (float)R.winy; + shi->dxwin[1] = shi->dxwin[2] = 0.0f; + shi->dywin[0] = shi->dywin[2] = 0.0f; + } + } + } + /* else { + * Note! For raytracing winco is not set, + * important because thus means all shader input's need to have their variables set to zero + * else un-initialized values are used + */ + if (shi->do_manage) { + if ((mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE)) || (R.flag & R_NEED_VCOL)) { + srgb_to_linearrgb_v3_v3(shi->vcol, shi->vcol); + } + } + +} + +/* ****************** ShadeSample ************************************** */ + +/* initialize per part, not per pixel! 
*/ +void shade_input_initialize(ShadeInput *shi, RenderPart *pa, RenderLayer *rl, int sample) +{ + + memset(shi, 0, sizeof(ShadeInput)); + + shi->sample = sample; + shi->thread = pa->thread; + shi->do_preview = (R.r.scemode & R_MATNODE_PREVIEW) != 0; + + shi->do_manage = BKE_scene_check_color_management_enabled(R.scene); + shi->use_world_space_shading = BKE_scene_use_world_space_shading(R.scene); + + shi->lay = rl->lay; + shi->layflag = rl->layflag; + shi->passflag = rl->passflag; + shi->combinedflag = ~rl->pass_xor; + shi->mat_override = rl->mat_override; + shi->light_override = rl->light_override; +// shi->rl= rl; + /* note shi.depth==0 means first hit, not raytracing */ + +} + +/* initialize per part, not per pixel! */ +void shade_sample_initialize(ShadeSample *ssamp, RenderPart *pa, RenderLayer *rl) +{ + int a, tot; + + tot = R.osa == 0 ? 1 : R.osa; + + for (a = 0; a < tot; a++) { + shade_input_initialize(&ssamp->shi[a], pa, rl, a); + memset(&ssamp->shr[a], 0, sizeof(ShadeResult)); + } + + get_sample_layers(pa, rl, ssamp->rlpp); +} + +/* Do AO or (future) GI */ +void shade_samples_do_AO(ShadeSample *ssamp) +{ + if (!(R.r.mode & R_SHADOW)) + return; + if (!(R.r.mode & R_RAYTRACE) && !(R.wrld.ao_gather_method == WO_AOGATHER_APPROX)) + return; + + if (R.wrld.mode & (WO_AMB_OCC | WO_ENV_LIGHT | WO_INDIRECT_LIGHT)) { + ShadeInput *shi = &ssamp->shi[0]; + int sample; + + if (((shi->passflag & SCE_PASS_COMBINED) && (shi->combinedflag & (SCE_PASS_AO | SCE_PASS_ENVIRONMENT | SCE_PASS_INDIRECT))) || + (shi->passflag & (SCE_PASS_AO | SCE_PASS_ENVIRONMENT | SCE_PASS_INDIRECT))) + { + for (sample = 0; sample < ssamp->tot; shi++, sample++) + if (!(shi->mode & MA_SHLESS)) + ambient_occlusion(shi); /* stores in shi->ao[] */ + } + } +} + + +void shade_samples_fill_with_ps(ShadeSample *ssamp, PixStr *ps, int x, int y) +{ + ShadeInput *shi; + float xs, ys; + + ssamp->tot = 0; + + for (shi = ssamp->shi; ps; ps = ps->next) { + shade_input_set_triangle(shi, ps->obi, ps->facenr, 1); 
+ + if (shi->vlr) { /* NULL happens for env material or for 'all z' */ + unsigned short curmask = ps->mask; + + /* full osa is only set for OSA renders */ + if (shi->vlr->flag & R_FULL_OSA) { + short shi_cp = 0, samp; + + for (samp = 0; samp < R.osa; samp++) { + if (curmask & (1 << samp)) { + /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */ + xs = (float)x + R.jit[samp][0] + 0.5f; + ys = (float)y + R.jit[samp][1] + 0.5f; + + if (shi_cp) + shade_input_copy_triangle(shi, shi - 1); + + shi->mask = (1 << samp); +// shi->rl= ssamp->rlpp[samp]; + shi->samplenr = R.shadowsamplenr[shi->thread]++; /* this counter is not being reset per pixel */ + shade_input_set_viewco(shi, x, y, xs, ys, (float)ps->z); + shade_input_set_uv(shi); + if (shi_cp == 0) + shade_input_set_normals(shi); + else /* XXX shi->flippednor messes up otherwise */ + shade_input_set_vertex_normals(shi); + + shi_cp = 1; + shi++; + } + } + } + else { + if (R.osa) { + short b = R.samples->centmask[curmask]; + xs = (float)x + R.samples->centLut[b & 15] + 0.5f; + ys = (float)y + R.samples->centLut[b >> 4] + 0.5f; + } + else if (R.i.curblur) { + xs= (float)x + R.mblur_jit[R.i.curblur-1][0] + 0.5f; + ys= (float)y + R.mblur_jit[R.i.curblur-1][1] + 0.5f; + } + else { + xs = (float)x + 0.5f; + ys = (float)y + 0.5f; + } + + shi->mask = curmask; + shi->samplenr = R.shadowsamplenr[shi->thread]++; + shade_input_set_viewco(shi, x, y, xs, ys, (float)ps->z); + shade_input_set_uv(shi); + shade_input_set_normals(shi); + shi++; + } + + /* total sample amount, shi->sample is static set in initialize */ + if (shi != ssamp->shi) + ssamp->tot = (shi - 1)->sample + 1; + } + } +} + +/* shades samples, returns true if anything happened */ +int shade_samples(ShadeSample *ssamp, PixStr *ps, int x, int y) +{ + shade_samples_fill_with_ps(ssamp, ps, x, y); + + if (ssamp->tot) { + ShadeInput *shi = ssamp->shi; + ShadeResult *shr = ssamp->shr; + int samp; + + /* if shadow or AO? 
*/ + shade_samples_do_AO(ssamp); + + /* if shade (all shadepinputs have same passflag) */ + if (ssamp->shi[0].passflag & ~(SCE_PASS_Z | SCE_PASS_INDEXOB | SCE_PASS_INDEXMA)) { + + for (samp = 0; samp < ssamp->tot; samp++, shi++, shr++) { + shade_input_set_shade_texco(shi); + shade_input_do_shade(shi, shr); + } + } + else if (shi->passflag & SCE_PASS_Z) { + for (samp = 0; samp < ssamp->tot; samp++, shi++, shr++) + shr->z = -shi->co[2]; + } + + return 1; + } + return 0; +} + diff --git a/source/blender/render/intern/source/shadeoutput.c b/source/blender/render/intern/source/shadeoutput.c new file mode 100644 index 00000000000..090c249defb --- /dev/null +++ b/source/blender/render/intern/source/shadeoutput.c @@ -0,0 +1,2182 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2006 Blender Foundation + * All rights reserved. + * + * Contributors: Hos, Robert Wenzlaff. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/shadeoutput.c + * \ingroup render + */ + +#include <stdio.h> +#include <float.h> +#include <math.h> +#include <string.h> + +#include "BLI_math.h" +#include "BLI_utildefines.h" + +#include "BKE_colorband.h" +#include "BKE_colortools.h" +#include "BKE_material.h" + +#include "DNA_group_types.h" +#include "DNA_lamp_types.h" +#include "DNA_material_types.h" + +/* local include */ +#include "occlusion.h" +#include "render_types.h" +#include "rendercore.h" +#include "shadbuf.h" +#include "sss.h" +#include "texture.h" + +#include "shading.h" /* own include */ + +#include "IMB_colormanagement.h" + +/* could enable at some point but for now there are far too many conversions */ +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wdouble-promotion" +#endif + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +ListBase *get_lights(ShadeInput *shi) +{ + + if (R.r.scemode & R_BUTS_PREVIEW) + return &R.lights; + if (shi->light_override) + return &shi->light_override->gobject; + if (shi->mat && shi->mat->group) + return &shi->mat->group->gobject; + + return &R.lights; +} + +#if 0 +static void fogcolor(const float colf[3], float *rco, float *view) +{ + float alpha, stepsize, startdist, dist, hor[4], zen[3], vec[3], dview[3]; + float div=0.0f, distfac; + + hor[0]= R.wrld.horr; hor[1]= R.wrld.horg; hor[2]= R.wrld.horb; + zen[0]= R.wrld.zenr; zen[1]= R.wrld.zeng; zen[2]= R.wrld.zenb; + + copy_v3_v3(vec, rco); + + /* we loop from cur coord to mist start in steps */ + stepsize= 1.0f; + + div= ABS(view[2]); + dview[0]= view[0]/(stepsize*div); + dview[1]= view[1]/(stepsize*div); + dview[2]= -stepsize; + + startdist= -rco[2] + BLI_frand(); + 
for (dist= startdist; dist>R.wrld.miststa; dist-= stepsize) { + + hor[0]= R.wrld.horr; hor[1]= R.wrld.horg; hor[2]= R.wrld.horb; + alpha= 1.0f; + do_sky_tex(vec, vec, NULL, hor, zen, &alpha); + + distfac= (dist-R.wrld.miststa)/R.wrld.mistdist; + + hor[3]= hor[0]*distfac*distfac; + + /* premul! */ + alpha= hor[3]; + hor[0]= hor[0]*alpha; + hor[1]= hor[1]*alpha; + hor[2]= hor[2]*alpha; + addAlphaOverFloat(colf, hor); + + sub_v3_v3(vec, dview); + } +} +#endif + +/* zcor is distance, co the 3d coordinate in eye space, return alpha */ +float mistfactor(float zcor, float const co[3]) +{ + float fac, hi; + + fac = zcor - R.wrld.miststa; /* zcor is calculated per pixel */ + + /* fac= -co[2]-R.wrld.miststa; */ + + if (fac > 0.0f) { + if (fac < R.wrld.mistdist) { + + fac = (fac / R.wrld.mistdist); + + if (R.wrld.mistype == 0) { + fac *= fac; + } + else if (R.wrld.mistype == 1) { + /* pass */ + } + else { + fac = sqrtf(fac); + } + } + else { + fac = 1.0f; + } + } + else { + fac = 0.0f; + } + + /* height switched off mist */ + if (R.wrld.misthi!=0.0f && fac!=0.0f) { + /* at height misthi the mist is completely gone */ + + hi = R.viewinv[0][2] * co[0] + + R.viewinv[1][2] * co[1] + + R.viewinv[2][2] * co[2] + + R.viewinv[3][2]; + + if (hi > R.wrld.misthi) { + fac = 0.0f; + } + else if (hi>0.0f) { + hi= (R.wrld.misthi-hi)/R.wrld.misthi; + fac*= hi*hi; + } + } + + return (1.0f-fac)* (1.0f-R.wrld.misi); +} + +static void spothalo(struct LampRen *lar, ShadeInput *shi, float *intens) +{ + double a, b, c, disc, nray[3], npos[3]; + double t0, t1 = 0.0f, t2= 0.0f, t3; + float p1[3], p2[3], ladist, maxz = 0.0f, maxy = 0.0f, haint; + int cuts; + bool do_clip = true, use_yco = false; + + *intens= 0.0f; + haint= lar->haint; + + if (R.r.mode & R_ORTHO) { + /* camera pos (view vector) cannot be used... 
*/ + /* camera position (cox,coy,0) rotate around lamp */ + p1[0]= shi->co[0]-lar->co[0]; + p1[1]= shi->co[1]-lar->co[1]; + p1[2]= -lar->co[2]; + mul_m3_v3(lar->imat, p1); + copy_v3db_v3fl(npos, p1); /* npos is double! */ + + /* pre-scale */ + npos[2] *= (double)lar->sh_zfac; + } + else { + copy_v3db_v3fl(npos, lar->sh_invcampos); /* in initlamp calculated */ + } + + /* rotate view */ + copy_v3db_v3fl(nray, shi->view); + mul_m3_v3_double(lar->imat, nray); + + if (R.wrld.mode & WO_MIST) { + /* patchy... */ + haint *= mistfactor(-lar->co[2], lar->co); + if (haint==0.0f) { + return; + } + } + + + /* rotate maxz */ + if (shi->co[2]==0.0f) { + do_clip = false; /* for when halo at sky */ + } + else { + p1[0]= shi->co[0]-lar->co[0]; + p1[1]= shi->co[1]-lar->co[1]; + p1[2]= shi->co[2]-lar->co[2]; + + maxz= lar->imat[0][2]*p1[0]+lar->imat[1][2]*p1[1]+lar->imat[2][2]*p1[2]; + maxz*= lar->sh_zfac; + maxy= lar->imat[0][1]*p1[0]+lar->imat[1][1]*p1[1]+lar->imat[2][1]*p1[2]; + + if (fabs(nray[2]) < FLT_EPSILON) { + use_yco = true; + } + } + + /* scale z to make sure volume is normalized */ + nray[2] *= (double)lar->sh_zfac; + /* nray does not need normalization */ + + ladist= lar->sh_zfac*lar->dist; + + /* solve */ + a = nray[0] * nray[0] + nray[1] * nray[1] - nray[2]*nray[2]; + b = nray[0] * npos[0] + nray[1] * npos[1] - nray[2]*npos[2]; + c = npos[0] * npos[0] + npos[1] * npos[1] - npos[2]*npos[2]; + + cuts= 0; + if (fabs(a) < DBL_EPSILON) { + /* + * Only one intersection point... 
+ */ + return; + } + else { + disc = b*b - a*c; + + if (disc==0.0) { + t1=t2= (-b)/ a; + cuts= 2; + } + else if (disc > 0.0) { + disc = sqrt(disc); + t1 = (-b + disc) / a; + t2 = (-b - disc) / a; + cuts= 2; + } + } + if (cuts==2) { + int ok1=0, ok2=0; + + /* sort */ + if (t1>t2) { + a= t1; t1= t2; t2= a; + } + + /* z of intersection points with diabolo */ + p1[2]= npos[2] + t1*nray[2]; + p2[2]= npos[2] + t2*nray[2]; + + /* evaluate both points */ + if (p1[2]<=0.0f) ok1= 1; + if (p2[2]<=0.0f && t1!=t2) ok2= 1; + + /* at least 1 point with negative z */ + if (ok1==0 && ok2==0) return; + + /* intersction point with -ladist, the bottom of the cone */ + if (use_yco == false) { + t3= ((double)(-ladist)-npos[2])/nray[2]; + + /* de we have to replace one of the intersection points? */ + if (ok1) { + if (p1[2]<-ladist) t1= t3; + } + else { + t1= t3; + } + if (ok2) { + if (p2[2]<-ladist) t2= t3; + } + else { + t2= t3; + } + } + else if (ok1==0 || ok2==0) return; + + /* at least 1 visible interesction point */ + if (t1<0.0 && t2<0.0) return; + + if (t1<0.0) t1= 0.0; + if (t2<0.0) t2= 0.0; + + if (t1==t2) return; + + /* sort again to be sure */ + if (t1>t2) { + a= t1; t1= t2; t2= a; + } + + /* calculate t0: is the maximum visible z (when halo is intersected by face) */ + if (do_clip) { + if (use_yco == false) t0 = ((double)maxz - npos[2]) / nray[2]; + else t0 = ((double)maxy - npos[1]) / nray[1]; + + if (t0 < t1) return; + if (t0 < t2) t2= t0; + } + + /* calc points */ + p1[0]= npos[0] + t1*nray[0]; + p1[1]= npos[1] + t1*nray[1]; + p1[2]= npos[2] + t1*nray[2]; + p2[0]= npos[0] + t2*nray[0]; + p2[1]= npos[1] + t2*nray[1]; + p2[2]= npos[2] + t2*nray[2]; + + + /* now we have 2 points, make three lengths with it */ + + a = len_v3(p1); + b = len_v3(p2); + c = len_v3v3(p1, p2); + + a/= ladist; + a= sqrt(a); + b/= ladist; + b= sqrt(b); + c/= ladist; + + *intens= c*( (1.0-a)+(1.0-b) ); + + /* WATCH IT: do not clip a,b en c at 1.0, this gives nasty little overflows + * at the edges 
(especially with narrow halos) */ + if (*intens<=0.0f) return; + + /* soft area */ + /* not needed because t0 has been used for p1/p2 as well */ + /* if (doclip && t0<t2) { */ + /* *intens *= (t0-t1)/(t2-t1); */ + /* } */ + + *intens *= haint; + + if (lar->shb && lar->shb->shadhalostep) { + *intens *= shadow_halo(lar, p1, p2); + } + + } +} + +void renderspothalo(ShadeInput *shi, float col[4], float alpha) +{ + ListBase *lights; + GroupObject *go; + LampRen *lar; + float i; + + if (alpha==0.0f) return; + + lights= get_lights(shi); + for (go=lights->first; go; go= go->next) { + lar= go->lampren; + if (lar==NULL) continue; + + if (lar->type==LA_SPOT && (lar->mode & LA_HALO) && (lar->buftype != LA_SHADBUF_DEEP) && lar->haint>0) { + + if (lar->mode & LA_LAYER) + if (shi->vlr && (lar->lay & shi->obi->lay)==0) + continue; + if ((lar->lay & shi->lay)==0) + continue; + + spothalo(lar, shi, &i); + if (i > 0.0f) { + const float i_alpha = i * alpha; + col[0] += i_alpha * lar->r; + col[1] += i_alpha * lar->g; + col[2] += i_alpha * lar->b; + col[3] += i_alpha; /* all premul */ + } + } + } + /* clip alpha, is needed for unified 'alpha threshold' (vanillaRenderPipe.c) */ + if (col[3]>1.0f) col[3]= 1.0f; +} + + + +/* ---------------- shaders ----------------------- */ + +static double Normalize_d(double *n) +{ + double d; + + d= n[0]*n[0]+n[1]*n[1]+n[2]*n[2]; + + if (d>0.00000000000000001) { + d= sqrt(d); + + n[0]/=d; + n[1]/=d; + n[2]/=d; + } + else { + n[0]=n[1]=n[2]= 0.0; + d= 0.0; + } + return d; +} + +/* mix of 'real' fresnel and allowing control. 
grad defines blending gradient */ +float fresnel_fac(const float view[3], const float vn[3], float grad, float fac) +{ + float t1, t2; + + if (fac==0.0f) return 1.0f; + + t1 = dot_v3v3(view, vn); + if (t1>0.0f) t2= 1.0f+t1; + else t2= 1.0f-t1; + + t2= grad + (1.0f-grad)*powf(t2, fac); + + if (t2<0.0f) return 0.0f; + else if (t2>1.0f) return 1.0f; + return t2; +} + +static double saacos_d(double fac) +{ + if (fac<= -1.0) return M_PI; + else if (fac>=1.0) return 0.0; + else return acos(fac); +} + +/* Stoke's form factor. Need doubles here for extreme small area sizes */ +static float area_lamp_energy(float (*area)[3], const float co[3], const float vn[3]) +{ + double fac; + double vec[4][3]; /* vectors of rendered co to vertices lamp */ + double cross[4][3]; /* cross products of this */ + double rad[4]; /* angles between vecs */ + + VECSUB(vec[0], co, area[0]); + VECSUB(vec[1], co, area[1]); + VECSUB(vec[2], co, area[2]); + VECSUB(vec[3], co, area[3]); + + Normalize_d(vec[0]); + Normalize_d(vec[1]); + Normalize_d(vec[2]); + Normalize_d(vec[3]); + + /* cross product */ +#define CROSS(dest, a, b) \ + { \ + dest[0]= a[1] * b[2] - a[2] * b[1]; \ + dest[1]= a[2] * b[0] - a[0] * b[2]; \ + dest[2]= a[0] * b[1] - a[1] * b[0]; \ + } (void)0 + + CROSS(cross[0], vec[0], vec[1]); + CROSS(cross[1], vec[1], vec[2]); + CROSS(cross[2], vec[2], vec[3]); + CROSS(cross[3], vec[3], vec[0]); + +#undef CROSS + + Normalize_d(cross[0]); + Normalize_d(cross[1]); + Normalize_d(cross[2]); + Normalize_d(cross[3]); + + /* angles */ + rad[0]= vec[0][0]*vec[1][0]+ vec[0][1]*vec[1][1]+ vec[0][2]*vec[1][2]; + rad[1]= vec[1][0]*vec[2][0]+ vec[1][1]*vec[2][1]+ vec[1][2]*vec[2][2]; + rad[2]= vec[2][0]*vec[3][0]+ vec[2][1]*vec[3][1]+ vec[2][2]*vec[3][2]; + rad[3]= vec[3][0]*vec[0][0]+ vec[3][1]*vec[0][1]+ vec[3][2]*vec[0][2]; + + rad[0]= saacos_d(rad[0]); + rad[1]= saacos_d(rad[1]); + rad[2]= saacos_d(rad[2]); + rad[3]= saacos_d(rad[3]); + + /* Stoke formula */ + fac= rad[0]*(vn[0]*cross[0][0]+ 
vn[1]*cross[0][1]+ vn[2]*cross[0][2]); + fac+= rad[1]*(vn[0]*cross[1][0]+ vn[1]*cross[1][1]+ vn[2]*cross[1][2]); + fac+= rad[2]*(vn[0]*cross[2][0]+ vn[1]*cross[2][1]+ vn[2]*cross[2][2]); + fac+= rad[3]*(vn[0]*cross[3][0]+ vn[1]*cross[3][1]+ vn[2]*cross[3][2]); + + if (fac<=0.0) return 0.0; + return fac; +} + +static float area_lamp_energy_multisample(LampRen *lar, const float co[3], float *vn) +{ + /* corner vectors are moved around according lamp jitter */ + float *jitlamp= lar->jitter, vec[3]; + float area[4][3], intens= 0.0f; + int a= lar->ray_totsamp; + + /* test if co is behind lamp */ + sub_v3_v3v3(vec, co, lar->co); + if (dot_v3v3(vec, lar->vec) < 0.0f) + return 0.0f; + + while (a--) { + vec[0]= jitlamp[0]; + vec[1]= jitlamp[1]; + vec[2]= 0.0f; + mul_m3_v3(lar->mat, vec); + + add_v3_v3v3(area[0], lar->area[0], vec); + add_v3_v3v3(area[1], lar->area[1], vec); + add_v3_v3v3(area[2], lar->area[2], vec); + add_v3_v3v3(area[3], lar->area[3], vec); + + intens+= area_lamp_energy(area, co, vn); + + jitlamp+= 2; + } + intens /= (float)lar->ray_totsamp; + + return pow(intens * lar->areasize, lar->k); /* corrected for buttons size and lar->dist^2 */ +} + +static float spec(float inp, int hard) +{ + float b1; + + if (inp>=1.0f) return 1.0f; + else if (inp<=0.0f) return 0.0f; + + b1= inp*inp; + /* avoid FPE */ + if (b1<0.01f) b1= 0.01f; + + if ((hard & 1)==0) inp= 1.0f; + if (hard & 2) inp*= b1; + b1*= b1; + if (hard & 4) inp*= b1; + b1*= b1; + if (hard & 8) inp*= b1; + b1*= b1; + if (hard & 16) inp*= b1; + b1*= b1; + + /* avoid FPE */ + if (b1<0.001f) b1= 0.0f; + + if (hard & 32) inp*= b1; + b1*= b1; + if (hard & 64) inp*=b1; + b1*= b1; + if (hard & 128) inp*=b1; + + if (b1<0.001f) b1= 0.0f; + + if (hard & 256) { + b1*= b1; + inp*=b1; + } + + return inp; +} + +static float Phong_Spec(const float n[3], const float l[3], const float v[3], int hard, int tangent ) +{ + float h[3]; + float rslt; + + h[0] = l[0] + v[0]; + h[1] = l[1] + v[1]; + h[2] = l[2] + v[2]; + 
normalize_v3(h); + + rslt = h[0]*n[0] + h[1]*n[1] + h[2]*n[2]; + if (tangent) rslt= sasqrt(1.0f - rslt*rslt); + + if ( rslt > 0.0f ) rslt= spec(rslt, hard); + else rslt = 0.0f; + + return rslt; +} + + +/* reduced cook torrance spec (for off-specular peak) */ +static float CookTorr_Spec(const float n[3], const float l[3], const float v[3], int hard, int tangent) +{ + float i, nh, nv, h[3]; + + h[0]= v[0]+l[0]; + h[1]= v[1]+l[1]; + h[2]= v[2]+l[2]; + normalize_v3(h); + + nh= n[0]*h[0]+n[1]*h[1]+n[2]*h[2]; + if (tangent) nh= sasqrt(1.0f - nh*nh); + else if (nh<0.0f) return 0.0f; + + nv= n[0]*v[0]+n[1]*v[1]+n[2]*v[2]; + if (tangent) nv= sasqrt(1.0f - nv*nv); + else if (nv<0.0f) nv= 0.0f; + + i= spec(nh, hard); + + i= i/(0.1f+nv); + return i; +} + +/* Blinn spec */ +static float Blinn_Spec(const float n[3], const float l[3], const float v[3], float refrac, float spec_power, int tangent) +{ + float i, nh, nv, nl, vh, h[3]; + float a, b, c, g=0.0f, p, f, ang; + + if (refrac < 1.0f) return 0.0f; + if (spec_power == 0.0f) return 0.0f; + + /* conversion from 'hardness' (1-255) to 'spec_power' (50 maps at 0.1) */ + if (spec_power<100.0f) + spec_power = sqrtf(1.0f / spec_power); + else spec_power= 10.0f/spec_power; + + h[0]= v[0]+l[0]; + h[1]= v[1]+l[1]; + h[2]= v[2]+l[2]; + normalize_v3(h); + + nh= n[0]*h[0]+n[1]*h[1]+n[2]*h[2]; /* Dot product between surface normal and half-way vector */ + if (tangent) nh= sasqrt(1.0f - nh*nh); + else if (nh<0.0f) return 0.0f; + + nv= n[0]*v[0]+n[1]*v[1]+n[2]*v[2]; /* Dot product between surface normal and view vector */ + if (tangent) nv= sasqrt(1.0f - nv*nv); + if (nv<=0.01f) nv= 0.01f; /* hrms... 
*/ + + nl= n[0]*l[0]+n[1]*l[1]+n[2]*l[2]; /* Dot product between surface normal and light vector */ + if (tangent) nl= sasqrt(1.0f - nl*nl); + if (nl<=0.01f) { + return 0.0f; + } + + vh= v[0]*h[0]+v[1]*h[1]+v[2]*h[2]; /* Dot product between view vector and half-way vector */ + if (vh<=0.0f) vh= 0.01f; + + a = 1.0f; + b = (2.0f*nh*nv)/vh; + c = (2.0f*nh*nl)/vh; + + if ( a < b && a < c ) g = a; + else if ( b < a && b < c ) g = b; + else if ( c < a && c < b ) g = c; + + p = sqrt((double)((refrac * refrac)+(vh * vh) - 1.0f)); + f = (((p-vh)*(p-vh))/((p+vh)*(p+vh)))*(1+((((vh*(p+vh))-1.0f)*((vh*(p+vh))-1.0f))/(((vh*(p-vh))+1.0f)*((vh*(p-vh))+1.0f)))); + ang = saacos(nh); + + i= f * g * exp((double)(-(ang*ang) / (2.0f*spec_power*spec_power))); + if (i<0.0f) i= 0.0f; + + return i; +} + +/* cartoon render spec */ +static float Toon_Spec(const float n[3], const float l[3], const float v[3], float size, float smooth, int tangent) +{ + float h[3]; + float ang; + float rslt; + + h[0] = l[0] + v[0]; + h[1] = l[1] + v[1]; + h[2] = l[2] + v[2]; + normalize_v3(h); + + rslt = h[0]*n[0] + h[1]*n[1] + h[2]*n[2]; + if (tangent) rslt = sasqrt(1.0f - rslt*rslt); + + ang = saacos( rslt ); + + if ( ang < size ) rslt = 1.0f; + else if ( ang >= (size + smooth) || smooth == 0.0f ) rslt = 0.0f; + else rslt = 1.0f - ((ang - size) / smooth); + + return rslt; +} + +/* Ward isotropic gaussian spec */ +static float WardIso_Spec(const float n[3], const float l[3], const float v[3], float rms, int tangent) +{ + float i, nh, nv, nl, h[3], angle, alpha; + + + /* half-way vector */ + h[0] = l[0] + v[0]; + h[1] = l[1] + v[1]; + h[2] = l[2] + v[2]; + normalize_v3(h); + + nh = n[0]*h[0]+n[1]*h[1]+n[2]*h[2]; /* Dot product between surface normal and half-way vector */ + if (tangent) nh = sasqrt(1.0f - nh*nh); + if (nh<=0.0f) nh = 0.001f; + + nv = n[0]*v[0]+n[1]*v[1]+n[2]*v[2]; /* Dot product between surface normal and view vector */ + if (tangent) nv = sasqrt(1.0f - nv*nv); + if (nv<=0.0f) nv = 0.001f; + + 
nl = n[0]*l[0]+n[1]*l[1]+n[2]*l[2]; /* Dot product between surface normal and light vector */ + if (tangent) nl = sasqrt(1.0f - nl*nl); + if (nl<=0.0f) nl = 0.001f; + + angle = tanf(saacos(nh)); + alpha = MAX2(rms, 0.001f); + + i= nl * (1.0f/(4.0f*(float)M_PI*alpha*alpha)) * (expf( -(angle*angle)/(alpha*alpha))/(sqrtf(nv*nl))); + + return i; +} + +/* cartoon render diffuse */ +static float Toon_Diff(const float n[3], const float l[3], const float UNUSED(v[3]), float size, float smooth) +{ + float rslt, ang; + + rslt = n[0]*l[0] + n[1]*l[1] + n[2]*l[2]; + + ang = saacos(rslt); + + if ( ang < size ) rslt = 1.0f; + else if ( ang >= (size + smooth) || smooth == 0.0f ) rslt = 0.0f; + else rslt = 1.0f - ((ang - size) / smooth); + + return rslt; +} + +/* Oren Nayar diffuse */ + +/* 'nl' is either dot product, or return value of area light */ +/* in latter case, only last multiplication uses 'nl' */ +static float OrenNayar_Diff(float nl, const float n[3], const float l[3], const float v[3], float rough ) +{ + float i/*, nh*/, nv /*, vh */, realnl, h[3]; + float a, b, t, A, B; + float Lit_A, View_A, Lit_B[3], View_B[3]; + + h[0]= v[0]+l[0]; + h[1]= v[1]+l[1]; + h[2]= v[2]+l[2]; + normalize_v3(h); + + /* nh= n[0]*h[0]+n[1]*h[1]+n[2]*h[2]; */ /* Dot product between surface normal and half-way vector */ + /* if (nh<0.0f) nh = 0.0f; */ + + nv= n[0]*v[0]+n[1]*v[1]+n[2]*v[2]; /* Dot product between surface normal and view vector */ + if (nv<=0.0f) nv= 0.0f; + + realnl= n[0]*l[0]+n[1]*l[1]+n[2]*l[2]; /* Dot product between surface normal and light vector */ + if (realnl<=0.0f) return 0.0f; + if (nl<0.0f) return 0.0f; /* value from area light */ + + /* vh= v[0]*h[0]+v[1]*h[1]+v[2]*h[2]; */ /* Dot product between view vector and halfway vector */ + /* if (vh<=0.0f) vh= 0.0f; */ + + Lit_A = saacos(realnl); + View_A = saacos( nv ); + + Lit_B[0] = l[0] - (realnl * n[0]); + Lit_B[1] = l[1] - (realnl * n[1]); + Lit_B[2] = l[2] - (realnl * n[2]); + normalize_v3(Lit_B); + + View_B[0] = 
v[0] - (nv * n[0]); + View_B[1] = v[1] - (nv * n[1]); + View_B[2] = v[2] - (nv * n[2]); + normalize_v3(View_B); + + t = Lit_B[0]*View_B[0] + Lit_B[1]*View_B[1] + Lit_B[2]*View_B[2]; + if ( t < 0 ) t = 0; + + if ( Lit_A > View_A ) { + a = Lit_A; + b = View_A; + } + else { + a = View_A; + b = Lit_A; + } + + A = 1.0f - (0.5f * ((rough * rough) / ((rough * rough) + 0.33f))); + B = 0.45f * ((rough * rough) / ((rough * rough) + 0.09f)); + + b*= 0.95f; /* prevent tangens from shooting to inf, 'nl' can be not a dot product here. */ + /* overflow only happens with extreme size area light, and higher roughness */ + i = nl * ( A + ( B * t * sinf(a) * tanf(b) ) ); + + return i; +} + +/* Minnaert diffuse */ +static float Minnaert_Diff(float nl, const float n[3], const float v[3], float darkness) +{ + float i, nv; + + /* nl = dot product between surface normal and light vector */ + if (nl <= 0.0f) + return 0.0f; + + /* nv = dot product between surface normal and view vector */ + nv = dot_v3v3(n, v); + if (nv < 0.0f) + nv = 0.0f; + + if (darkness <= 1.0f) + i = nl * pow(max_ff(nv * nl, 0.1f), (darkness - 1.0f) ); /*The Real model*/ + else + i = nl * pow( (1.001f - nv), (darkness - 1.0f) ); /*Nvidia model*/ + + return i; +} + +static float Fresnel_Diff(float *vn, float *lv, float *UNUSED(view), float fac_i, float fac) +{ + return fresnel_fac(lv, vn, fac_i, fac); +} + +/* --------------------------------------------- */ +/* also called from texture.c */ +void calc_R_ref(ShadeInput *shi) +{ + float i; + + /* shi->vn dot shi->view */ + i= -2*(shi->vn[0]*shi->view[0]+shi->vn[1]*shi->view[1]+shi->vn[2]*shi->view[2]); + + shi->ref[0]= (shi->view[0]+i*shi->vn[0]); + shi->ref[1]= (shi->view[1]+i*shi->vn[1]); + shi->ref[2]= (shi->view[2]+i*shi->vn[2]); + if (shi->osatex) { + if (shi->vlr->flag & R_SMOOTH) { + i= -2*( (shi->vn[0]+shi->dxno[0])*(shi->view[0]+shi->dxview) + + (shi->vn[1]+shi->dxno[1])*shi->view[1]+ (shi->vn[2]+shi->dxno[2])*shi->view[2] ); + + shi->dxref[0]= shi->ref[0]- ( 
shi->view[0]+shi->dxview+i*(shi->vn[0]+shi->dxno[0])); + shi->dxref[1]= shi->ref[1]- (shi->view[1]+ i*(shi->vn[1]+shi->dxno[1])); + shi->dxref[2]= shi->ref[2]- (shi->view[2]+ i*(shi->vn[2]+shi->dxno[2])); + + i= -2*( (shi->vn[0]+shi->dyno[0])*shi->view[0]+ + (shi->vn[1]+shi->dyno[1])*(shi->view[1]+shi->dyview)+ (shi->vn[2]+shi->dyno[2])*shi->view[2] ); + + shi->dyref[0]= shi->ref[0]- (shi->view[0]+ i*(shi->vn[0]+shi->dyno[0])); + shi->dyref[1]= shi->ref[1]- (shi->view[1]+shi->dyview+i*(shi->vn[1]+shi->dyno[1])); + shi->dyref[2]= shi->ref[2]- (shi->view[2]+ i*(shi->vn[2]+shi->dyno[2])); + + } + else { + + i= -2*( shi->vn[0]*(shi->view[0]+shi->dxview) + + shi->vn[1]*shi->view[1]+ shi->vn[2]*shi->view[2] ); + + shi->dxref[0]= shi->ref[0]- (shi->view[0]+shi->dxview+i*shi->vn[0]); + shi->dxref[1]= shi->ref[1]- (shi->view[1]+ i*shi->vn[1]); + shi->dxref[2]= shi->ref[2]- (shi->view[2]+ i*shi->vn[2]); + + i= -2*( shi->vn[0]*shi->view[0]+ + shi->vn[1]*(shi->view[1]+shi->dyview)+ shi->vn[2]*shi->view[2] ); + + shi->dyref[0]= shi->ref[0]- (shi->view[0]+ i*shi->vn[0]); + shi->dyref[1]= shi->ref[1]- (shi->view[1]+shi->dyview+i*shi->vn[1]); + shi->dyref[2]= shi->ref[2]- (shi->view[2]+ i*shi->vn[2]); + } + } + +} + +/* called from rayshade.c */ +void shade_color(ShadeInput *shi, ShadeResult *shr) +{ + Material *ma= shi->mat; + + if (ma->mode & (MA_FACETEXTURE)) { + shi->r= shi->vcol[0]; + shi->g= shi->vcol[1]; + shi->b= shi->vcol[2]; + if (ma->mode & (MA_FACETEXTURE_ALPHA)) + shi->alpha= shi->vcol[3]; + } + else if (ma->mode & (MA_VERTEXCOLP)) { + float neg_alpha = 1.0f - shi->vcol[3]; + shi->r= shi->r*neg_alpha + shi->vcol[0]*shi->vcol[3]; + shi->g= shi->g*neg_alpha + shi->vcol[1]*shi->vcol[3]; + shi->b= shi->b*neg_alpha + shi->vcol[2]*shi->vcol[3]; + } + + if (ma->texco) + do_material_tex(shi, &R); + + if (ma->fresnel_tra!=0.0f) + shi->alpha*= fresnel_fac(shi->view, shi->vn, ma->fresnel_tra_i, ma->fresnel_tra); + + if (!(shi->mode & MA_TRANSP)) shi->alpha= 1.0f; + + 
shr->diff[0]= shi->r; + shr->diff[1]= shi->g; + shr->diff[2]= shi->b; + shr->alpha= shi->alpha; + + /* modulate by the object color */ + if ((ma->shade_flag & MA_OBCOLOR) && shi->obr->ob) { + float obcol[4]; + + copy_v4_v4(obcol, shi->obr->ob->col); + CLAMP(obcol[3], 0.0f, 1.0f); + + shr->diff[0] *= obcol[0]; + shr->diff[1] *= obcol[1]; + shr->diff[2] *= obcol[2]; + if (shi->mode & MA_TRANSP) shr->alpha *= obcol[3]; + } + + copy_v3_v3(shr->diffshad, shr->diff); +} + +/* ramp for at end of shade */ +static void ramp_diffuse_result(float *diff, ShadeInput *shi) +{ + Material *ma= shi->mat; + float col[4]; + + if (ma->ramp_col) { + if (ma->rampin_col==MA_RAMP_IN_RESULT) { + float fac = IMB_colormanagement_get_luminance(diff); + BKE_colorband_evaluate(ma->ramp_col, fac, col); + + /* blending method */ + fac= col[3]*ma->rampfac_col; + + ramp_blend(ma->rampblend_col, diff, fac, col); + } + } +} + +/* r,g,b denote energy, ramp is used with different values to make new material color */ +static void add_to_diffuse(float diff[3], const ShadeInput *shi, const float is, const float rgb[3]) +{ + Material *ma= shi->mat; + + if (ma->ramp_col && (ma->mode & MA_RAMP_COL)) { + + /* MA_RAMP_IN_RESULT is exceptional */ + if (ma->rampin_col==MA_RAMP_IN_RESULT) { + /* normal add */ + diff[0] += rgb[0] * shi->r; + diff[1] += rgb[1] * shi->g; + diff[2] += rgb[2] * shi->b; + } + else { + float colt[3], col[4]; + float fac; + + /* input */ + switch (ma->rampin_col) { + case MA_RAMP_IN_ENERGY: + fac = IMB_colormanagement_get_luminance(rgb); + break; + case MA_RAMP_IN_SHADER: + fac = is; + break; + case MA_RAMP_IN_NOR: + fac = dot_v3v3(shi->view, shi->vn); + break; + default: + fac = 0.0f; + break; + } + + BKE_colorband_evaluate(ma->ramp_col, fac, col); + + /* blending method */ + fac = col[3] * ma->rampfac_col; + copy_v3_v3(colt, &shi->r); + + ramp_blend(ma->rampblend_col, colt, fac, col); + + /* output to */ + diff[0] += rgb[0] * colt[0]; + diff[1] += rgb[1] * colt[1]; + diff[2] += rgb[2] 
* colt[2]; + } + } + else { + diff[0] += rgb[0] * shi->r; + diff[1] += rgb[1] * shi->g; + diff[2] += rgb[2] * shi->b; + } +} + +static void ramp_spec_result(float spec_col[3], ShadeInput *shi) +{ + Material *ma= shi->mat; + + if (ma->ramp_spec && (ma->rampin_spec==MA_RAMP_IN_RESULT)) { + float col[4]; + float fac = IMB_colormanagement_get_luminance(spec_col); + + BKE_colorband_evaluate(ma->ramp_spec, fac, col); + + /* blending method */ + fac= col[3]*ma->rampfac_spec; + + ramp_blend(ma->rampblend_spec, spec_col, fac, col); + + } +} + +/* is = dot product shade, t = spec energy */ +static void do_specular_ramp(ShadeInput *shi, float is, float t, float spec[3]) +{ + Material *ma= shi->mat; + + spec[0]= shi->specr; + spec[1]= shi->specg; + spec[2]= shi->specb; + + /* MA_RAMP_IN_RESULT is exception */ + if (ma->ramp_spec && (ma->rampin_spec!=MA_RAMP_IN_RESULT)) { + float fac; + float col[4]; + + /* input */ + switch (ma->rampin_spec) { + case MA_RAMP_IN_ENERGY: + fac= t; + break; + case MA_RAMP_IN_SHADER: + fac= is; + break; + case MA_RAMP_IN_NOR: + fac= shi->view[0]*shi->vn[0] + shi->view[1]*shi->vn[1] + shi->view[2]*shi->vn[2]; + break; + default: + fac= 0.0f; + break; + } + + BKE_colorband_evaluate(ma->ramp_spec, fac, col); + + /* blending method */ + fac= col[3]*ma->rampfac_spec; + + ramp_blend(ma->rampblend_spec, spec, fac, col); + } +} + +/* pure AO, check for raytrace and world should have been done */ +/* preprocess, textures were not done, don't use shi->amb for that reason */ +void ambient_occlusion(ShadeInput *shi) +{ + if ((R.wrld.ao_gather_method == WO_AOGATHER_APPROX) && shi->mat->amb!=0.0f) { + sample_occ(&R, shi); + } + else if ((R.r.mode & R_RAYTRACE) && shi->mat->amb!=0.0f) { + ray_ao(shi, shi->ao, shi->env); + } + else { + shi->ao[0]= shi->ao[1]= shi->ao[2]= 1.0f; + zero_v3(shi->env); + zero_v3(shi->indirect); + } +} + + +/* wrld mode was checked for */ +static void ambient_occlusion_apply(ShadeInput *shi, ShadeResult *shr) +{ + float f= 
R.wrld.aoenergy; + float tmp[3], tmpspec[3]; + + if (!((R.r.mode & R_RAYTRACE) || R.wrld.ao_gather_method == WO_AOGATHER_APPROX)) + return; + if (f == 0.0f) + return; + + if (R.wrld.aomix==WO_AOADD) { + shr->combined[0] += shi->ao[0]*shi->r*shi->refl*f; + shr->combined[1] += shi->ao[1]*shi->g*shi->refl*f; + shr->combined[2] += shi->ao[2]*shi->b*shi->refl*f; + } + else if (R.wrld.aomix==WO_AOMUL) { + mul_v3_v3v3(tmp, shr->combined, shi->ao); + mul_v3_v3v3(tmpspec, shr->spec, shi->ao); + + if (f == 1.0f) { + copy_v3_v3(shr->combined, tmp); + copy_v3_v3(shr->spec, tmpspec); + } + else { + interp_v3_v3v3(shr->combined, shr->combined, tmp, f); + interp_v3_v3v3(shr->spec, shr->spec, tmpspec, f); + } + } +} + +void environment_lighting_apply(ShadeInput *shi, ShadeResult *shr) +{ + float f= R.wrld.ao_env_energy*shi->amb; + + if (!((R.r.mode & R_RAYTRACE) || R.wrld.ao_gather_method == WO_AOGATHER_APPROX)) + return; + if (f == 0.0f) + return; + + shr->combined[0] += shi->env[0]*shi->r*shi->refl*f; + shr->combined[1] += shi->env[1]*shi->g*shi->refl*f; + shr->combined[2] += shi->env[2]*shi->b*shi->refl*f; +} + +static void indirect_lighting_apply(ShadeInput *shi, ShadeResult *shr) +{ + float f= R.wrld.ao_indirect_energy; + + if (!((R.r.mode & R_RAYTRACE) || R.wrld.ao_gather_method == WO_AOGATHER_APPROX)) + return; + if (f == 0.0f) + return; + + shr->combined[0] += shi->indirect[0]*shi->r*shi->refl*f; + shr->combined[1] += shi->indirect[1]*shi->g*shi->refl*f; + shr->combined[2] += shi->indirect[2]*shi->b*shi->refl*f; +} + +/* result written in shadfac */ +void lamp_get_shadow(LampRen *lar, ShadeInput *shi, float inp, float shadfac[4], int do_real) +{ + LampShadowSubSample *lss= &(lar->shadsamp[shi->thread].s[shi->sample]); + + if (do_real || lss->samplenr!=shi->samplenr) { + + shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 1.0f; + + if (lar->shb) { + if (lar->buftype==LA_SHADBUF_IRREGULAR) + shadfac[3]= ISB_getshadow(shi, lar->shb); + else + shadfac[3] = testshadowbuf(&R, 
lar->shb, shi->co, shi->dxco, shi->dyco, inp, shi->mat->lbias); + } + else if (lar->mode & LA_SHAD_RAY) { + ray_shadow(shi, lar, shadfac); + } + + if (shi->depth==0) { + copy_v4_v4(lss->shadfac, shadfac); + lss->samplenr= shi->samplenr; + } + } + else { + copy_v4_v4(shadfac, lss->shadfac); + } +} + +/* lampdistance and spot angle, writes in lv and dist */ +float lamp_get_visibility(LampRen *lar, const float co[3], float lv[3], float *dist) +{ + if (lar->type==LA_SUN || lar->type==LA_HEMI) { + *dist= 1.0f; + copy_v3_v3(lv, lar->vec); + return 1.0f; + } + else { + float visifac= 1.0f, visifac_r; + + sub_v3_v3v3(lv, co, lar->co); + mul_v3_fl(lv, 1.0f / (*dist = len_v3(lv))); + + /* area type has no quad or sphere option */ + if (lar->type==LA_AREA) { + /* area is single sided */ + //if (dot_v3v3(lv, lar->vec) > 0.0f) + // visifac= 1.0f; + //else + // visifac= 0.0f; + } + else { + switch (lar->falloff_type) { + case LA_FALLOFF_CONSTANT: + visifac = 1.0f; + break; + case LA_FALLOFF_INVLINEAR: + visifac = lar->dist/(lar->dist + dist[0]); + break; + case LA_FALLOFF_INVSQUARE: + /* NOTE: This seems to be a hack since commit r12045 says this + * option is similar to old Quad, but with slight changes. 
+ * Correct inv square would be (which would be old Quad): + * visifac = lar->distkw / (lar->distkw + dist[0]*dist[0]); + */ + visifac = lar->dist / (lar->dist + dist[0]*dist[0]); + break; + case LA_FALLOFF_SLIDERS: + if (lar->ld1>0.0f) + visifac= lar->dist/(lar->dist+lar->ld1*dist[0]); + if (lar->ld2>0.0f) + visifac*= lar->distkw/(lar->distkw+lar->ld2*dist[0]*dist[0]); + break; + case LA_FALLOFF_INVCOEFFICIENTS: + visifac_r = lar->coeff_const + + lar->coeff_lin * dist[0] + + lar->coeff_quad * dist[0] * dist[0]; + if (visifac_r > 0.0) + visifac = 1.0 / visifac_r; + else + visifac = 0.0; + break; + case LA_FALLOFF_CURVE: + /* curvemapping_initialize is called from #add_render_lamp */ + visifac = curvemapping_evaluateF(lar->curfalloff, 0, dist[0]/lar->dist); + break; + } + + if (lar->mode & LA_SPHERE) { + float t= lar->dist - dist[0]; + if (t<=0.0f) + visifac= 0.0f; + else + visifac*= t/lar->dist; + } + + if (visifac > 0.0f) { + if (lar->type==LA_SPOT) { + float inpr, t; + + if (lar->mode & LA_SQUARE) { + if (dot_v3v3(lv, lar->vec) > 0.0f) { + float lvrot[3], x; + + /* rotate view to lampspace */ + copy_v3_v3(lvrot, lv); + mul_m3_v3(lar->imat, lvrot); + + x = max_ff(fabsf(lvrot[0]/lvrot[2]), fabsf(lvrot[1]/lvrot[2])); + /* 1.0f/(sqrt(1+x*x)) is equivalent to cos(atan(x)) */ + + inpr = 1.0f / (sqrtf(1.0f + x * x)); + } + else inpr= 0.0f; + } + else { + inpr= lv[0]*lar->vec[0]+lv[1]*lar->vec[1]+lv[2]*lar->vec[2]; + } + + t= lar->spotsi; + if (inpr<=t) + visifac= 0.0f; + else { + t= inpr-t; + if (t<lar->spotbl && lar->spotbl!=0.0f) { + /* soft area */ + float i= t/lar->spotbl; + t= i*i; + inpr*= (3.0f*t-2.0f*t*i); + } + visifac*= inpr; + } + } + } + } + if (visifac <= 0.001f) visifac = 0.0f; + return visifac; + } +} + +/* function returns raw diff, spec and full shadowed diff in the 'shad' pass */ +static void shade_one_light(LampRen *lar, ShadeInput *shi, ShadeResult *shr, int passflag) +{ + Material *ma= shi->mat; + VlakRen *vlr= shi->vlr; + float lv[3], lampdist, 
lacol[3], shadfac[4], lashdw[3]; + float i, is, i_noshad, inp, *vn, *view, vnor[3], phongcorr=1.0f; + float visifac; + + vn= shi->vn; + view= shi->view; + + + if (lar->energy == 0.0f) return; + /* only shadow lamps shouldn't affect shadow-less materials at all */ + if ((lar->mode & LA_ONLYSHADOW) && (!(ma->mode & MA_SHADOW) || !(R.r.mode & R_SHADOW))) + return; + /* optimization, don't render fully black lamps */ + if (!(lar->mode & LA_TEXTURE) && (lar->r + lar->g + lar->b == 0.0f)) + return; + + /* lampdist, spot angle, area side, ... */ + visifac= lamp_get_visibility(lar, shi->co, lv, &lampdist); + if (visifac==0.0f) + return; + + if (lar->type==LA_SPOT) { + if (lar->mode & LA_OSATEX) { + shi->osatex= 1; /* signal for multitex() */ + + shi->dxlv[0]= lv[0] - (shi->co[0]-lar->co[0]+shi->dxco[0])/lampdist; + shi->dxlv[1]= lv[1] - (shi->co[1]-lar->co[1]+shi->dxco[1])/lampdist; + shi->dxlv[2]= lv[2] - (shi->co[2]-lar->co[2]+shi->dxco[2])/lampdist; + + shi->dylv[0]= lv[0] - (shi->co[0]-lar->co[0]+shi->dyco[0])/lampdist; + shi->dylv[1]= lv[1] - (shi->co[1]-lar->co[1]+shi->dyco[1])/lampdist; + shi->dylv[2]= lv[2] - (shi->co[2]-lar->co[2]+shi->dyco[2])/lampdist; + } + } + + /* lamp color texture */ + lacol[0]= lar->r; + lacol[1]= lar->g; + lacol[2]= lar->b; + + lashdw[0]= lar->shdwr; + lashdw[1]= lar->shdwg; + lashdw[2]= lar->shdwb; + + if (lar->mode & LA_TEXTURE) do_lamp_tex(lar, lv, shi, lacol, LA_TEXTURE); + if (lar->mode & LA_SHAD_TEX) do_lamp_tex(lar, lv, shi, lashdw, LA_SHAD_TEX); + + /* tangent case; calculate fake face normal, aligned with lampvector */ + /* note, vnor==vn is used as tangent trigger for buffer shadow */ + if (vlr->flag & R_TANGENT) { + float cross[3], nstrand[3], blend; + + if (ma->mode & MA_STR_SURFDIFF) { + cross_v3_v3v3(cross, shi->surfnor, vn); + cross_v3_v3v3(nstrand, vn, cross); + + blend= dot_v3v3(nstrand, shi->surfnor); + blend= 1.0f - blend; + CLAMP(blend, 0.0f, 1.0f); + + interp_v3_v3v3(vnor, nstrand, shi->surfnor, blend); + 
normalize_v3(vnor); + } + else { + cross_v3_v3v3(cross, lv, vn); + cross_v3_v3v3(vnor, cross, vn); + normalize_v3(vnor); + } + + if (ma->strand_surfnor > 0.0f) { + if (ma->strand_surfnor > shi->surfdist) { + blend= (ma->strand_surfnor - shi->surfdist)/ma->strand_surfnor; + interp_v3_v3v3(vnor, vnor, shi->surfnor, blend); + normalize_v3(vnor); + } + } + + vnor[0]= -vnor[0];vnor[1]= -vnor[1];vnor[2]= -vnor[2]; + vn= vnor; + } + else if (ma->mode & MA_TANGENT_V) { + float cross[3]; + cross_v3_v3v3(cross, lv, shi->tang); + cross_v3_v3v3(vnor, cross, shi->tang); + normalize_v3(vnor); + vnor[0]= -vnor[0];vnor[1]= -vnor[1];vnor[2]= -vnor[2]; + vn= vnor; + } + + /* dot product and reflectivity */ + /* inp = dotproduct, is = shader result, i = lamp energy (with shadow), i_noshad = i without shadow */ + inp= dot_v3v3(vn, lv); + + /* phong threshold to prevent backfacing faces having artifacts on ray shadow (terminator problem) */ + /* this complex construction screams for a nicer implementation! (ton) */ + if (R.r.mode & R_SHADOW) { + if (ma->mode & MA_SHADOW) { + if (lar->type == LA_HEMI || lar->type == LA_AREA) { + /* pass */ + } + else if ((ma->mode & MA_RAYBIAS) && (lar->mode & LA_SHAD_RAY) && (vlr->flag & R_SMOOTH)) { + float thresh= shi->obr->ob->smoothresh; + if (inp>thresh) + phongcorr= (inp-thresh)/(inp*(1.0f-thresh)); + else + phongcorr= 0.0f; + } + else if (ma->sbias!=0.0f && ((lar->mode & LA_SHAD_RAY) || lar->shb)) { + if (inp>ma->sbias) + phongcorr= (inp-ma->sbias)/(inp*(1.0f-ma->sbias)); + else + phongcorr= 0.0f; + } + } + } + + /* diffuse shaders */ + if (lar->mode & LA_NO_DIFF) { + is = 0.0f; /* skip shaders */ + } + else if (lar->type==LA_HEMI) { + is = 0.5f * inp + 0.5f; + } + else { + + if (lar->type==LA_AREA) + inp= area_lamp_energy_multisample(lar, shi->co, vn); + + /* diffuse shaders (oren nayer gets inp from area light) */ + if (ma->diff_shader==MA_DIFF_ORENNAYAR) is= OrenNayar_Diff(inp, vn, lv, view, ma->roughness); + else if 
(ma->diff_shader==MA_DIFF_TOON) is= Toon_Diff(vn, lv, view, ma->param[0], ma->param[1]); + else if (ma->diff_shader==MA_DIFF_MINNAERT) is= Minnaert_Diff(inp, vn, view, ma->darkness); + else if (ma->diff_shader==MA_DIFF_FRESNEL) is= Fresnel_Diff(vn, lv, view, ma->param[0], ma->param[1]); + else is= inp; /* Lambert */ + } + + /* 'is' is diffuse */ + if ((ma->shade_flag & MA_CUBIC) && is > 0.0f && is < 1.0f) { + is= 3.0f * is * is - 2.0f * is * is * is; /* nicer termination of shades */ + } + + i= is*phongcorr; + + if (i>0.0f) { + i*= visifac*shi->refl; + } + i_noshad= i; + + vn = shi->vn; /* bring back original vector, we use special specular shaders for tangent */ + if (ma->mode & MA_TANGENT_V) + vn= shi->tang; + + /* init transp shadow */ + shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 1.0f; + + /* shadow and spec, (visifac==0 outside spot) */ + if (visifac> 0.0f) { + + if ((R.r.mode & R_SHADOW)) { + if (ma->mode & MA_SHADOW) { + if (lar->shb || (lar->mode & LA_SHAD_RAY)) { + + if (vn==vnor) /* tangent trigger */ + lamp_get_shadow(lar, shi, dot_v3v3(shi->vn, lv), shadfac, shi->depth); + else + lamp_get_shadow(lar, shi, inp, shadfac, shi->depth); + + /* warning, here it skips the loop */ + if ((lar->mode & LA_ONLYSHADOW) && i>0.0f) { + + shadfac[3]= i*lar->energy*(1.0f-shadfac[3]); + shr->shad[0] -= shadfac[3]*shi->r*(1.0f-lashdw[0]); + shr->shad[1] -= shadfac[3]*shi->g*(1.0f-lashdw[1]); + shr->shad[2] -= shadfac[3]*shi->b*(1.0f-lashdw[2]); + + if (!(lar->mode & LA_NO_SPEC)) { + shr->spec[0] -= shadfac[3]*shi->specr*(1.0f-lashdw[0]); + shr->spec[1] -= shadfac[3]*shi->specg*(1.0f-lashdw[1]); + shr->spec[2] -= shadfac[3]*shi->specb*(1.0f-lashdw[2]); + } + + return; + } + + i*= shadfac[3]; + shr->shad[3] = shadfac[3]; /* store this for possible check in troublesome cases */ + } + else { + shr->shad[3] = 1.0f; /* No shadow at all! 
*/ + } + } + } + + /* in case 'no diffuse' we still do most calculus, spec can be in shadow.*/ + if (!(lar->mode & LA_NO_DIFF)) { + if (i>0.0f) { + if (ma->mode & MA_SHADOW_TRA) { + const float tcol[3] = { + i * shadfac[0] * lacol[0], + i * shadfac[1] * lacol[1], + i * shadfac[2] * lacol[2], + }; + add_to_diffuse(shr->shad, shi, is, tcol); + } + else { + const float tcol[3] = { + i * lacol[0], + i * lacol[1], + i * lacol[2], + }; + add_to_diffuse(shr->shad, shi, is, tcol); + } + } + /* add light for colored shadow */ + if (i_noshad>i && !(lashdw[0]==0 && lashdw[1]==0 && lashdw[2]==0)) { + const float tcol[3] = { + lashdw[0] * (i_noshad - i) * lacol[0], + lashdw[1] * (i_noshad - i) * lacol[1], + lashdw[2] * (i_noshad - i) * lacol[2], + }; + add_to_diffuse(shr->shad, shi, is, tcol); + } + if (i_noshad>0.0f) { + if (passflag & (SCE_PASS_DIFFUSE|SCE_PASS_SHADOW) || + ((passflag & SCE_PASS_COMBINED) && !(shi->combinedflag & SCE_PASS_SHADOW))) + { + const float tcol[3] = { + i_noshad * lacol[0], + i_noshad * lacol[1], + i_noshad * lacol[2] + }; + add_to_diffuse(shr->diff, shi, is, tcol); + } + else { + copy_v3_v3(shr->diff, shr->shad); + } + } + } + + /* specularity */ + shadfac[3]*= phongcorr; /* note, shadfac not allowed to be stored nonlocal */ + + if (shadfac[3]>0.0f && shi->spec!=0.0f && !(lar->mode & LA_NO_SPEC) && !(lar->mode & LA_ONLYSHADOW)) { + + if (!(passflag & (SCE_PASS_COMBINED | SCE_PASS_SPEC))) { + /* pass */ + } + else if (lar->type == LA_HEMI) { + float t; + /* hemi uses no spec shaders (yet) */ + + lv[0]+= view[0]; + lv[1]+= view[1]; + lv[2]+= view[2]; + + normalize_v3(lv); + + t= vn[0]*lv[0]+vn[1]*lv[1]+vn[2]*lv[2]; + + if (lar->type==LA_HEMI) { + t= 0.5f*t+0.5f; + } + + t= shadfac[3]*shi->spec*spec(t, shi->har); + + shr->spec[0]+= t*(lacol[0] * shi->specr); + shr->spec[1]+= t*(lacol[1] * shi->specg); + shr->spec[2]+= t*(lacol[2] * shi->specb); + } + else { + /* specular shaders */ + float specfac, t; + + if (ma->spec_shader==MA_SPEC_PHONG) + specfac= 
Phong_Spec(vn, lv, view, shi->har, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V)); + else if (ma->spec_shader==MA_SPEC_COOKTORR) + specfac= CookTorr_Spec(vn, lv, view, shi->har, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V)); + else if (ma->spec_shader==MA_SPEC_BLINN) + specfac= Blinn_Spec(vn, lv, view, ma->refrac, (float)shi->har, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V)); + else if (ma->spec_shader==MA_SPEC_WARDISO) + specfac= WardIso_Spec( vn, lv, view, ma->rms, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V)); + else + specfac= Toon_Spec(vn, lv, view, ma->param[2], ma->param[3], (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V)); + + /* area lamp correction */ + if (lar->type==LA_AREA) specfac*= inp; + + t= shadfac[3]*shi->spec*visifac*specfac; + + if (ma->mode & MA_RAMP_SPEC) { + float spec[3]; + do_specular_ramp(shi, specfac, t, spec); + shr->spec[0]+= t*(lacol[0] * spec[0]); + shr->spec[1]+= t*(lacol[1] * spec[1]); + shr->spec[2]+= t*(lacol[2] * spec[2]); + } + else { + shr->spec[0]+= t*(lacol[0] * shi->specr); + shr->spec[1]+= t*(lacol[1] * shi->specg); + shr->spec[2]+= t*(lacol[2] * shi->specb); + } + } + } + } +} + +static void shade_lamp_loop_only_shadow(ShadeInput *shi, ShadeResult *shr) +{ + + if (R.r.mode & R_SHADOW) { + ListBase *lights; + LampRen *lar; + GroupObject *go; + float inpr, lv[3]; + float /* *view, */ shadfac[4]; + float ir, accum, visifac, lampdist; + float shaded = 0.0f, lightness = 0.0f; + + + /* view= shi->view; */ /* UNUSED */ + accum= ir= 0.0f; + + lights= get_lights(shi); + for (go=lights->first; go; go= go->next) { + lar= go->lampren; + if (lar==NULL) continue; + + if (lar->mode & LA_LAYER) if ((lar->lay & shi->obi->lay)==0) continue; + if ((lar->lay & shi->lay)==0) continue; + + if (lar->shb || (lar->mode & LA_SHAD_RAY)) { + visifac= lamp_get_visibility(lar, shi->co, lv, &lampdist); + ir+= 1.0f; + + if (visifac <= 0.0f) { + if (shi->mat->shadowonly_flag == MA_SO_OLD) + accum+= 1.0f; + + continue; 
+ } + inpr= dot_v3v3(shi->vn, lv); + if (inpr <= 0.0f) { + if (shi->mat->shadowonly_flag == MA_SO_OLD) + accum+= 1.0f; + + continue; + } + + lamp_get_shadow(lar, shi, inpr, shadfac, shi->depth); + + if (shi->mat->shadowonly_flag == MA_SO_OLD) { + /* Old "Shadows Only" */ + accum+= (1.0f-visifac) + (visifac)*IMB_colormanagement_get_luminance(shadfac)*shadfac[3]; + } + else { + shaded += IMB_colormanagement_get_luminance(shadfac)*shadfac[3] * visifac * lar->energy; + + if (shi->mat->shadowonly_flag == MA_SO_SHADOW) { + lightness += visifac * lar->energy; + } + } + } + } + + /* Apply shadows as alpha */ + if (ir>0.0f) { + if (shi->mat->shadowonly_flag == MA_SO_OLD) { + accum = 1.0f - accum/ir; + } + else { + if (shi->mat->shadowonly_flag == MA_SO_SHADOW) { + if (lightness > 0.0f) { + /* Get shadow value from between 0.0f and non-shadowed lightness */ + accum = (lightness - shaded) / (lightness); + } + else { + accum = 0.0f; + } + } + else { /* shadowonly_flag == MA_SO_SHADED */ + /* Use shaded value */ + accum = 1.0f - shaded; + } + } + + shr->alpha= (shi->alpha)*(accum); + if (shr->alpha<0.0f) shr->alpha=0.0f; + } + else { + /* If "fully shaded", use full alpha even on areas that have no lights */ + if (shi->mat->shadowonly_flag == MA_SO_SHADED) shr->alpha=shi->alpha; + else shr->alpha= 0.f; + } + } + + /* quite disputable this... 
also note it doesn't mirror-raytrace */ + if ((R.wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT)) && shi->amb!=0.0f) { + float f; + + if (R.wrld.mode & WO_AMB_OCC) { + f= R.wrld.aoenergy*shi->amb; + + if (R.wrld.aomix==WO_AOADD) { + if (shi->mat->shadowonly_flag == MA_SO_OLD) { + f= f*(1.0f - IMB_colormanagement_get_luminance(shi->ao)); + shr->alpha= (shr->alpha + f)*f; + } + else { + shr->alpha -= f*IMB_colormanagement_get_luminance(shi->ao); + if (shr->alpha<0.0f) shr->alpha=0.0f; + } + } + else /* AO Multiply */ + shr->alpha= (1.0f - f)*shr->alpha + f*(1.0f - (1.0f - shr->alpha)*IMB_colormanagement_get_luminance(shi->ao)); + } + + if (R.wrld.mode & WO_ENV_LIGHT) { + if (shi->mat->shadowonly_flag == MA_SO_OLD) { + f= R.wrld.ao_env_energy*shi->amb*(1.0f - IMB_colormanagement_get_luminance(shi->env)); + shr->alpha= (shr->alpha + f)*f; + } + else { + f= R.wrld.ao_env_energy*shi->amb; + shr->alpha -= f*IMB_colormanagement_get_luminance(shi->env); + if (shr->alpha<0.0f) shr->alpha=0.0f; + } + } + } +} + +/* let's map negative light as if it mirrors positive light, otherwise negative values disappear */ +static void wrld_exposure_correct(float diff[3]) +{ + + diff[0]= R.wrld.linfac*(1.0f-expf( diff[0]*R.wrld.logfac) ); + diff[1]= R.wrld.linfac*(1.0f-expf( diff[1]*R.wrld.logfac) ); + diff[2]= R.wrld.linfac*(1.0f-expf( diff[2]*R.wrld.logfac) ); +} + +void shade_lamp_loop(ShadeInput *shi, ShadeResult *shr) +{ + /* Passes which might need to know material color. + * + * It seems to be faster to just calculate material color + * even if the pass doesn't really need it than trying to + * figure out whether color is really needed or not. 
+ */ + const int color_passes = + SCE_PASS_COMBINED | SCE_PASS_RGBA | SCE_PASS_DIFFUSE | SCE_PASS_SPEC | + SCE_PASS_REFLECT | SCE_PASS_NORMAL | SCE_PASS_REFRACT | SCE_PASS_EMIT | SCE_PASS_SHADOW; + + Material *ma= shi->mat; + int passflag= shi->passflag; + + memset(shr, 0, sizeof(ShadeResult)); + + if (!(shi->mode & MA_TRANSP)) shi->alpha = 1.0f; + + /* separate loop */ + if (ma->mode & MA_ONLYSHADOW) { + shade_lamp_loop_only_shadow(shi, shr); + return; + } + + /* envmap hack, always reset */ + shi->refcol[0]= shi->refcol[1]= shi->refcol[2]= shi->refcol[3]= 0.0f; + + /* material color itself */ + if (passflag & color_passes) { + if (ma->mode & (MA_FACETEXTURE)) { + shi->r= shi->vcol[0]; + shi->g= shi->vcol[1]; + shi->b= shi->vcol[2]; + if (ma->mode & (MA_FACETEXTURE_ALPHA)) + shi->alpha= shi->vcol[3]; + } +#ifdef WITH_FREESTYLE + else if (ma->vcol_alpha) { + shi->r= shi->vcol[0]; + shi->g= shi->vcol[1]; + shi->b= shi->vcol[2]; + shi->alpha= shi->vcol[3]; + } +#endif + else if (ma->mode & (MA_VERTEXCOLP)) { + float neg_alpha = 1.0f - shi->vcol[3]; + shi->r= shi->r*neg_alpha + shi->vcol[0]*shi->vcol[3]; + shi->g= shi->g*neg_alpha + shi->vcol[1]*shi->vcol[3]; + shi->b= shi->b*neg_alpha + shi->vcol[2]*shi->vcol[3]; + } + if (ma->texco) { + do_material_tex(shi, &R); + if (!(shi->mode & MA_TRANSP)) shi->alpha = 1.0f; + } + + shr->col[0]= shi->r*shi->alpha; + shr->col[1]= shi->g*shi->alpha; + shr->col[2]= shi->b*shi->alpha; + shr->col[3]= shi->alpha; + + if ((ma->sss_flag & MA_DIFF_SSS) && !sss_pass_done(&R, ma)) { + if (ma->sss_texfac == 0.0f) { + shi->r= shi->g= shi->b= shi->alpha= 1.0f; + shr->col[0]= shr->col[1]= shr->col[2]= shr->col[3]= 1.0f; + } + else { + shi->r= pow(max_ff(shi->r, 0.0f), ma->sss_texfac); + shi->g= pow(max_ff(shi->g, 0.0f), ma->sss_texfac); + shi->b= pow(max_ff(shi->b, 0.0f), ma->sss_texfac); + shi->alpha= pow(max_ff(shi->alpha, 0.0f), ma->sss_texfac); + + shr->col[0]= pow(max_ff(shr->col[0], 0.0f), ma->sss_texfac); + shr->col[1]= 
pow(max_ff(shr->col[1], 0.0f), ma->sss_texfac); + shr->col[2]= pow(max_ff(shr->col[2], 0.0f), ma->sss_texfac); + shr->col[3]= pow(max_ff(shr->col[3], 0.0f), ma->sss_texfac); + } + } + } + + if (ma->mode & MA_SHLESS) { + shr->combined[0]= shi->r; + shr->combined[1]= shi->g; + shr->combined[2]= shi->b; + shr->alpha= shi->alpha; + goto finally_shadeless; + } + + if ( (ma->mode & (MA_VERTEXCOL|MA_VERTEXCOLP))== MA_VERTEXCOL ) { /* vertexcolor light */ + shr->emit[0]= shi->r*(shi->emit+shi->vcol[0]*shi->vcol[3]); + shr->emit[1]= shi->g*(shi->emit+shi->vcol[1]*shi->vcol[3]); + shr->emit[2]= shi->b*(shi->emit+shi->vcol[2]*shi->vcol[3]); + } + else { + shr->emit[0]= shi->r*shi->emit; + shr->emit[1]= shi->g*shi->emit; + shr->emit[2]= shi->b*shi->emit; + } + + /* AO pass */ + if (((passflag & SCE_PASS_COMBINED) && (shi->combinedflag & (SCE_PASS_AO|SCE_PASS_ENVIRONMENT|SCE_PASS_INDIRECT))) || + (passflag & (SCE_PASS_AO|SCE_PASS_ENVIRONMENT|SCE_PASS_INDIRECT))) { + if ((R.wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && (R.r.mode & R_SHADOW)) { + /* AO was calculated for scanline already */ + if (shi->depth || shi->volume_depth) + ambient_occlusion(shi); + copy_v3_v3(shr->ao, shi->ao); + copy_v3_v3(shr->env, shi->env); /* XXX multiply */ + copy_v3_v3(shr->indirect, shi->indirect); /* XXX multiply */ + } + else { + shr->ao[0]= shr->ao[1]= shr->ao[2]= 1.0f; + zero_v3(shr->env); + zero_v3(shr->indirect); + } + } + + /* lighting pass */ + if (passflag & (SCE_PASS_COMBINED|SCE_PASS_DIFFUSE|SCE_PASS_SPEC|SCE_PASS_SHADOW)) { + GroupObject *go; + ListBase *lights; + LampRen *lar; + + lights= get_lights(shi); + for (go=lights->first; go; go= go->next) { + lar= go->lampren; + if (lar==NULL) continue; + + /* test for lamp layer */ + if (lar->mode & LA_LAYER) if ((lar->lay & shi->obi->lay)==0) continue; + if ((lar->lay & shi->lay)==0) continue; + + /* accumulates in shr->diff and shr->spec and shr->shad (diffuse with shadow!) 
*/ + shade_one_light(lar, shi, shr, passflag); + } + + /* this check is to prevent only shadow lamps from producing negative + * colors.*/ + if (shr->spec[0] < 0) shr->spec[0] = 0; + if (shr->spec[1] < 0) shr->spec[1] = 0; + if (shr->spec[2] < 0) shr->spec[2] = 0; + + if (shr->shad[0] < 0) shr->shad[0] = 0; + if (shr->shad[1] < 0) shr->shad[1] = 0; + if (shr->shad[2] < 0) shr->shad[2] = 0; + + if (ma->sss_flag & MA_DIFF_SSS) { + float sss[3], col[3], invalpha, texfac= ma->sss_texfac; + + /* this will return false in the preprocess stage */ + if (sample_sss(&R, ma, shi->co, sss)) { + invalpha= (shr->col[3] > FLT_EPSILON)? 1.0f/shr->col[3]: 1.0f; + + if (texfac==0.0f) { + copy_v3_v3(col, shr->col); + mul_v3_fl(col, invalpha); + } + else if (texfac==1.0f) { + col[0]= col[1]= col[2]= 1.0f; + mul_v3_fl(col, invalpha); + } + else { + copy_v3_v3(col, shr->col); + mul_v3_fl(col, invalpha); + col[0]= pow(max_ff(col[0], 0.0f), 1.0f-texfac); + col[1]= pow(max_ff(col[1], 0.0f), 1.0f-texfac); + col[2]= pow(max_ff(col[2], 0.0f), 1.0f-texfac); + } + + shr->diff[0]= sss[0]*col[0]; + shr->diff[1]= sss[1]*col[1]; + shr->diff[2]= sss[2]*col[2]; + + if (shi->combinedflag & SCE_PASS_SHADOW) { + shr->shad[0]= shr->diff[0]; + shr->shad[1]= shr->diff[1]; + shr->shad[2]= shr->diff[2]; + } + } + } + + if (shi->combinedflag & SCE_PASS_SHADOW) + copy_v3_v3(shr->diffshad, shr->shad); + else + copy_v3_v3(shr->diffshad, shr->diff); + + copy_v3_v3(shr->combined, shr->diffshad); + + /* calculate shadow pass, we use a multiplication mask */ + /* Even if diff = 0,0,0, it does matter what the shadow pass is, since we may want it 'for itself'! 
*/ + if (passflag & SCE_PASS_SHADOW) { + if (shr->diff[0]!=0.0f) shr->shad[0]= shr->shad[0]/shr->diff[0]; + /* can't determine proper shadow from shad/diff (0/0), so use shadow intensity */ + else if (shr->shad[0]==0.0f) shr->shad[0]= shr->shad[3]; + + if (shr->diff[1]!=0.0f) shr->shad[1]= shr->shad[1]/shr->diff[1]; + else if (shr->shad[1]==0.0f) shr->shad[1]= shr->shad[3]; + + if (shr->diff[2]!=0.0f) shr->shad[2]= shr->shad[2]/shr->diff[2]; + else if (shr->shad[2]==0.0f) shr->shad[2]= shr->shad[3]; + } + + /* exposure correction */ + if ((R.wrld.exp!=0.0f || R.wrld.range!=1.0f) && !R.sss_points) { + wrld_exposure_correct(shr->combined); /* has no spec! */ + wrld_exposure_correct(shr->spec); + } + } + + /* alpha in end, spec can influence it */ + if (passflag & (SCE_PASS_COMBINED)) { + if ((ma->fresnel_tra!=0.0f) && (shi->mode & MA_TRANSP)) + shi->alpha*= fresnel_fac(shi->view, shi->vn, ma->fresnel_tra_i, ma->fresnel_tra); + + /* note: shi->mode! */ + if (shi->mode & MA_TRANSP && (shi->mode & (MA_ZTRANSP|MA_RAYTRANSP))) { + if (shi->spectra!=0.0f) { + float t = max_fff(shr->spec[0], shr->spec[1], shr->spec[2]); + t *= shi->spectra; + if (t>1.0f) t= 1.0f; + shi->alpha= (1.0f-t)*shi->alpha+t; + } + } + } + shr->alpha= shi->alpha; + + /* from now stuff everything in shr->combined: ambient, AO, ramps, exposure */ + if (!(ma->sss_flag & MA_DIFF_SSS) || !sss_pass_done(&R, ma)) { + if (R.r.mode & R_SHADOW) { + /* add AO in combined? 
*/ + if (R.wrld.mode & WO_AMB_OCC) + if (shi->combinedflag & SCE_PASS_AO) + ambient_occlusion_apply(shi, shr); + + if (R.wrld.mode & WO_ENV_LIGHT) + if (shi->combinedflag & SCE_PASS_ENVIRONMENT) + environment_lighting_apply(shi, shr); + + if (R.wrld.mode & WO_INDIRECT_LIGHT) + if (shi->combinedflag & SCE_PASS_INDIRECT) + indirect_lighting_apply(shi, shr); + } + + shr->combined[0]+= shi->ambr; + shr->combined[1]+= shi->ambg; + shr->combined[2]+= shi->ambb; + + if (ma->mode & MA_RAMP_COL) ramp_diffuse_result(shr->combined, shi); + } + + if (ma->mode & MA_RAMP_SPEC) ramp_spec_result(shr->spec, shi); + + /* refcol is for envmap only */ + if (shi->refcol[0]!=0.0f) { + float result[3]; + + result[0]= shi->mirr*shi->refcol[1] + (1.0f - shi->mirr*shi->refcol[0])*shr->combined[0]; + result[1]= shi->mirg*shi->refcol[2] + (1.0f - shi->mirg*shi->refcol[0])*shr->combined[1]; + result[2]= shi->mirb*shi->refcol[3] + (1.0f - shi->mirb*shi->refcol[0])*shr->combined[2]; + + if (passflag & SCE_PASS_REFLECT) + sub_v3_v3v3(shr->refl, result, shr->combined); + + if (shi->combinedflag & SCE_PASS_REFLECT) + copy_v3_v3(shr->combined, result); + + } + + /* and add emit and spec */ + if (shi->combinedflag & SCE_PASS_EMIT) + add_v3_v3(shr->combined, shr->emit); + if (shi->combinedflag & SCE_PASS_SPEC) + add_v3_v3(shr->combined, shr->spec); + + + /* Last section of this function applies to shadeless colors too */ +finally_shadeless: + + /* modulate by the object color */ + if ((ma->shade_flag & MA_OBCOLOR) && shi->obr->ob) { + if (!(ma->sss_flag & MA_DIFF_SSS) || !sss_pass_done(&R, ma)) { + float obcol[4]; + + copy_v4_v4(obcol, shi->obr->ob->col); + CLAMP(obcol[3], 0.0f, 1.0f); + + shr->combined[0] *= obcol[0]; + shr->combined[1] *= obcol[1]; + shr->combined[2] *= obcol[2]; + if (shi->mode & MA_TRANSP) shr->alpha *= obcol[3]; + } + } + + shr->combined[3]= shr->alpha; +} + +/* used for "Lamp Data" shader node */ +static float lamp_get_data_internal(ShadeInput *shi, GroupObject *go, float 
col[4], float lv[3], float *dist, float shadow[4]) +{ + LampRen *lar = go->lampren; + float visifac, inp; + + if (!lar + || ((lar->mode & LA_LAYER) && (lar->lay & shi->obi->lay) == 0) + || (lar->lay & shi->lay) == 0) + return 0.0f; + + if (lar->mode & LA_TEXTURE) + do_lamp_tex(lar, lv, shi, col, LA_TEXTURE); + + visifac = lamp_get_visibility(lar, shi->co, lv, dist); + + if (visifac == 0.0f + || lar->type == LA_HEMI + || (lar->type != LA_SPOT && !(lar->mode & LA_SHAD_RAY)) + || (R.r.scemode & R_BUTS_PREVIEW)) + return visifac; + + inp = dot_v3v3(shi->vn, lv); + + if (inp > 0.0f) { + float shadfac[4]; + + shadow[0] = lar->shdwr; + shadow[1] = lar->shdwg; + shadow[2] = lar->shdwb; + + if (lar->mode & LA_SHAD_TEX) + do_lamp_tex(lar, lv, shi, shadow, LA_SHAD_TEX); + + if (R.r.mode & R_SHADOW) { + lamp_get_shadow(lar, shi, inp, shadfac, shi->depth); + + shadow[0] = 1.0f - ((1.0f - shadfac[0] * shadfac[3]) * (1.0f - shadow[0])); + shadow[1] = 1.0f - ((1.0f - shadfac[1] * shadfac[3]) * (1.0f - shadow[1])); + shadow[2] = 1.0f - ((1.0f - shadfac[2] * shadfac[3]) * (1.0f - shadow[2])); + } + } + + return visifac; +} + +float RE_lamp_get_data(ShadeInput *shi, Object *lamp_obj, float col[4], float lv[3], float *dist, float shadow[4]) +{ + col[0] = col[1] = col[2] = 0.0f; + col[3] = 1.0f; + copy_v3_v3(lv, shi->vn); + *dist = 1.0f; + shadow[0] = shadow[1] = shadow[2] = shadow[3] = 1.0f; + + if (lamp_obj->type == OB_LAMP) { + GroupObject *go; + Lamp *lamp = (Lamp *)lamp_obj->data; + + col[0] = lamp->r * lamp->energy; + col[1] = lamp->g * lamp->energy; + col[2] = lamp->b * lamp->energy; + + if (R.r.scemode & R_BUTS_PREVIEW) { + for (go = R.lights.first; go; go = go->next) { + /* "Lamp.002" is main key light of material preview */ + if (STREQ(go->ob->id.name + 2, "Lamp.002")) + return lamp_get_data_internal(shi, go, col, lv, dist, shadow); + } + return 0.0f; + } + + if (shi->light_override) { + for (go = shi->light_override->gobject.first; go; go = go->next) { + if (go->ob == 
lamp_obj) + return lamp_get_data_internal(shi, go, col, lv, dist, shadow); + } + } + + if (shi->mat && shi->mat->group) { + for (go = shi->mat->group->gobject.first; go; go = go->next) { + if (go->ob == lamp_obj) + return lamp_get_data_internal(shi, go, col, lv, dist, shadow); + } + } + + for (go = R.lights.first; go; go = go->next) { + if (go->ob == lamp_obj) + return lamp_get_data_internal(shi, go, col, lv, dist, shadow); + } + } + + return 0.0f; +} + +const float (*RE_object_instance_get_matrix(struct ObjectInstanceRen *obi, int matrix_id))[4] +{ + if (obi) { + switch (matrix_id) { + case RE_OBJECT_INSTANCE_MATRIX_OB: + return (const float(*)[4])obi->obmat; + case RE_OBJECT_INSTANCE_MATRIX_OBINV: + return (const float(*)[4])obi->obinvmat; + case RE_OBJECT_INSTANCE_MATRIX_LOCALTOVIEW: + return (const float(*)[4])obi->localtoviewmat; + case RE_OBJECT_INSTANCE_MATRIX_LOCALTOVIEWINV: + return (const float(*)[4])obi->localtoviewinvmat; + } + } + return NULL; +} + +float RE_object_instance_get_object_pass_index(struct ObjectInstanceRen *obi) +{ + return obi->ob->index; +} + +float RE_object_instance_get_random_id(struct ObjectInstanceRen *obi) +{ + return obi->random_id; +} + +const float (*RE_render_current_get_matrix(int matrix_id))[4] +{ + switch (matrix_id) { + case RE_VIEW_MATRIX: + return (const float(*)[4])R.viewmat; + case RE_VIEWINV_MATRIX: + return (const float(*)[4])R.viewinv; + } + return NULL; +} + +float RE_fresnel_dielectric(float incoming[3], float normal[3], float eta) +{ + /* compute fresnel reflectance without explicitly computing + * the refracted direction */ + float c = fabs(dot_v3v3(incoming, normal)); + float g = eta * eta - 1.0 + c * c; + float result; + + if (g > 0.0) { + g = sqrtf(g); + float A = (g - c) / (g + c); + float B = (c * (g + c) - 1.0) / (c * (g - c) + 1.0); + result = 0.5 * A * A * (1.0 + B * B); + } + else { + result = 1.0; /* TIR (no refracted component) */ + } + + return result; +} diff --git 
a/source/blender/render/intern/source/sss.c b/source/blender/render/intern/source/sss.c new file mode 100644 index 00000000000..5919b8130d7 --- /dev/null +++ b/source/blender/render/intern/source/sss.c @@ -0,0 +1,1074 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2007 Blender Foundation. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): none yet. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/sss.c + * \ingroup render + */ + +/* Possible Improvements: + * - add fresnel terms + * - adapt Rd table to scale, now with small scale there are a lot of misses? + * - possible interesting method: perform sss on all samples in the tree, + * and then use those values interpolated somehow later. can also do this + * filtering on demand for speed. since we are doing things in screen + * space now there is an exact correspondence + * - avoid duplicate shading (filtering points in advance, irradiance cache + * like lookup?) 
+ * - lower resolution samples + */ + +#include <math.h> +#include <string.h> +#include <stdio.h> +#include <string.h> + +/* external modules: */ +#include "MEM_guardedalloc.h" + +#include "BLI_math.h" +#include "BLI_blenlib.h" +#include "BLI_utildefines.h" +#include "BLI_ghash.h" +#include "BLI_memarena.h" + +#include "BLT_translation.h" + + +#include "DNA_material_types.h" + +#include "BKE_global.h" +#include "BKE_main.h" +#include "BKE_scene.h" + + +/* this module */ +#include "render_types.h" +#include "sss.h" + +/* Generic Multiple Scattering API */ + +/* Relevant papers: + * [1] A Practical Model for Subsurface Light Transport + * [2] A Rapid Hierarchical Rendering Technique for Translucent Materials + * [3] Efficient Rendering of Local Subsurface Scattering + * [4] Implementing a skin BSSRDF (or several...) + */ + +/* Defines */ + +#define RD_TABLE_RANGE 100.0f +#define RD_TABLE_RANGE_2 10000.0f +#define RD_TABLE_SIZE 10000 + +#define MAX_OCTREE_NODE_POINTS 8 +#define MAX_OCTREE_DEPTH 15 + +/* Struct Definitions */ + +struct ScatterSettings { + float eta; /* index of refraction */ + float sigma_a; /* absorption coefficient */ + float sigma_s_; /* reduced scattering coefficient */ + float sigma_t_; /* reduced extinction coefficient */ + float sigma; /* effective extinction coefficient */ + float Fdr; /* diffuse fresnel reflectance */ + float D; /* diffusion constant */ + float A; + float alpha_; /* reduced albedo */ + float zr; /* distance of virtual lightsource above surface */ + float zv; /* distance of virtual lightsource below surface */ + float ld; /* mean free path */ + float ro; /* diffuse reflectance */ + float color; + float invsigma_t_; + float frontweight; + float backweight; + + float *tableRd; /* lookup table to avoid computing Rd */ + float *tableRd2; /* lookup table to avoid computing Rd for bigger values */ +}; + +typedef struct ScatterPoint { + float co[3]; + float rad[3]; + float area; + int back; +} ScatterPoint; + +typedef struct 
ScatterNode { + float co[3]; + float rad[3]; + float backrad[3]; + float area, backarea; + + int totpoint; + ScatterPoint *points; + + float split[3]; + struct ScatterNode *child[8]; +} ScatterNode; + +struct ScatterTree { + MemArena *arena; + + ScatterSettings *ss[3]; + float error, scale; + + ScatterNode *root; + ScatterPoint *points; + ScatterPoint **refpoints; + ScatterPoint **tmppoints; + int totpoint; + float min[3], max[3]; +}; + +typedef struct ScatterResult { + float rad[3]; + float backrad[3]; + float rdsum[3]; + float backrdsum[3]; +} ScatterResult; + +/* Functions for BSSRDF reparametrization in to more intuitive parameters, + * see [2] section 4 for more info. */ + +static float f_Rd(float alpha_, float A, float ro) +{ + float sq; + + sq = sqrtf(3.0f * (1.0f - alpha_)); + return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro; +} + +static float compute_reduced_albedo(ScatterSettings *ss) +{ + const float tolerance= 1e-8; + const int max_iteration_count= 20; + float d, fsub, xn_1= 0.0f, xn= 1.0f, fxn, fxn_1; + int i; + + /* use secant method to compute reduced albedo using Rd function inverse + * with a given reflectance */ + fxn= f_Rd(xn, ss->A, ss->ro); + fxn_1= f_Rd(xn_1, ss->A, ss->ro); + + for (i= 0; i < max_iteration_count; i++) { + fsub= (fxn - fxn_1); + if (fabsf(fsub) < tolerance) + break; + d= ((xn - xn_1)/fsub)*fxn; + if (fabsf(d) < tolerance) + break; + + xn_1= xn; + fxn_1= fxn; + xn= xn - d; + + if (xn > 1.0f) xn= 1.0f; + if (xn_1 > 1.0f) xn_1= 1.0f; + + fxn= f_Rd(xn, ss->A, ss->ro); + } + + /* avoid division by zero later */ + if (xn <= 0.0f) + xn= 0.00001f; + + return xn; +} + +/* Exponential falloff functions */ + +static float Rd_rsquare(ScatterSettings *ss, float rr) +{ + float sr, sv, Rdr, Rdv; + + sr = sqrtf(rr + ss->zr * ss->zr); + sv = sqrtf(rr + ss->zv * ss->zv); + + Rdr= ss->zr*(1.0f + ss->sigma*sr)*expf(-ss->sigma*sr)/(sr*sr*sr); + Rdv= ss->zv*(1.0f + ss->sigma*sv)*expf(-ss->sigma*sv)/(sv*sv*sv); + + return 
/*ss->alpha_*/(1.0f/(4.0f*(float)M_PI))*(Rdr + Rdv); +} + +static float Rd(ScatterSettings *ss, float r) +{ + return Rd_rsquare(ss, r*r); +} + +/* table lookups for Rd. this avoids expensive exp calls. we use two + * separate tables as well for lower and higher numbers to improve + * precision, since the number are poorly distributed because we do + * a lookup with the squared distance for smaller distances, saving + * another sqrt. */ + +static void approximate_Rd_rgb(ScatterSettings **ss, float rr, float *rd) +{ + float indexf, t, idxf; + int index; + + if (rr > (RD_TABLE_RANGE_2 * RD_TABLE_RANGE_2)) { + /* pass */ + } + else if (rr > RD_TABLE_RANGE) { + rr = sqrtf(rr); + indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE_2); + index= (int)indexf; + idxf= (float)index; + t= indexf - idxf; + + if (index >= 0 && index < RD_TABLE_SIZE) { + rd[0]= (ss[0]->tableRd2[index]*(1-t) + ss[0]->tableRd2[index+1]*t); + rd[1]= (ss[1]->tableRd2[index]*(1-t) + ss[1]->tableRd2[index+1]*t); + rd[2]= (ss[2]->tableRd2[index]*(1-t) + ss[2]->tableRd2[index+1]*t); + return; + } + } + else { + indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE); + index= (int)indexf; + idxf= (float)index; + t= indexf - idxf; + + if (index >= 0 && index < RD_TABLE_SIZE) { + rd[0]= (ss[0]->tableRd[index]*(1-t) + ss[0]->tableRd[index+1]*t); + rd[1]= (ss[1]->tableRd[index]*(1-t) + ss[1]->tableRd[index+1]*t); + rd[2]= (ss[2]->tableRd[index]*(1-t) + ss[2]->tableRd[index+1]*t); + return; + } + } + + /* fallback to slow Rd computation */ + rd[0]= Rd_rsquare(ss[0], rr); + rd[1]= Rd_rsquare(ss[1], rr); + rd[2]= Rd_rsquare(ss[2], rr); +} + +static void build_Rd_table(ScatterSettings *ss) +{ + float r; + int i, size = RD_TABLE_SIZE+1; + + ss->tableRd= MEM_mallocN(sizeof(float)*size, "scatterTableRd"); + ss->tableRd2= MEM_mallocN(sizeof(float)*size, "scatterTableRd"); + + for (i= 0; i < size; i++) { + r= i*(RD_TABLE_RANGE/RD_TABLE_SIZE); +#if 0 + if (r < ss->invsigma_t_*ss->invsigma_t_) { + r= ss->invsigma_t_*ss->invsigma_t_; + } 
+#endif + ss->tableRd[i]= Rd(ss, sqrtf(r)); + + r= i*(RD_TABLE_RANGE_2/RD_TABLE_SIZE); +#if 0 + if (r < ss->invsigma_t_) { + r= ss->invsigma_t_; + } +#endif + ss->tableRd2[i]= Rd(ss, r); + } +} + +ScatterSettings *scatter_settings_new(float refl, float radius, float ior, float reflfac, float frontweight, float backweight) +{ + ScatterSettings *ss; + + ss= MEM_callocN(sizeof(ScatterSettings), "ScatterSettings"); + + /* see [1] and [3] for these formulas */ + ss->eta= ior; + ss->Fdr= -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior; + ss->A= (1.0f + ss->Fdr)/(1.0f - ss->Fdr); + ss->ld= radius; + ss->ro= min_ff(refl, 0.99f); + ss->color= ss->ro*reflfac + (1.0f-reflfac); + + ss->alpha_= compute_reduced_albedo(ss); + + ss->sigma= 1.0f/ss->ld; + ss->sigma_t_= ss->sigma/sqrtf(3.0f*(1.0f - ss->alpha_)); + ss->sigma_s_= ss->alpha_*ss->sigma_t_; + ss->sigma_a= ss->sigma_t_ - ss->sigma_s_; + + ss->D= 1.0f/(3.0f*ss->sigma_t_); + + ss->zr= 1.0f/ss->sigma_t_; + ss->zv= ss->zr + 4.0f*ss->A*ss->D; + + ss->invsigma_t_= 1.0f/ss->sigma_t_; + + ss->frontweight= frontweight; + ss->backweight= backweight; + + /* precompute a table of Rd values for quick lookup */ + build_Rd_table(ss); + + return ss; +} + +void scatter_settings_free(ScatterSettings *ss) +{ + MEM_freeN(ss->tableRd); + MEM_freeN(ss->tableRd2); + MEM_freeN(ss); +} + +/* Hierarchical method as in [2]. 
*/ + +/* traversal */ + +#define SUBNODE_INDEX(co, split) \ + ((co[0]>=split[0]) + (co[1]>=split[1])*2 + (co[2]>=split[2])*4) + +static void add_radiance(ScatterTree *tree, float *frontrad, float *backrad, float area, float backarea, float rr, ScatterResult *result) +{ + float rd[3], frontrd[3], backrd[3]; + + approximate_Rd_rgb(tree->ss, rr, rd); + + if (frontrad && area) { + frontrd[0] = rd[0]*area; + frontrd[1] = rd[1]*area; + frontrd[2] = rd[2]*area; + + result->rad[0] += frontrad[0]*frontrd[0]; + result->rad[1] += frontrad[1]*frontrd[1]; + result->rad[2] += frontrad[2]*frontrd[2]; + + result->rdsum[0] += frontrd[0]; + result->rdsum[1] += frontrd[1]; + result->rdsum[2] += frontrd[2]; + } + if (backrad && backarea) { + backrd[0] = rd[0]*backarea; + backrd[1] = rd[1]*backarea; + backrd[2] = rd[2]*backarea; + + result->backrad[0] += backrad[0]*backrd[0]; + result->backrad[1] += backrad[1]*backrd[1]; + result->backrad[2] += backrad[2]*backrd[2]; + + result->backrdsum[0] += backrd[0]; + result->backrdsum[1] += backrd[1]; + result->backrdsum[2] += backrd[2]; + } +} + +static void traverse_octree(ScatterTree *tree, ScatterNode *node, const float co[3], int self, ScatterResult *result) +{ + float sub[3], dist; + int i, index = 0; + + if (node->totpoint > 0) { + /* leaf - add radiance from all samples */ + for (i=0; i<node->totpoint; i++) { + ScatterPoint *p= &node->points[i]; + + sub_v3_v3v3(sub, co, p->co); + dist= dot_v3v3(sub, sub); + + if (p->back) + add_radiance(tree, NULL, p->rad, 0.0f, p->area, dist, result); + else + add_radiance(tree, p->rad, NULL, p->area, 0.0f, dist, result); + } + } + else { + /* branch */ + if (self) + index = SUBNODE_INDEX(co, node->split); + + for (i=0; i<8; i++) { + if (node->child[i]) { + ScatterNode *subnode= node->child[i]; + + if (self && index == i) { + /* always traverse node containing the point */ + traverse_octree(tree, subnode, co, 1, result); + } + else { + /* decide subnode traversal based on maximum solid angle */ + 
					sub_v3_v3v3(sub, co, subnode->co);
+					dist= dot_v3v3(sub, sub); /* squared distance to the node's weighted position */
+
+					/* actually area/dist > error, but this avoids division */
+					if (subnode->area+subnode->backarea>tree->error*dist) {
+						traverse_octree(tree, subnode, co, 0, result);
+					}
+					else {
+						add_radiance(tree, subnode->rad, subnode->backrad,
+							subnode->area, subnode->backarea, dist, result);
+					}
+				}
+			}
+		}
+	}
+}
+/* Evaluate the scattered radiance at co and write it to rad[3].
+ *
+ * co is used as-is against the tree; scatter_tree_sample() divides by
+ * tree->scale before calling. The tree is traversed from the root with
+ * self enabled. Two estimates are then formed per channel, one from the
+ * front samples only and one from front plus back samples. Each is
+ * normalized by its Rd sum and multiplied by that channel's ss->color,
+ * and the larger of the two is returned. The front and back weights are
+ * taken from ss[0] for all three channels. */
+static void compute_radiance(ScatterTree *tree, const float co[3], float *rad)
+{
+	ScatterResult result;
+	float rdsum[3], backrad[3], backrdsum[3];
+
+	memset(&result, 0, sizeof(result));
+
+	traverse_octree(tree, tree->root, co, 1, &result);
+
+	/* the original paper doesn't do this, but we normalize over the
+	 * sampled area and multiply with the reflectance. this is because
+	 * our point samples are incomplete, there are no samples on parts
+	 * of the mesh not visible from the camera. this can not only make
+	 * it darker, but also lead to ugly color shifts */
+
+	mul_v3_fl(result.rad, tree->ss[0]->frontweight);
+	mul_v3_fl(result.backrad, tree->ss[0]->backweight);
+
+	copy_v3_v3(rad, result.rad);
+	add_v3_v3v3(backrad, result.rad, result.backrad);
+
+	copy_v3_v3(rdsum, result.rdsum);
+	add_v3_v3v3(backrdsum, result.rdsum, result.backrdsum);
+
+	/* channels whose weight sum is (almost) zero are left unnormalized */
+	if (rdsum[0] > 1e-16f) rad[0]= tree->ss[0]->color*rad[0]/rdsum[0];
+	if (rdsum[1] > 1e-16f) rad[1]= tree->ss[1]->color*rad[1]/rdsum[1];
+	if (rdsum[2] > 1e-16f) rad[2]= tree->ss[2]->color*rad[2]/rdsum[2];
+
+	if (backrdsum[0] > 1e-16f) backrad[0]= tree->ss[0]->color*backrad[0]/backrdsum[0];
+	if (backrdsum[1] > 1e-16f) backrad[1]= tree->ss[1]->color*backrad[1]/backrdsum[1];
+	if (backrdsum[2] > 1e-16f) backrad[2]= tree->ss[2]->color*backrad[2]/backrdsum[2];
+
+	rad[0]= MAX2(rad[0], backrad[0]);
+	rad[1]= MAX2(rad[1], backrad[1]);
+	rad[2]= MAX2(rad[2], backrad[2]);
+}
+
+/* building */
+/* Aggregate the points of a leaf into the node itself: area-weighted
+ * mean radiance for front (rad, area) and back (backrad, backarea)
+ * samples, and a representative position co weighted by
+ * area*|r+g+b| of each point. node->area and node->backarea are
+ * accumulated with += and are not reset here. */
+static void sum_leaf_radiance(ScatterTree *UNUSED(tree), ScatterNode *node)
+{
+	ScatterPoint *p;
+	float rad, totrad= 0.0f, inv;
+	int i;
+
+	node->co[0]=
node->co[1]= node->co[2]= 0.0;
+	node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
+	node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
+
+	/* compute total rad, rad weighted average position,
+	 * and total area */
+	for (i=0; i<node->totpoint; i++) {
+		p= &node->points[i];
+
+		rad= p->area*fabsf(p->rad[0] + p->rad[1] + p->rad[2]);
+		totrad += rad;
+
+		node->co[0] += rad*p->co[0];
+		node->co[1] += rad*p->co[1];
+		node->co[2] += rad*p->co[2];
+
+		if (p->back) {
+			node->backrad[0] += p->rad[0]*p->area;
+			node->backrad[1] += p->rad[1]*p->area;
+			node->backrad[2] += p->rad[2]*p->area;
+
+			node->backarea += p->area;
+		}
+		else {
+			node->rad[0] += p->rad[0]*p->area;
+			node->rad[1] += p->rad[1]*p->area;
+			node->rad[2] += p->rad[2]*p->area;
+
+			node->area += p->area;
+		}
+	}
+
+	/* turn the area-weighted sums into means; skipped for (near) zero area */
+	if (node->area > 1e-16f) {
+		inv= 1.0f/node->area;
+		node->rad[0] *= inv;
+		node->rad[1] *= inv;
+		node->rad[2] *= inv;
+	}
+	if (node->backarea > 1e-16f) {
+		inv= 1.0f/node->backarea;
+		node->backrad[0] *= inv;
+		node->backrad[1] *= inv;
+		node->backrad[2] *= inv;
+	}
+
+	if (totrad > 1e-16f) {
+		inv= 1.0f/totrad;
+		node->co[0] *= inv;
+		node->co[1] *= inv;
+		node->co[2] *= inv;
+	}
+	else {
+		/* make sure that if radiance is 0.0f, we still have these points in
+		 * the tree at a good position, they count for rdsum too */
+		for (i=0; i<node->totpoint; i++) {
+			p= &node->points[i];
+
+			node->co[0] += p->co[0];
+			node->co[1] += p->co[1];
+			node->co[2] += p->co[2];
+		}
+
+		node->co[0] /= node->totpoint;
+		node->co[1] /= node->totpoint;
+		node->co[2] /= node->totpoint;
+	}
+}
+/* Aggregate the up to eight children of a branch into the node itself,
+ * mirroring sum_leaf_radiance(): area-weighted mean front and back
+ * radiance plus a position weighted by each child's front and back
+ * area*|r+g+b|. The children must already have been summed. */
+static void sum_branch_radiance(ScatterTree *UNUSED(tree), ScatterNode *node)
+{
+	ScatterNode *subnode;
+	float rad, totrad= 0.0f, inv;
+	int i, totnode;
+
+	node->co[0]= node->co[1]= node->co[2]= 0.0;
+	node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
+	node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
+
+	/* compute total rad, rad weighted average position,
+	 * and total area */
+	for (i=0; i<8; i++) {
+		if
(node->child[i] == NULL)
+			continue;
+
+		subnode= node->child[i];
+
+		rad= subnode->area*fabsf(subnode->rad[0] + subnode->rad[1] + subnode->rad[2]);
+		rad += subnode->backarea*fabsf(subnode->backrad[0] + subnode->backrad[1] + subnode->backrad[2]);
+		totrad += rad;
+
+		node->co[0] += rad*subnode->co[0];
+		node->co[1] += rad*subnode->co[1];
+		node->co[2] += rad*subnode->co[2];
+
+		node->rad[0] += subnode->rad[0]*subnode->area;
+		node->rad[1] += subnode->rad[1]*subnode->area;
+		node->rad[2] += subnode->rad[2]*subnode->area;
+
+		node->backrad[0] += subnode->backrad[0]*subnode->backarea;
+		node->backrad[1] += subnode->backrad[1]*subnode->backarea;
+		node->backrad[2] += subnode->backrad[2]*subnode->backarea;
+
+		node->area += subnode->area;
+		node->backarea += subnode->backarea;
+	}
+
+	if (node->area > 1e-16f) {
+		inv= 1.0f/node->area;
+		node->rad[0] *= inv;
+		node->rad[1] *= inv;
+		node->rad[2] *= inv;
+	}
+	if (node->backarea > 1e-16f) {
+		inv= 1.0f/node->backarea;
+		node->backrad[0] *= inv;
+		node->backrad[1] *= inv;
+		node->backrad[2] *= inv;
+	}
+
+	if (totrad > 1e-16f) {
+		inv= 1.0f/totrad;
+		node->co[0] *= inv;
+		node->co[1] *= inv;
+		node->co[2] *= inv;
+	}
+	else {
+		/* make sure that if radiance is 0.0f, we still have these points in
+		 * the tree at a good position, they count for rdsum too */
+		totnode= 0;
+
+		for (i=0; i<8; i++) {
+			if (node->child[i]) {
+				subnode= node->child[i];
+
+				node->co[0] += subnode->co[0];
+				node->co[1] += subnode->co[1];
+				node->co[2] += subnode->co[2];
+
+				totnode++;
+			}
+		}
+
+		/* NOTE(review): divides by zero if the node has no children at
+		 * all; presumably a branch always has some - confirm for a tree
+		 * built from zero points. */
+		node->co[0] /= totnode;
+		node->co[1] /= totnode;
+		node->co[2] /= totnode;
+	}
+}
+/* Fill in the aggregated radiance of node and everything below it.
+ * Leaves (totpoint > 0) are summed directly; for a branch all existing
+ * children are processed first and then combined. */
+static void sum_radiance(ScatterTree *tree, ScatterNode *node)
+{
+	if (node->totpoint > 0) {
+		sum_leaf_radiance(tree, node);
+	}
+	else {
+		int i;
+
+		for (i=0; i<8; i++)
+			if (node->child[i])
+				sum_radiance(tree, node->child[i]);
+
+		sum_branch_radiance(tree, node);
+	}
+}
+/* Compute submid, the middle of child octant i: bits 1, 2 and 4 of i
+ * select +subsize (bit set) or -subsize (bit clear) as the offset from
+ * mid on the x, y and z axis. Same bit layout as SUBNODE_INDEX. */
+static void subnode_middle(int i, float *mid, float *subsize, float *submid)
+{
+	int x= i &
1, y= i & 2, z= i & 4;
+
+	submid[0]= mid[0] + ((x)? subsize[0]: -subsize[0]);
+	submid[1]= mid[1] + ((y)? subsize[1]: -subsize[1]);
+	submid[2]= mid[2] + ((z)? subsize[2]: -subsize[2]);
+}
+/* Recursively distribute the node->totpoint points referenced by
+ * refpoints over the octree below node.
+ *
+ * mid and size describe the cell being split (size is halved for the
+ * children). Recursion stops when the node holds at most
+ * MAX_OCTREE_NODE_POINTS points or depth reaches MAX_OCTREE_DEPTH; the
+ * referenced points are then copied into node->points and the node
+ * stays a leaf. Otherwise the points are bucketed by octant around mid,
+ * using tree->tmppoints as scratch space. */
+static void create_octree_node(ScatterTree *tree, ScatterNode *node, float *mid, float *size, ScatterPoint **refpoints, int depth)
+{
+	ScatterNode *subnode;
+	ScatterPoint **subrefpoints, **tmppoints= tree->tmppoints;
+	int index, nsize[8], noffset[8], i, subco, used_nodes, usedi;
+	float submid[3], subsize[3];
+
+	/* stopping condition */
+	if (node->totpoint <= MAX_OCTREE_NODE_POINTS || depth == MAX_OCTREE_DEPTH) {
+		for (i=0; i<node->totpoint; i++)
+			node->points[i]= *(refpoints[i]);
+
+		return;
+	}
+
+	subsize[0]= size[0]*0.5f;
+	subsize[1]= size[1]*0.5f;
+	subsize[2]= size[2]*0.5f;
+
+	node->split[0]= mid[0];
+	node->split[1]= mid[1];
+	node->split[2]= mid[2];
+
+	memset(nsize, 0, sizeof(nsize));
+	memset(noffset, 0, sizeof(noffset));
+
+	/* count points in subnodes */
+	for (i=0; i<node->totpoint; i++) {
+		index= SUBNODE_INDEX(refpoints[i]->co, node->split);
+		tmppoints[i]= refpoints[i];
+		nsize[index]++;
+	}
+
+	/* here we check if only one subnode is used. if this is the case, we don't
+	 * create a new node, but rather call this function again, with different
+	 * size and middle position for the same node.
 */
+	for (usedi=0, used_nodes=0, i=0; i<8; i++) {
+		if (nsize[i]) {
+			used_nodes++;
+			usedi = i;
+		}
+		if (i != 0)
+			noffset[i]= noffset[i-1]+nsize[i-1]; /* prefix sum: first slot of octant i */
+	}
+
+	if (used_nodes <= 1) {
+		subnode_middle(usedi, mid, subsize, submid);
+		create_octree_node(tree, node, submid, subsize, refpoints, depth+1);
+		return;
+	}
+
+	/* reorder refpoints by subnode */
+	for (i=0; i<node->totpoint; i++) {
+		index= SUBNODE_INDEX(tmppoints[i]->co, node->split);
+		refpoints[noffset[index]]= tmppoints[i];
+		noffset[index]++;
+	}
+
+	/* create subnodes */
+	for (subco=0, i=0; i<8; subco+=nsize[i], i++) {
+		if (nsize[i] > 0) {
+			subnode= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
+			node->child[i]= subnode;
+			/* each child gets a consecutive slice of the parent's point storage */
+			subnode->points= node->points + subco;
+			subnode->totpoint= nsize[i];
+			subrefpoints= refpoints + subco;
+
+			subnode_middle(i, mid, subsize, submid);
+
+			create_octree_node(tree, subnode, submid, subsize, subrefpoints,
+				depth+1);
+		}
+		else
+			node->child[i]= NULL;
+	}
+
+	/* totpoint == 0 is what the traversal and summing code use to
+	 * recognize a branch */
+	node->points= NULL;
+	node->totpoint= 0;
+}
+
+/* public functions */
+/* Allocate a scatter tree and copy the input samples into it.
+ *
+ * ss        per-channel settings; only the three pointers are stored
+ * scale     coordinates are divided by scale, areas by scale*scale
+ * error     stored as tree->error and used by the traversal error test
+ * co, color, area   totpoint entries each; values are copied here.
+ *           A negative area marks the sample as back-facing, its
+ *           absolute value is used as the area.
+ *
+ * The bounds tree->min/tree->max are computed here. The octree itself is
+ * only created by scatter_tree_build(). */
+ScatterTree *scatter_tree_new(ScatterSettings *ss[3], float scale, float error,
+	float (*co)[3], float (*color)[3], float *area, int totpoint)
+{
+	ScatterTree *tree;
+	ScatterPoint *points, **refpoints;
+	int i;
+
+	/* allocate tree */
+	tree= MEM_callocN(sizeof(ScatterTree), "ScatterTree");
+	tree->scale= scale;
+	tree->error= error;
+	tree->totpoint= totpoint;
+
+	tree->ss[0]= ss[0];
+	tree->ss[1]= ss[1];
+	tree->ss[2]= ss[2];
+
+	points = MEM_callocN(sizeof(ScatterPoint) * totpoint, "ScatterPoints");
+	refpoints = MEM_callocN(sizeof(ScatterPoint *) * totpoint, "ScatterRefPoints");
+
+	tree->points= points;
+	tree->refpoints= refpoints;
+
+	/* build points */
+	INIT_MINMAX(tree->min, tree->max);
+
+	for (i=0; i<totpoint; i++) {
+		copy_v3_v3(points[i].co, co[i]);
+		copy_v3_v3(points[i].rad, color[i]);
+		points[i].area= fabsf(area[i])/(tree->scale*tree->scale);
+		points[i].back= (area[i] < 0.0f);
+
+		mul_v3_fl(points[i].co, 1.0f
/ tree->scale);
+		minmax_v3v3_v3(tree->min, tree->max, points[i].co);
+
+		refpoints[i]= points + i;
+	}
+
+	return tree;
+}
+/* Build the octree over the points stored by scatter_tree_new().
+ *
+ * A node arena (returning zeroed memory) and a second point array are
+ * allocated. The root covers the bounding box of all points. After the
+ * recursive split the original point array and both pointer arrays are
+ * freed, and tree->points is replaced by the new array that the nodes
+ * point into. Finally the per-node radiance sums are computed. */
+void scatter_tree_build(ScatterTree *tree)
+{
+	ScatterPoint *newpoints, **tmppoints;
+	float mid[3], size[3];
+	int totpoint= tree->totpoint;
+
+	newpoints = MEM_callocN(sizeof(ScatterPoint) * totpoint, "ScatterPoints");
+	tmppoints = MEM_callocN(sizeof(ScatterPoint *) * totpoint, "ScatterTmpPoints");
+	tree->tmppoints= tmppoints;
+
+	tree->arena= BLI_memarena_new(0x8000 * sizeof(ScatterNode), "sss tree arena");
+	BLI_memarena_use_calloc(tree->arena);
+
+	/* build tree */
+	tree->root= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
+	tree->root->points= newpoints;
+	tree->root->totpoint= totpoint;
+
+	mid[0]= (tree->min[0]+tree->max[0])*0.5f;
+	mid[1]= (tree->min[1]+tree->max[1])*0.5f;
+	mid[2]= (tree->min[2]+tree->max[2])*0.5f;
+
+	size[0]= (tree->max[0]-tree->min[0])*0.5f;
+	size[1]= (tree->max[1]-tree->min[1])*0.5f;
+	size[2]= (tree->max[2]-tree->min[2])*0.5f;
+
+	create_octree_node(tree, tree->root, mid, size, tree->refpoints, 0);
+
+	MEM_freeN(tree->points);
+	MEM_freeN(tree->refpoints);
+	MEM_freeN(tree->tmppoints);
+	tree->refpoints= NULL;
+	tree->tmppoints= NULL;
+	tree->points= newpoints;
+
+	/* sum radiance at nodes */
+	sum_radiance(tree, tree->root);
+}
+/* Sample the scattered radiance at co into color[3]. co is given in the
+ * caller's units and divided by tree->scale before the lookup. */
+void scatter_tree_sample(ScatterTree *tree, const float co[3], float color[3])
+{
+	float sco[3];
+
+	copy_v3_v3(sco, co);
+	mul_v3_fl(sco, 1.0f / tree->scale);
+
+	compute_radiance(tree, sco, color);
+}
+/* Free the tree, its node arena and whichever point arrays are still
+ * set. The ScatterSettings referenced by tree->ss are not freed here. */
+void scatter_tree_free(ScatterTree *tree)
+{
+	if (tree->arena) BLI_memarena_free(tree->arena);
+	if (tree->points) MEM_freeN(tree->points);
+	if (tree->refpoints) MEM_freeN(tree->refpoints);
+
+	MEM_freeN(tree);
+}
+
+/* Internal Renderer API */
+
+/* sss tree building */
+/* Per-material SSS data stored in re->sss_hash: the scatter tree and
+ * the three per-channel settings it references. */
+typedef struct SSSData {
+	ScatterTree *tree;
+	ScatterSettings *ss[3];
+} SSSData;
+/* One batch of sample points handed in through sss_add_points(); the
+ * batches are linked in a ListBase until they are merged. */
+typedef struct SSSPoints {
+	struct SSSPoints *next, *prev;
+
+	float (*co)[3];
+	float (*color)[3];
+
float *area;
+	int totpoint;
+} SSSPoints;
+/* Create the SSS tree for one material and store it in re->sss_hash.
+ *
+ * A preprocessing render is run through RE_TileProcessor() with re->osa
+ * set to 0 and the R_OSA and R_EDGE_FRS mode bits cleared. While it runs,
+ * re->sss_points and re->sss_mat point at a local list and at mat.
+ * The previous render state is restored afterwards. The function returns
+ * early, without creating a tree, when test_break reports a break or
+ * when no points were collected. */
+static void sss_create_tree_mat(Render *re, Material *mat)
+{
+	SSSPoints *p;
+	RenderResult *rr;
+	ListBase points;
+	float (*co)[3] = NULL, (*color)[3] = NULL, *area = NULL;
+	int totpoint = 0, osa, osaflag, frsflag, partsdone;
+
+	if (re->test_break(re->tbh))
+		return;
+
+	points.first= points.last= NULL;
+
+	/* TODO: this is getting a bit ugly, copying all those variables and
+	 * setting them back, maybe we need to create our own Render? */
+
+	/* do SSS preprocessing render */
+	BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
+	rr= re->result;
+	osa= re->osa;
+	osaflag= re->r.mode & R_OSA;
+	frsflag= re->r.mode & R_EDGE_FRS;
+	partsdone= re->i.partsdone;
+
+	re->osa= 0;
+	re->r.mode &= ~(R_OSA | R_EDGE_FRS);
+	re->sss_points= &points;
+	re->sss_mat= mat;
+	re->i.partsdone = 0;
+
+	/* outside preview renders the current result is detached for the
+	 * duration of the pass and put back below */
+	if (!(re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW)))
+		re->result= NULL;
+	BLI_rw_mutex_unlock(&re->resultmutex);
+
+	RE_TileProcessor(re);
+
+	BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
+	if (!(re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))) {
+		RE_FreeRenderResult(re->result);
+		re->result= rr;
+	}
+	BLI_rw_mutex_unlock(&re->resultmutex);
+
+	re->i.partsdone= partsdone;
+	re->sss_mat= NULL;
+	re->sss_points= NULL;
+	re->osa= osa;
+	if (osaflag) re->r.mode |= R_OSA;
+	if (frsflag) re->r.mode |= R_EDGE_FRS;
+
+	/* no points?
no tree */
+	if (!points.first)
+		return;
+
+	/* merge points together into a single buffer */
+	if (!re->test_break(re->tbh)) {
+		/* first pass counts, second pass copies and counts again */
+		for (totpoint=0, p=points.first; p; p=p->next)
+			totpoint += p->totpoint;
+
+		co= MEM_mallocN(sizeof(*co)*totpoint, "SSSCo");
+		color= MEM_mallocN(sizeof(*color)*totpoint, "SSSColor");
+		area= MEM_mallocN(sizeof(*area)*totpoint, "SSSArea");
+
+		for (totpoint=0, p=points.first; p; p=p->next) {
+			memcpy(co+totpoint, p->co, sizeof(*co)*p->totpoint);
+			memcpy(color+totpoint, p->color, sizeof(*color)*p->totpoint);
+			memcpy(area+totpoint, p->area, sizeof(*area)*p->totpoint);
+			totpoint += p->totpoint;
+		}
+	}
+
+	/* free points */
+	for (p=points.first; p; p=p->next) {
+		MEM_freeN(p->co);
+		MEM_freeN(p->color);
+		MEM_freeN(p->area);
+	}
+	BLI_freelistN(&points);
+
+	/* build tree */
+	if (!re->test_break(re->tbh)) {
+		SSSData *sss= MEM_callocN(sizeof(*sss), "SSSData");
+		float ior= mat->sss_ior, cfac= mat->sss_colfac;
+		const float *radius = mat->sss_radius;
+		float fw= mat->sss_front, bw= mat->sss_back;
+		float error = mat->sss_error;
+
+		error= get_render_aosss_error(&re->r, error);
+		/* preview renders use an error of at least 0.5 */
+		if ((re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW)) && error < 0.5f)
+			error= 0.5f;
+
+		/* one settings struct per color channel */
+		sss->ss[0]= scatter_settings_new(mat->sss_col[0], radius[0], ior, cfac, fw, bw);
+		sss->ss[1]= scatter_settings_new(mat->sss_col[1], radius[1], ior, cfac, fw, bw);
+		sss->ss[2]= scatter_settings_new(mat->sss_col[2], radius[2], ior, cfac, fw, bw);
+		sss->tree= scatter_tree_new(sss->ss, mat->sss_scale, error,
+			co, color, area, totpoint);
+
+		/* scatter_tree_new() copied the samples, the merged buffers can go */
+		MEM_freeN(co);
+		MEM_freeN(color);
+		MEM_freeN(area);
+
+		scatter_tree_build(sss->tree);
+
+		BLI_ghash_insert(re->sss_hash, mat, sss);
+	}
+	else {
+		if (co) MEM_freeN(co);
+		if (color) MEM_freeN(color);
+		if (area) MEM_freeN(area);
+	}
+}
+/* Queue a batch of totpoint sample points on re->sss_points.
+ *
+ * The three buffers are not copied: the list entry keeps the pointers
+ * and sss_create_tree_mat() later releases them with MEM_freeN(). When
+ * totpoint <= 0 nothing is queued and the buffers are left untouched.
+ * The list append is guarded by LOCK_CUSTOM1. */
+void sss_add_points(Render *re, float (*co)[3], float (*color)[3], float *area, int totpoint)
+{
+	SSSPoints *p;
+
+	if (totpoint > 0) {
+		p= MEM_callocN(sizeof(SSSPoints), "SSSPoints");
+
+
p->co= co;
+		p->color= color;
+		p->area= area;
+		p->totpoint= totpoint;
+
+		BLI_thread_lock(LOCK_CUSTOM1);
+		BLI_addtail(re->sss_points, p);
+		BLI_thread_unlock(LOCK_CUSTOM1);
+	}
+}
+/* Free one SSSData entry: the tree, its three settings and the struct. */
+static void sss_free_tree(SSSData *sss)
+{
+	scatter_tree_free(sss->tree);
+	scatter_settings_free(sss->ss[0]);
+	scatter_settings_free(sss->ss[1]);
+	scatter_settings_free(sss->ss[2]);
+	MEM_freeN(sss);
+}
+
+/* public functions */
+/* (Re)build the SSS trees of the render. Existing trees are freed
+ * first. A tree is then created for every material in re->main that has
+ * users, is flagged MA_IS_USED and has MA_DIFF_SSS set. When re->main
+ * differs from G.main, G.main's materials are processed the same way.
+ * re->i.infostr is switched to "SSS preprocessing" while trees are
+ * created and restored at the end. */
+void make_sss_tree(Render *re)
+{
+	Material *mat;
+	bool infostr_set = false;
+	const char *prevstr = NULL;
+
+	free_sss(re);
+
+	re->sss_hash= BLI_ghash_ptr_new("make_sss_tree gh");
+
+	re->stats_draw(re->sdh, &re->i);
+
+	for (mat= re->main->mat.first; mat; mat= mat->id.next) {
+		if (mat->id.us && (mat->flag & MA_IS_USED) && (mat->sss_flag & MA_DIFF_SSS)) {
+			if (!infostr_set) {
+				prevstr = re->i.infostr;
+				re->i.infostr = IFACE_("SSS preprocessing");
+				infostr_set = true;
+			}
+
+			sss_create_tree_mat(re, mat);
+		}
+	}
+
+	/* XXX preview exception */
+	/* localizing preview render data is not fun for node trees :( */
+	if (re->main!=G.main) {
+		for (mat= G.main->mat.first; mat; mat= mat->id.next) {
+			if (mat->id.us && (mat->flag & MA_IS_USED) && (mat->sss_flag & MA_DIFF_SSS)) {
+				if (!infostr_set) {
+					prevstr = re->i.infostr;
+					re->i.infostr = IFACE_("SSS preprocessing");
+					infostr_set = true;
+				}
+
+				sss_create_tree_mat(re, mat);
+			}
+		}
+	}
+
+	if (infostr_set)
+		re->i.infostr = prevstr;
+}
+/* Free every SSSData stored in re->sss_hash, then the hash itself, and
+ * reset the pointer. Does nothing when no hash exists. */
+void free_sss(Render *re)
+{
+	if (re->sss_hash) {
+		GHashIterator gh_iter;
+
+		GHASH_ITER (gh_iter, re->sss_hash) {
+			sss_free_tree(BLI_ghashIterator_getValue(&gh_iter));
+		}
+
+		/* values were freed above, so no free callbacks are passed */
+		BLI_ghash_free(re->sss_hash, NULL, NULL);
+		re->sss_hash= NULL;
+	}
+}
+/* Look up the SSS tree of mat and sample it at co.
+ *
+ * Returns 1 with color filled in when a tree exists for mat, 0
+ * otherwise. color is zeroed only when the hash exists but has no entry
+ * for mat; without a hash color is left untouched. */
+int sample_sss(Render *re, Material *mat, const float co[3], float color[3])
+{
+	if (re->sss_hash) {
+		SSSData *sss= BLI_ghash_lookup(re->sss_hash, mat);
+
+		if (sss) {
+			scatter_tree_sample(sss->tree, co, color);
+			return 1;
+		}
+		else {
+			color[0]= 0.0f;
+			color[1]= 0.0f;
+			color[2]= 0.0f;
+		}
+	}
+
+
return 0;
+}
+/* Non-zero when no SSS preprocessing is pending for mat: the render is
+ * baking (R_BAKING), SSS is disabled in the render mode (R_SSS clear), or
+ * a tree for mat is already stored in re->sss_hash. */
+int sss_pass_done(struct Render *re, struct Material *mat)
+{
+	return ((re->flag & R_BAKING) || !(re->r.mode & R_SSS) || (re->sss_hash && BLI_ghash_lookup(re->sss_hash, mat)));
+}
+
diff --git a/source/blender/render/intern/source/strand.c b/source/blender/render/intern/source/strand.c
new file mode 100644
index 00000000000..5fde688481a
--- /dev/null
+++ b/source/blender/render/intern/source/strand.c
@@ -0,0 +1,1069 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Contributors: Brecht Van Lommel.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/strand.c
+ *  \ingroup render
+ */
+
+
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_key_types.h"
+#include "DNA_material_types.h"
+#include "DNA_meshdata_types.h"
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+#include "BLI_ghash.h"
+#include "BLI_memarena.h"
+#include "BLI_rand.h"
+
+#include "BKE_DerivedMesh.h"
+#include "BKE_key.h"
+
+
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "shading.h"
+#include "strand.h"
+#include "zbuf.h"
+
+/* *************** */
+/* Strand width at strandco for material ma.
+ *
+ * strandco is first remapped with 0.5*(strandco + 1). A non-zero
+ * ma->strand_ease then shapes it with a power curve: exponent
+ * 1+ease for negative ease, 1/(1-ease) for positive ease. The result
+ * blends linearly from ma->strand_sta (factor 0) to ma->strand_end
+ * (factor 1). */
+static float strand_eval_width(Material *ma, float strandco)
+{
+	float fac;
+
+	strandco= 0.5f*(strandco + 1.0f);
+
+	if (ma->strand_ease!=0.0f) {
+		if (ma->strand_ease<0.0f)
+			fac= pow(strandco, 1.0f+ma->strand_ease);
+		else
+			fac= pow(strandco, 1.0f/(1.0f-ma->strand_ease));
+	}
+	else fac= strandco;
+
+	return ((1.0f-fac)*ma->strand_sta + (fac)*ma->strand_end);
+}
+/* Evaluate the strand segment sseg at parameter spoint->t and fill
+ * spoint: position (co), strand coordinate, tangent (dtco, tan), normal,
+ * width, alpha and the two outer points co1/co2 that give the strand its
+ * width. The curve runs from control point v[1] (t == 0) to v[2]
+ * (t == 1); v[0] and v[3] are the neighbouring control points. */
+void strand_eval_point(StrandSegment *sseg, StrandPoint *spoint)
+{
+	Material *ma;
+	StrandBuffer *strandbuf;
+	const float *simplify;
+	float p[4][3], data[4], cross[3], w, dx, dy, t;
+	int type;
+
+	strandbuf= sseg->buffer;
+	ma= sseg->buffer->ma;
+	t= spoint->t;
+	type= (strandbuf->flag & R_STRAND_BSPLINE)?
KEY_BSPLINE: KEY_CARDINAL;
+
+	copy_v3_v3(p[0], sseg->v[0]->co);
+	copy_v3_v3(p[1], sseg->v[1]->co);
+	copy_v3_v3(p[2], sseg->v[2]->co);
+	copy_v3_v3(p[3], sseg->v[3]->co);
+
+	/* instanced objects: apply the instance matrix to the control points */
+	if (sseg->obi->flag & R_TRANSFORMED) {
+		mul_m4_v3(sseg->obi->mat, p[0]);
+		mul_m4_v3(sseg->obi->mat, p[1]);
+		mul_m4_v3(sseg->obi->mat, p[2]);
+		mul_m4_v3(sseg->obi->mat, p[3]);
+	}
+
+	/* at the exact end points the control point itself is used instead
+	 * of the curve weights; dtstrandco is only written in these two
+	 * branches */
+	if (t == 0.0f) {
+		copy_v3_v3(spoint->co, p[1]);
+		spoint->strandco= sseg->v[1]->strandco;
+
+		spoint->dtstrandco= (sseg->v[2]->strandco - sseg->v[0]->strandco);
+		if (sseg->v[0] != sseg->v[1])
+			spoint->dtstrandco *= 0.5f;
+	}
+	else if (t == 1.0f) {
+		copy_v3_v3(spoint->co, p[2]);
+		spoint->strandco= sseg->v[2]->strandco;
+
+		spoint->dtstrandco= (sseg->v[3]->strandco - sseg->v[1]->strandco);
+		if (sseg->v[3] != sseg->v[2])
+			spoint->dtstrandco *= 0.5f;
+	}
+	else {
+		key_curve_position_weights(t, data, type);
+		spoint->co[0]= data[0]*p[0][0] + data[1]*p[1][0] + data[2]*p[2][0] + data[3]*p[3][0];
+		spoint->co[1]= data[0]*p[0][1] + data[1]*p[1][1] + data[2]*p[2][1] + data[3]*p[3][1];
+		spoint->co[2]= data[0]*p[0][2] + data[1]*p[1][2] + data[2]*p[2][2] + data[3]*p[3][2];
+		spoint->strandco= (1.0f-t)*sseg->v[1]->strandco + t*sseg->v[2]->strandco;
+	}
+
+	/* the tangent always comes from the curve weights, also at the end points */
+	key_curve_tangent_weights(t, data, type);
+	spoint->dtco[0]= data[0]*p[0][0] + data[1]*p[1][0] + data[2]*p[2][0] + data[3]*p[3][0];
+	spoint->dtco[1]= data[0]*p[0][1] + data[1]*p[1][1] + data[2]*p[2][1] + data[3]*p[3][1];
+	spoint->dtco[2]= data[0]*p[0][2] + data[1]*p[1][2] + data[2]*p[2][2] + data[3]*p[3][2];
+
+	/* the normal is the negated, normalized position vector */
+	normalize_v3_v3(spoint->tan, spoint->dtco);
+	normalize_v3_v3(spoint->nor, spoint->co);
+	negate_v3(spoint->nor);
+
+	spoint->width= strand_eval_width(ma, spoint->strandco);
+
+	/* simplification */
+	simplify= RE_strandren_get_simplify(strandbuf->obr, sseg->strand, 0);
+	spoint->alpha= (simplify)?
simplify[1]: 1.0f;
+
+	/* outer points */
+	cross_v3_v3v3(cross, spoint->co, spoint->tan);
+
+	/* w: length of the cross vector after scaling x/y with the window
+	 * matrix and size and dividing by the homogeneous w at this depth */
+	w= spoint->co[2]*strandbuf->winmat[2][3] + strandbuf->winmat[3][3];
+	dx= strandbuf->winx*cross[0]*strandbuf->winmat[0][0]/w;
+	dy= strandbuf->winy*cross[1]*strandbuf->winmat[1][1]/w;
+	w = sqrtf(dx * dx + dy * dy);
+
+	if (w > 0.0f) {
+		if (strandbuf->flag & R_STRAND_B_UNITS) {
+			const float crosslen= len_v3(cross);
+			w= 2.0f*crosslen*strandbuf->minwidth/w;
+
+			/* narrower than the minimum: widen to the minimum and
+			 * lower alpha by the same ratio instead */
+			if (spoint->width < w) {
+				spoint->alpha= spoint->width/w;
+				spoint->width= w;
+			}
+
+			if (simplify)
+				/* squared because we only change width, not length */
+				spoint->width *= simplify[0]*simplify[0];
+
+			mul_v3_fl(cross, spoint->width*0.5f/crosslen);
+		}
+		else
+			mul_v3_fl(cross, spoint->width/w);
+	}
+
+	sub_v3_v3v3(spoint->co1, spoint->co, cross);
+	add_v3_v3v3(spoint->co2, spoint->co, cross);
+
+	copy_v3_v3(spoint->dsco, cross);
+}
+
+/* *************** */
+/* v[0] = negt*v1[0] + t*v2[0]; the caller in this file passes
+ * negt = 1 - t. */
+static void interpolate_vec1(float *v1, float *v2, float t, float negt, float *v)
+{
+	v[0]= negt*v1[0] + t*v2[0];
+}
+/* Three-component version of interpolate_vec1(). */
+static void interpolate_vec3(float *v1, float *v2, float t, float negt, float *v)
+{
+	v[0]= negt*v1[0] + t*v2[0];
+	v[1]= negt*v1[1] + t*v2[1];
+	v[2]= negt*v1[2] + t*v2[2];
+}
+/* Four-component version of interpolate_vec1(). */
+static void interpolate_vec4(float *v1, float *v2, float t, float negt, float *v)
+{
+	v[0]= negt*v1[0] + t*v2[0];
+	v[1]= negt*v1[1] + t*v2[1];
+	v[2]= negt*v1[2] + t*v2[2];
+	v[3]= negt*v1[3] + t*v2[3];
+}
+/* Blend two shade results into shr: (1-t)*shr1 + t*shr2. combined is
+ * always interpolated; every other pass only when its SCE_PASS_* bit is
+ * set in addpassflag. The interpolated normal is re-normalized. */
+static void interpolate_shade_result(ShadeResult *shr1, ShadeResult *shr2, float t, ShadeResult *shr, int addpassflag)
+{
+	float negt= 1.0f - t;
+
+	interpolate_vec4(shr1->combined, shr2->combined, t, negt, shr->combined);
+
+	if (addpassflag & SCE_PASS_VECTOR) {
+		interpolate_vec4(shr1->winspeed, shr2->winspeed, t, negt, shr->winspeed);
+	}
+	/* optim...
 */
+	if (addpassflag & ~(SCE_PASS_VECTOR)) {
+		if (addpassflag & SCE_PASS_Z)
+			interpolate_vec1(&shr1->z, &shr2->z, t, negt, &shr->z);
+		if (addpassflag & SCE_PASS_RGBA)
+			interpolate_vec4(shr1->col, shr2->col, t, negt, shr->col);
+		if (addpassflag & SCE_PASS_NORMAL) {
+			interpolate_vec3(shr1->nor, shr2->nor, t, negt, shr->nor);
+			normalize_v3(shr->nor);
+		}
+		if (addpassflag & SCE_PASS_EMIT)
+			interpolate_vec3(shr1->emit, shr2->emit, t, negt, shr->emit);
+		if (addpassflag & SCE_PASS_DIFFUSE) {
+			interpolate_vec3(shr1->diff, shr2->diff, t, negt, shr->diff);
+			interpolate_vec3(shr1->diffshad, shr2->diffshad, t, negt, shr->diffshad);
+		}
+		if (addpassflag & SCE_PASS_SPEC)
+			interpolate_vec3(shr1->spec, shr2->spec, t, negt, shr->spec);
+		if (addpassflag & SCE_PASS_SHADOW)
+			interpolate_vec3(shr1->shad, shr2->shad, t, negt, shr->shad);
+		if (addpassflag & SCE_PASS_AO)
+			interpolate_vec3(shr1->ao, shr2->ao, t, negt, shr->ao);
+		if (addpassflag & SCE_PASS_ENVIRONMENT)
+			interpolate_vec3(shr1->env, shr2->env, t, negt, shr->env);
+		if (addpassflag & SCE_PASS_INDIRECT)
+			interpolate_vec3(shr1->indirect, shr2->indirect, t, negt, shr->indirect);
+		if (addpassflag & SCE_PASS_REFLECT)
+			interpolate_vec3(shr1->refl, shr2->refl, t, negt, shr->refl);
+		if (addpassflag & SCE_PASS_REFRACT)
+			interpolate_vec3(shr1->refr, shr2->refr, t, negt, shr->refr);
+		if (addpassflag & SCE_PASS_MIST)
+			interpolate_vec1(&shr1->mist, &shr2->mist, t, negt, &shr->mist);
+	}
+}
+/* Multiply combined[0..3], col[0..3] and alpha of shr by alpha. Values
+ * of alpha >= 1 leave shr unchanged. */
+static void strand_apply_shaderesult_alpha(ShadeResult *shr, float alpha)
+{
+	if (alpha < 1.0f) {
+		shr->combined[0] *= alpha;
+		shr->combined[1] *= alpha;
+		shr->combined[2] *= alpha;
+		shr->combined[3] *= alpha;
+
+		shr->col[0] *= alpha;
+		shr->col[1] *= alpha;
+		shr->col[2] *= alpha;
+		shr->col[3] *= alpha;
+
+		shr->alpha *= alpha;
+	}
+}
+/* Shade one strand vertex. The evaluated point spoint is set up as
+ * shading input on ssamp->shi and the result is written to ssamp->shr.
+ * svert is only used to derive the random seed. */
+static void strand_shade_point(Render *re, ShadeSample *ssamp, StrandSegment *sseg, StrandVert *svert, StrandPoint *spoint)
+{
+	ShadeInput *shi= ssamp->shi;
+	ShadeResult *shr=
ssamp->shr;
+	VlakRen vlr;
+	int seed;
+
+	/* strands have no face: a zeroed stack VlakRen carries only the flags */
+	memset(&vlr, 0, sizeof(vlr));
+	vlr.flag= R_SMOOTH;
+	if (sseg->buffer->ma->mode & MA_TANGENT_STR)
+		vlr.flag |= R_TANGENT;
+
+	shi->vlr= &vlr;
+	shi->v1= NULL;
+	shi->v2= NULL;
+	shi->v3= NULL;
+	shi->strand= sseg->strand;
+	shi->obi= sseg->obi;
+	shi->obr= sseg->obi->obr;
+
+	/* cache for shadow */
+	shi->samplenr= re->shadowsamplenr[shi->thread]++;
+
+	/* all samples */
+	shi->mask= 0xFFFF;
+
+	/* seed RNG for consistent results across tiles */
+	seed = shi->strand->index + (svert - shi->strand->vert);
+	BLI_thread_srandom(shi->thread, seed);
+
+	shade_input_set_strand(shi, sseg->strand, spoint);
+	shade_input_set_strand_texco(shi, sseg->strand, sseg->v[1], spoint);
+
+	/* init material vars */
+	shade_input_init_material(shi);
+
+	/* shade */
+	shade_samples_do_AO(ssamp);
+	shade_input_do_shade(shi, shr);
+
+	/* apply simplification */
+	strand_apply_shaderesult_alpha(shr, spoint->alpha);
+
+	/* include lamphalos for strand, since halo layer was added already */
+	if (re->flag & R_LAMPHALO)
+		if (shi->layflag & SCE_LAY_HALO)
+			renderspothalo(shi, shr->combined, shr->combined[3]);
+
+	/* NOTE(review): shi->vlr still points at the local vlr on return;
+	 * only shi->strand is reset - confirm no caller reads shi->vlr later */
+	shi->strand= NULL;
+}
+
+/* *************** */
+/* Cache of per-vertex shading results, keyed by (object instance,
+ * strand vertex) pairs. refcounthash holds, per key, the number of
+ * pending uses. Its keys and counters are allocated from memarena. */
+struct StrandShadeCache {
+	GHash *resulthash;
+	GHash *refcounthash;
+	MemArena *memarena;
+};
+/* Entry of resulthash. The pair is the first member and the entry is
+ * inserted as both key and value, so the entry pointer doubles as the
+ * hash key. */
+typedef struct StrandCacheEntry {
+	GHashPair pair;
+	ShadeResult shr;
+} StrandCacheEntry;
+/* Allocate an empty cache. Free it with strand_shade_cache_free(). */
+StrandShadeCache *strand_shade_cache_create(void)
+{
+	StrandShadeCache *cache;
+
+	cache= MEM_callocN(sizeof(StrandShadeCache), "StrandShadeCache");
+	cache->resulthash= BLI_ghash_pair_new("strand_shade_cache_create1 gh");
+	cache->refcounthash= BLI_ghash_pair_new("strand_shade_cache_create2 gh");
+	cache->memarena= BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "strand shade cache arena");
+
+	return cache;
+}
+/* Free the cache. Result entries are released through the key free
+ * callback only (key and value are the same allocation). Refcount keys
+ * and values live in the arena and go away with it. */
+void strand_shade_cache_free(StrandShadeCache *cache)
+{
+	BLI_ghash_free(cache->refcounthash, NULL, NULL);
+	BLI_ghash_free(cache->resulthash, MEM_freeN, NULL);
+
BLI_memarena_free(cache->memarena);
+	MEM_freeN(cache);
+}
+/* Build the hash key for a strand vertex of an object instance. */
+static GHashPair strand_shade_hash_pair(ObjectInstanceRen *obi, StrandVert *svert)
+{
+	GHashPair pair = {obi, svert};
+	return pair;
+}
+/* Get the shading of strand vertex svert into ssamp->shr[0], shading it
+ * on first use and reusing the cached result afterwards. svert is
+ * expected to be sseg->v[1] (evaluated at t = 0); any other vertex is
+ * evaluated at t = 1. Each call also consumes one reference of the
+ * vertex. */
+static void strand_shade_get(Render *re, StrandShadeCache *cache, ShadeSample *ssamp, StrandSegment *sseg, StrandVert *svert)
+{
+	StrandCacheEntry *entry;
+	StrandPoint p;
+	int *refcount;
+	GHashPair pair = strand_shade_hash_pair(sseg->obi, svert);
+
+	entry= BLI_ghash_lookup(cache->resulthash, &pair);
+	refcount= BLI_ghash_lookup(cache->refcounthash, &pair);
+
+	if (!entry) {
+		/* not shaded yet, shade and insert into hash */
+		p.t= (sseg->v[1] == svert)? 0.0f: 1.0f;
+		strand_eval_point(sseg, &p);
+		strand_shade_point(re, ssamp, sseg, svert, &p);
+
+		entry= MEM_callocN(sizeof(StrandCacheEntry), "StrandCacheEntry");
+		entry->pair = pair;
+		entry->shr = ssamp->shr[0];
+		BLI_ghash_insert(cache->resulthash, entry, entry);
+	}
+	else
+		/* already shaded, just copy previous result from hash */
+		ssamp->shr[0]= entry->shr;
+
+	/* lower reference count and remove if not needed anymore by any samples.
+	 * NOTE(review): refcount is dereferenced without a NULL check;
+	 * presumably every pair reaching this point was registered through
+	 * strand_shade_refcount() first - confirm. */
+	(*refcount)--;
+	if (*refcount == 0) {
+		BLI_ghash_remove(cache->resulthash, &pair, MEM_freeN, NULL);
+		BLI_ghash_remove(cache->refcounthash, &pair, NULL, NULL);
+	}
+}
+/* Shade a sample on segment sseg: the cached shading of the two end
+ * vertices v[1] and v[2] is interpolated with factor t into ssamp->shr.
+ * Unless the buffer's widthfade is -1, the result is additionally
+ * multiplied by 1 - |s|^widthfade. */
+void strand_shade_segment(Render *re, StrandShadeCache *cache, StrandSegment *sseg, ShadeSample *ssamp, float t, float s, int addpassflag)
+{
+	ShadeResult shr1, shr2;
+
+	/* get shading for two endpoints and interpolate */
+	strand_shade_get(re, cache, ssamp, sseg, sseg->v[1]);
+	shr1= ssamp->shr[0];
+	strand_shade_get(re, cache, ssamp, sseg, sseg->v[2]);
+	shr2= ssamp->shr[0];
+
+	interpolate_shade_result(&shr1, &shr2, t, ssamp->shr, addpassflag);
+
+	/* apply alpha along width */
+	if (sseg->buffer->widthfade != -1.0f) {
+		s = 1.0f - powf(fabsf(s), sseg->buffer->widthfade);
+
+		strand_apply_shaderesult_alpha(ssamp->shr, s);
+	}
+}
+/* Drop one reference of (obi, svert) without shading it; when the
+ * count reaches zero the cached result and the counter entry are
+ * removed. The pair must have a counter (it is dereferenced without a
+ * NULL check). */
+void strand_shade_unref(StrandShadeCache *cache, ObjectInstanceRen
*obi, StrandVert *svert)
+{
+	GHashPair pair = strand_shade_hash_pair(obi, svert);
+	int *refcount;
+
+	/* lower reference count and remove if not needed anymore by any samples */
+	refcount= BLI_ghash_lookup(cache->refcounthash, &pair);
+
+	(*refcount)--;
+	if (*refcount == 0) {
+		BLI_ghash_remove(cache->resulthash, &pair, MEM_freeN, NULL);
+		BLI_ghash_remove(cache->refcounthash, &pair, NULL, NULL);
+	}
+}
+/* Add one reference for strand vertex svert of sseg->obi. The first
+ * reference allocates key and counter from the cache's arena, later ones
+ * only increment the counter. */
+static void strand_shade_refcount(StrandShadeCache *cache, StrandSegment *sseg, StrandVert *svert)
+{
+	GHashPair pair = strand_shade_hash_pair(sseg->obi, svert);
+	GHashPair *key;
+	int *refcount= BLI_ghash_lookup(cache->refcounthash, &pair);
+
+	if (!refcount) {
+		key= BLI_memarena_alloc(cache->memarena, sizeof(GHashPair));
+		*key = pair;
+		refcount= BLI_memarena_alloc(cache->memarena, sizeof(int));
+		*refcount= 1;
+		BLI_ghash_insert(cache->refcounthash, key, refcount);
+	}
+	else
+		(*refcount)++;
+}
+
+/* *************** */
+/* State handed to do_strand_fillac() as its handle: the target pixel
+ * buffers, the current sample index and the segment being filled, with
+ * its per-corner t and s values. */
+typedef struct StrandPart {
+	Render *re;
+	ZSpan *zspan;
+
+	APixstrand *apixbuf;
+	int *totapixbuf;
+	int *rectz;
+	int *rectmask;
+	intptr_t *rectdaps;
+	int rectx, recty;
+	int sample;
+	int shadow;
+	float (*jit)[2];
+	int samples;
+
+	StrandSegment *segment;
+	float t[3], s[3];
+
+	StrandShadeCache *cache;
+} StrandPart;
+/* Sort record for one strand segment, ordered by z through
+ * compare_strand_segment(). */
+typedef struct StrandSortSegment {
+	struct StrandSortSegment *next;
+	int obi, strand, segment;
+	float z;
+} StrandSortSegment;
+/* Three-way comparison of two StrandSortSegment by ascending z:
+ * returns -1, 0 or 1. */
+static int compare_strand_segment(const void *poin1, const void *poin2)
+{
+	const StrandSortSegment *seg1= (const StrandSortSegment*)poin1;
+	const StrandSortSegment *seg2= (const StrandSortSegment*)poin2;
+
+	if (seg1->z < seg2->z)
+		return -1;
+	else if (seg1->z == seg2->z)
+		return 0;
+	else
+		return 1;
+}
+/* Project co with winmat into hoco, then convert hoco into zco via
+ * hoco_to_zco() for zspan. */
+static void do_strand_point_project(float winmat[4][4], ZSpan *zspan, float *co, float *hoco, float *zco)
+{
+	projectvert(co, winmat, hoco);
+	hoco_to_zco(zspan, zco, hoco);
+}
+/* Project spoint->co into spoint->hoco and store the perspective-divided
+ * x and y, scaled by half the window size, in spoint->x and spoint->y. */
+static void strand_project_point(float winmat[4][4], float winx, float winy, StrandPoint
*spoint)
+{
+	float div;
+
+	projectvert(spoint->co, winmat, spoint->hoco);
+
+	div= 1.0f/spoint->hoco[3];
+	spoint->x= spoint->hoco[0]*div*winx*0.5f;
+	spoint->y= spoint->hoco[1]*div*winy*0.5f;
+}
+/* Allocate a new block of 4096 zeroed APixstrand, link its header into
+ * lb and return the first element. */
+static APixstrand *addpsmainAstrand(ListBase *lb)
+{
+	APixstrMain *psm;
+
+	psm= MEM_mallocN(sizeof(APixstrMain), "addpsmainA");
+	BLI_addtail(lb, psm);
+	psm->ps = MEM_callocN(4096 * sizeof(APixstrand), "pixstr");
+
+	return psm->ps;
+}
+/* Return the next free APixstrand of zspan, starting a new 4096-entry
+ * block when the counter of remaining entries has reached zero. */
+static APixstrand *addpsAstrand(ZSpan *zspan)
+{
+	/* make new PS */
+	if (zspan->apstrandmcounter==0) {
+		zspan->curpstrand= addpsmainAstrand(zspan->apsmbase);
+		zspan->apstrandmcounter= 4095;
+	}
+	else {
+		zspan->curpstrand++;
+		zspan->apstrandmcounter--;
+	}
+	return zspan->curpstrand;
+}
+/* upper bound on spart->totapixbuf[] per pixel, tested in do_strand_fillac() */
+#define MAX_ZROW	2000
+/* Record a strand sample at pixel (x, y).
+ *
+ * handle is a StrandPart. u and v weight the per-corner t/s values of
+ * the part, with the third weight being 1 - u - v. z is the sample
+ * depth; it is compared against the solid z-buffer (and the mask buffer,
+ * when present) before the sample is stored in the pixel's APixstrand
+ * list. */
+static void do_strand_fillac(void *handle, int x, int y, float u, float v, float z)
+{
+	StrandPart *spart= (StrandPart *)handle;
+	StrandShadeCache *cache= spart->cache;
+	StrandSegment *sseg= spart->segment;
+	APixstrand *apn, *apnew;
+	float t, s;
+	int offset, mask, obi, strnr, seg, zverg, bufferz, maskz=0;
+
+	offset = y*spart->rectx + x;
+	obi= sseg->obi - spart->re->objectinstance;
+	strnr= sseg->strand->index + 1; /* stored +1: p[n] == 0 marks a free slot */
+	seg= sseg->v[1] - sseg->strand->vert;
+	mask= (1<<spart->sample);
+
+	/* check against solid z-buffer */
+	zverg= (int)z;
+
+	if (spart->rectdaps) {
+		/* find the z of the sample */
+		PixStr *ps;
+		intptr_t *rd= spart->rectdaps + offset;
+
+		bufferz= 0x7FFFFFFF;
+		if (spart->rectmask) maskz= 0x7FFFFFFF;
+
+		if (*rd) {
+			for (ps= (PixStr *)(*rd); ps; ps= ps->next) {
+				if (mask & ps->mask) {
+					bufferz= ps->z;
+					if (spart->rectmask)
+						maskz= ps->maskz;
+					break;
+				}
+			}
+		}
+	}
+	else {
+		bufferz= (spart->rectz)?
spart->rectz[offset]: 0x7FFFFFFF; + if (spart->rectmask) + maskz= spart->rectmask[offset]; + } +/* CHECK_ADD(n): slot n already holds this strand/instance/segment, so add this sample's mask bit and accumulate v/u unless the bit was set already. CHECK_ASSIGN(n): slot n is free (p == 0), so claim it. Both leave the enclosing while loop with break. */ +#define CHECK_ADD(n) \ + if (apn->p[n]==strnr && apn->obi[n]==obi && apn->seg[n]==seg) \ + { if (!(apn->mask[n] & mask)) { apn->mask[n] |= mask; apn->v[n] += t; apn->u[n] += s; } break; } (void)0 +#define CHECK_ASSIGN(n) \ + if (apn->p[n]==0) \ + {apn->obi[n]= obi; apn->p[n]= strnr; apn->z[n]= zverg; apn->mask[n]= mask; apn->v[n]= t; apn->u[n]= s; apn->seg[n]= seg; break; } (void)0 + + /* add to pixel list */ + if (zverg < bufferz && (spart->totapixbuf[offset] < MAX_ZROW)) { + if (!spart->rectmask || zverg > maskz) { + t = u * spart->t[0] + v * spart->t[1] + (1.0f - u - v) * spart->t[2]; + s = fabsf(u * spart->s[0] + v * spart->s[1] + (1.0f - u - v) * spart->s[2]); + + apn= spart->apixbuf + offset; + while (apn) { + CHECK_ADD(0); + CHECK_ADD(1); + CHECK_ADD(2); + CHECK_ADD(3); + CHECK_ASSIGN(0); + CHECK_ASSIGN(1); + CHECK_ASSIGN(2); + CHECK_ASSIGN(3); + /* all four slots hold other segments: move the current node's contents into a new node, link it behind, and reuse this node */ + apnew= addpsAstrand(spart->zspan); + SWAP(APixstrand, *apnew, *apn); + apn->next= apnew; + CHECK_ASSIGN(0); + } + + if (cache) { + strand_shade_refcount(cache, sseg, sseg->v[1]); + strand_shade_refcount(cache, sseg, sseg->v[2]); + } + spart->totapixbuf[offset]++; + } + } +} + +/* width is calculated in hoco space, to ensure strands are visible; returns the accumulated clip flags and stores the undivided hoco z in *zcomp */ +static int strand_test_clip(float winmat[4][4], ZSpan *UNUSED(zspan), float *bounds, float *co, float *zcomp, float widthx, float widthy) +{ + float hoco[4]; + int clipflag= 0; + + projectvert(co, winmat, hoco); + + /* we compare z without perspective division for segment sorting */ + *zcomp= hoco[2]; + + if (hoco[0]+widthx < bounds[0]*hoco[3]) clipflag |= 1; + else if (hoco[0]-widthx > bounds[1]*hoco[3]) clipflag |= 2; + + if (hoco[1]-widthy > bounds[3]*hoco[3]) clipflag |= 4; + else if (hoco[1]+widthy < bounds[2]*hoco[3]) clipflag |= 8; + + clipflag |= testclip(hoco); + + return clipflag; +} +/* scan-converts the quad co1..co4 as two triangles for one sample, shifted by the negated jitter when spart->jit is set; t[] and s[] are set per triangle corner for do_strand_fillac() */ +static void do_scanconvert_strand(Render *UNUSED(re), StrandPart *spart, ZSpan 
*zspan, float t, float dt, float *co1, float *co2, float *co3, float *co4, int sample) +{ + float jco1[3], jco2[3], jco3[3], jco4[3], jx, jy; + + copy_v3_v3(jco1, co1); + copy_v3_v3(jco2, co2); + copy_v3_v3(jco3, co3); + copy_v3_v3(jco4, co4); + + if (spart->jit) { + jx= -spart->jit[sample][0]; + jy= -spart->jit[sample][1]; + + jco1[0] += jx; jco1[1] += jy; + jco2[0] += jx; jco2[1] += jy; + jco3[0] += jx; jco3[1] += jy; + jco4[0] += jx; jco4[1] += jy; + + /* XXX mblur? */ + } + + spart->sample= sample; + + spart->t[0]= t-dt; + spart->s[0]= -1.0f; + spart->t[1]= t-dt; + spart->s[1]= 1.0f; + spart->t[2]= t; + spart->s[2]= 1.0f; + zspan_scanconvert_strand(zspan, spart, jco1, jco2, jco3, do_strand_fillac); + spart->t[0]= t-dt; + spart->s[0]= -1.0f; + spart->t[1]= t; + spart->s[1]= 1.0f; + spart->t[2]= t; + spart->s[2]= -1.0f; + zspan_scanconvert_strand(zspan, spart, jco1, jco3, jco4, do_strand_fillac); +} +/* with spart: scan-convert the quad between p1 and p2 once per sample; without spart: draw a single wire from p1 to p2 into each of the totzspan spans */ +static void strand_render(Render *re, StrandSegment *sseg, float winmat[4][4], StrandPart *spart, ZSpan *zspan, int totzspan, StrandPoint *p1, StrandPoint *p2) +{ + if (spart) { + float t= p2->t; + float dt= p2->t - p1->t; + int a; + + for (a=0; a<spart->samples; a++) + do_scanconvert_strand(re, spart, zspan, t, dt, p1->zco2, p1->zco1, p2->zco1, p2->zco2, a); + } + else { + float hoco1[4], hoco2[4]; + int a, obi, index; + + obi= sseg->obi - re->objectinstance; + index= sseg->strand->index; + + projectvert(p1->co, winmat, hoco1); + projectvert(p2->co, winmat, hoco2); + + + for (a=0; a<totzspan; a++) { +#if 0 + /* render both strand and single pixel wire to counter aliasing */ + zbufclip4(re, &zspan[a], obi, index, p1->hoco2, p1->hoco1, p2->hoco1, p2->hoco2, p1->clip2, p1->clip1, p2->clip1, p2->clip2); +#endif + /* only render a line for now, which makes the shadow map more + * similar across frames, and so reduces flicker */ + zbufsinglewire(&zspan[a], obi, index, hoco1, hoco2); + } + } +} +/* adaptive subdivision of [p1, p2]: returns 0 when no split is made (the caller renders the piece), 1 when the piece was split at its midpoint and both halves were handled here */ +static int strand_segment_recursive(Render *re, float winmat[4][4], 
StrandPart *spart, ZSpan *zspan, int totzspan, StrandSegment *sseg, StrandPoint *p1, StrandPoint *p2, int depth) +{ + StrandPoint p; + StrandBuffer *buffer= sseg->buffer; + float dot, d1[2], d2[2], len1, len2; + + if (depth == buffer->maxdepth) + return 0; + + p.t= (p1->t + p2->t)*0.5f; + strand_eval_point(sseg, &p); + strand_project_point(buffer->winmat, buffer->winx, buffer->winy, &p); + + d1[0]= (p.x - p1->x); + d1[1]= (p.y - p1->y); + len1= d1[0]*d1[0] + d1[1]*d1[1]; + + d2[0]= (p2->x - p.x); + d2[1]= (p2->y - p.y); + len2= d2[0]*d2[0] + d2[1]*d2[1]; + + if (len1 == 0.0f || len2 == 0.0f) + return 0; + + dot= d1[0]*d2[0] + d1[1]*d2[1]; + if (dot*dot > sseg->sqadaptcos*len1*len2) /* squared form of the angle test between the two half directions, against the squared adaptcos */ + return 0; + + if (spart) { + do_strand_point_project(winmat, zspan, p.co1, p.hoco1, p.zco1); + do_strand_point_project(winmat, zspan, p.co2, p.hoco2, p.zco2); + } + else { +#if 0 + projectvert(p.co1, winmat, p.hoco1); + projectvert(p.co2, winmat, p.hoco2); + p.clip1= testclip(p.hoco1); + p.clip2= testclip(p.hoco2); +#endif + } + + if (!strand_segment_recursive(re, winmat, spart, zspan, totzspan, sseg, p1, &p, depth+1)) + strand_render(re, sseg, winmat, spart, zspan, totzspan, p1, &p); + if (!strand_segment_recursive(re, winmat, spart, zspan, totzspan, sseg, &p, p2, depth+1)) + strand_render(re, sseg, winmat, spart, zspan, totzspan, &p, p2); + + return 1; +} +/* evaluates and projects both end points of sseg (t = 0 and t = 1), then renders it, subdividing through strand_segment_recursive() where that function decides to split */ +void render_strand_segment(Render *re, float winmat[4][4], StrandPart *spart, ZSpan *zspan, int totzspan, StrandSegment *sseg) +{ + StrandBuffer *buffer= sseg->buffer; + StrandPoint *p1= &sseg->point1; + StrandPoint *p2= &sseg->point2; + + p1->t= 0.0f; + p2->t= 1.0f; + + strand_eval_point(sseg, p1); + strand_project_point(buffer->winmat, buffer->winx, buffer->winy, p1); + strand_eval_point(sseg, p2); + strand_project_point(buffer->winmat, buffer->winx, buffer->winy, p2); + + if (spart) { + do_strand_point_project(winmat, zspan, p1->co1, p1->hoco1, p1->zco1); + do_strand_point_project(winmat, zspan, p1->co2, p1->hoco2, p1->zco2); 
+ do_strand_point_project(winmat, zspan, p2->co1, p2->hoco1, p2->zco1); + do_strand_point_project(winmat, zspan, p2->co2, p2->hoco2, p2->zco2); + } + else { +#if 0 + projectvert(p1->co1, winmat, p1->hoco1); + projectvert(p1->co2, winmat, p1->hoco2); + projectvert(p2->co1, winmat, p2->hoco1); + projectvert(p2->co2, winmat, p2->hoco2); + p1->clip1= testclip(p1->hoco1); + p1->clip2= testclip(p1->hoco2); + p2->clip1= testclip(p2->hoco1); + p2->clip2= testclip(p2->hoco2); +#endif + } + + if (!strand_segment_recursive(re, winmat, spart, zspan, totzspan, sseg, p1, p2, 0)) + strand_render(re, sseg, winmat, spart, zspan, totzspan, p1, p2); +} + +/* render call to fill in strands: collects the segments that survive clipping, sorts them by z and renders them in that order; returns the number of collected segments (0 when the break test fires at entry or re->totstrand is 0) */ +int zbuffer_strands_abuf(Render *re, RenderPart *pa, APixstrand *apixbuf, ListBase *apsmbase, unsigned int lay, int UNUSED(negzmask), float winmat[4][4], int winx, int winy, int samples, float (*jit)[2], float clipcrop, int shadow, StrandShadeCache *cache) +{ + ObjectRen *obr; + ObjectInstanceRen *obi; + ZSpan zspan; + StrandRen *strand = NULL; + StrandVert *svert; + StrandBound *sbound; + StrandPart spart; + StrandSegment sseg; + StrandSortSegment *sortsegments = NULL, *sortseg, *firstseg; + MemArena *memarena; + float z[4], bounds[4], obwinmat[4][4]; + int a, b, c, i, totsegment, clip[4]; + + if (re->test_break(re->tbh)) + return 0; + if (re->totstrand == 0) + return 0; + + /* setup StrandPart */ + memset(&spart, 0, sizeof(spart)); + + spart.re= re; + spart.rectx= pa->rectx; + spart.recty= pa->recty; + spart.apixbuf= apixbuf; + spart.zspan= &zspan; + spart.rectdaps= pa->rectdaps; + spart.rectz= pa->rectz; + spart.rectmask= pa->rectmask; + spart.cache= cache; + spart.shadow= shadow; + spart.jit= jit; + spart.samples= samples; + + zbuf_alloc_span(&zspan, pa->rectx, pa->recty, clipcrop); + + /* needed for transform from hoco to zbuffer co */ + zspan.zmulx= ((float)winx)/2.0f; + zspan.zmuly= ((float)winy)/2.0f; + + zspan.zofsx= -pa->disprect.xmin; + zspan.zofsy= -pa->disprect.ymin; + + /* to 
center the sample position */ + if (!shadow) { + zspan.zofsx -= 0.5f; + zspan.zofsy -= 0.5f; + } + + zspan.apsmbase= apsmbase; + + /* clipping setup */ + bounds[0]= (2*pa->disprect.xmin - winx-1)/(float)winx; + bounds[1]= (2*pa->disprect.xmax - winx+1)/(float)winx; + bounds[2]= (2*pa->disprect.ymin - winy-1)/(float)winy; + bounds[3]= (2*pa->disprect.ymax - winy+1)/(float)winy; + + memarena= BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "strand sort arena"); + firstseg= NULL; + totsegment= 0; + + /* for all object instances */ + for (obi=re->instancetable.first, i=0; obi; obi=obi->next, i++) { + Material *ma; + float widthx, widthy; + + obr= obi->obr; + + if (!obr->strandbuf || !(obr->strandbuf->lay & lay)) + continue; + + /* compute matrix and try clipping whole object */ + if (obi->flag & R_TRANSFORMED) + mul_m4_m4m4(obwinmat, winmat, obi->mat); + else + copy_m4_m4(obwinmat, winmat); + + /* test if we should skip it */ + ma = obr->strandbuf->ma; + + if (shadow && (!(ma->mode2 & MA_CASTSHADOW) || !(ma->mode & MA_SHADBUF))) + continue; + else if (!shadow && (ma->mode & MA_ONLYCAST)) + continue; + + if (clip_render_object(obi->obr->boundbox, bounds, obwinmat)) + continue; + + widthx= obr->strandbuf->maxwidth*obwinmat[0][0]; + widthy= obr->strandbuf->maxwidth*obwinmat[1][1]; + + /* for each bounding box containing a number of strands */ + sbound= obr->strandbuf->bound; + for (c=0; c<obr->strandbuf->totbound; c++, sbound++) { + if (clip_render_object(sbound->boundbox, bounds, obwinmat)) + continue; + + /* for each strand in this bounding box */ + for (a=sbound->start; a<sbound->end; a++) { + strand= RE_findOrAddStrand(obr, a); + svert= strand->vert; + + /* keep clipping and z depth for 4 control points */ + clip[1]= strand_test_clip(obwinmat, &zspan, bounds, svert->co, &z[1], widthx, widthy); + clip[2]= strand_test_clip(obwinmat, &zspan, bounds, (svert+1)->co, &z[2], widthx, widthy); + clip[0]= clip[1]; z[0]= z[1]; + + for (b=0; b<strand->totvert-1; b++, svert++) { + /* 
compute 4th point clipping and z depth */ + if (b < strand->totvert-2) { + clip[3]= strand_test_clip(obwinmat, &zspan, bounds, (svert+2)->co, &z[3], widthx, widthy); + } + else { + clip[3]= clip[2]; z[3]= z[2]; + } + + /* check clipping and add to sortsegments buffer */ + if (!(clip[0] & clip[1] & clip[2] & clip[3])) { + sortseg= BLI_memarena_alloc(memarena, sizeof(StrandSortSegment)); + sortseg->obi= i; + sortseg->strand= strand->index; + sortseg->segment= b; + + sortseg->z= 0.5f*(z[1] + z[2]); /* sort key: mean z of the segment's two end points */ + + sortseg->next= firstseg; + firstseg= sortseg; + totsegment++; + } + + /* shift clipping and z depth */ + clip[0]= clip[1]; z[0]= z[1]; + clip[1]= clip[2]; z[1]= z[2]; + clip[2]= clip[3]; z[2]= z[3]; + } + } + } + } + + if (!re->test_break(re->tbh)) { + /* convert list to array and sort */ + sortsegments= MEM_mallocN(sizeof(StrandSortSegment)*totsegment, "StrandSortSegment"); + for (a=0, sortseg=firstseg; a<totsegment; a++, sortseg=sortseg->next) + sortsegments[a]= *sortseg; + qsort(sortsegments, totsegment, sizeof(StrandSortSegment), compare_strand_segment); + } + + BLI_memarena_free(memarena); + + spart.totapixbuf= MEM_callocN(sizeof(int)*pa->rectx*pa->recty, "totapixbuf"); + + if (!re->test_break(re->tbh)) { + /* render segments in sorted order */ + sortseg= sortsegments; + for (a=0; a<totsegment; a++, sortseg++) { + if (re->test_break(re->tbh)) + break; + + obi= &re->objectinstance[sortseg->obi]; + obr= obi->obr; + + sseg.obi= obi; + sseg.strand= RE_findOrAddStrand(obr, sortseg->strand); + sseg.buffer= sseg.strand->buffer; + sseg.sqadaptcos= sseg.buffer->adaptcos; + sseg.sqadaptcos *= sseg.sqadaptcos; + + svert= sseg.strand->vert + sortseg->segment; + sseg.v[0]= (sortseg->segment > 0)? (svert-1): svert; + sseg.v[1]= svert; + sseg.v[2]= svert+1; + sseg.v[3]= (sortseg->segment < sseg.strand->totvert-2)? 
svert+2: svert+1; + sseg.shaded= 0; + + spart.segment= &sseg; + + render_strand_segment(re, winmat, &spart, &zspan, 1, &sseg); + } + } + + if (sortsegments) + MEM_freeN(sortsegments); + MEM_freeN(spart.totapixbuf); + + zbuf_free_span(&zspan); + + return totsegment; +} + +/* *************** */ +/* finds or creates the StrandSurface matching obr (ob, par, index) and the DerivedMesh vertex/face counts, then fills the coordinate array selected by timeoffset (-1: prevco, 0: co, 1: nextco) if that array is not allocated yet; otherwise returns the surface untouched */ +StrandSurface *cache_strand_surface(Render *re, ObjectRen *obr, DerivedMesh *dm, float mat[4][4], int timeoffset) +{ + StrandSurface *mesh; + MFace *mface; + MVert *mvert; + float (*co)[3]; + int a, totvert, totface; + + totvert= dm->getNumVerts(dm); + totface= dm->getNumTessFaces(dm); + + for (mesh = re->strandsurface.first; mesh; mesh = mesh->next) { + if ((mesh->obr.ob == obr->ob) && + (mesh->obr.par == obr->par) && + (mesh->obr.index == obr->index) && + (mesh->totvert == totvert) && + (mesh->totface == totface)) + { + break; + } + } + + if (!mesh) { + mesh= MEM_callocN(sizeof(StrandSurface), "StrandSurface"); + mesh->obr= *obr; + mesh->totvert= totvert; + mesh->totface= totface; + mesh->face= MEM_callocN(sizeof(int)*4*mesh->totface, "StrandSurfFaces"); + mesh->ao= MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfAO"); + mesh->env= MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfEnv"); + mesh->indirect= MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfIndirect"); + BLI_addtail(&re->strandsurface, mesh); + } + + if (timeoffset == -1 && !mesh->prevco) + mesh->prevco= co= MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfCo"); + else if (timeoffset == 0 && !mesh->co) + mesh->co= co= MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfCo"); + else if (timeoffset == 1 && !mesh->nextco) + mesh->nextco= co= MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfCo"); + else + return mesh; + + mvert= dm->getVertArray(dm); + for (a=0; a<mesh->totvert; a++, mvert++) { + copy_v3_v3(co[a], mvert->co); + mul_m4_v3(mat, co[a]); + } + + mface= dm->getTessFaceArray(dm); + for (a=0; a<mesh->totface; a++, mface++) { + mesh->face[a][0]= mface->v1; + mesh->face[a][1]= 
mface->v2; + mesh->face[a][2]= mface->v3; + mesh->face[a][3]= mface->v4; + } + + return mesh; +} +/* frees the per-surface arrays of every StrandSurface in re->strandsurface, then the list nodes themselves */ +void free_strand_surface(Render *re) +{ + StrandSurface *mesh; + + for (mesh=re->strandsurface.first; mesh; mesh=mesh->next) { + if (mesh->co) MEM_freeN(mesh->co); + if (mesh->prevco) MEM_freeN(mesh->prevco); + if (mesh->nextco) MEM_freeN(mesh->nextco); + if (mesh->ao) MEM_freeN(mesh->ao); + if (mesh->env) MEM_freeN(mesh->env); + if (mesh->indirect) MEM_freeN(mesh->indirect); + if (mesh->face) MEM_freeN(mesh->face); + } + + BLI_freelistN(&re->strandsurface); +} +/* extends min/max by every vertex of the strand; when width is non-zero also by the vertex offset by +width and by -width on all three axes */ +void strand_minmax(StrandRen *strand, float min[3], float max[3], const float width) +{ + StrandVert *svert; + const float width2 = width * 2.0f; + float vec[3]; + int a; + + for (a=0, svert=strand->vert; a<strand->totvert; a++, svert++) { + copy_v3_v3(vec, svert->co); + minmax_v3v3_v3(min, max, vec); + + if (width!=0.0f) { + add_v3_fl(vec, width); + minmax_v3v3_v3(min, max, vec); + add_v3_fl(vec, -width2); + minmax_v3v3_v3(min, max, vec); + } + } +} + diff --git a/source/blender/render/intern/source/sunsky.c b/source/blender/render/intern/source/sunsky.c new file mode 100644 index 00000000000..80dd52c220c --- /dev/null +++ b/source/blender/render/intern/source/sunsky.c @@ -0,0 +1,506 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/sunsky.c + * \ingroup render + * + * This feature comes from Preetham paper on "A Practical Analytic Model for Daylight" + * and example code from Brian Smits, another author of that paper in + * http://www.cs.utah.edu/vissim/papers/sunsky/code/ + */ + +#include "sunsky.h" +#include "BLI_math.h" + +/** + * These macros are defined for vector operations + * */ + +/** + * compute v1 = v2 op v3 + * v1, v2 and v3 are vectors containing 3 floats + * */ +#define VEC3OPV(v1, v2, op, v3) \ + { \ + v1[0] = (v2[0] op v3[0]); \ + v1[1] = (v2[1] op v3[1]); \ + v1[2] = (v2[2] op v3[2]); \ + } (void)0 + +/** + * compute v1 = v2 op f1 + * v1, v2 are vectors containing 3 floats + * and f1 is a float + * */ +#define VEC3OPF(v1, v2, op, f1) \ + { \ + v1[0] = (v2[0] op(f1)); \ + v1[1] = (v2[1] op(f1)); \ + v1[2] = (v2[2] op(f1)); \ + } (void)0 + +/** + * compute v1 = f1 op v2 + * v1, v2 are vectors containing 3 floats + * and f1 is a float + * */ +#define FOPVEC3(v1, f1, op, v2) \ + { \ + v1[0] = ((f1) op v2[0]); \ + v1[1] = ((f1) op v2[1]); \ + v1[2] = ((f1) op v2[2]); \ + } (void)0 + +/** + * ClipColor: + * clip each of the three color components to the range [0, 1], in place + * */ +void ClipColor(float c[3]) +{ + if (c[0] > 1.0f) c[0] = 1.0f; + if (c[0] < 0.0f) c[0] = 0.0f; + if (c[1] > 1.0f) c[1] = 1.0f; + if (c[1] < 0.0f) c[1] = 0.0f; + if (c[2] > 1.0f) c[2] = 1.0f; + if (c[2] < 0.0f) c[2] = 0.0f; +} + +/** + * AngleBetween: + * compute the angle between two directions given as (thetav, phiv) and (theta, phi); the cosine is clamped so the result is 0 above 1 and M_PI below -1 + * all angles are in radians + * */ +static float AngleBetween(float thetav, float phiv, float theta, float phi) +{ + float cospsi = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta); + + if (cospsi > 1.0f) + return 0; + if (cospsi < 
-1.0f) + return M_PI; + + return acosf(cospsi); +} + +/** + * DirectionToThetaPhi: + * this function converts a direction to its theta and phi values + * parameters: + * toSun: contains direction information; toSun[2] is used as cos(theta) and is clamped to [-1, 1] first + * theta, phi, are return values from this conversion; phi is set to 0 when theta is below 1e-5 + * */ +static void DirectionToThetaPhi(float *toSun, float *theta, float *phi) +{ + *theta = acosf((toSun[2] > 1.0f) ? 1.0f : ((toSun[2] < -1.0f) ? -1.0f : toSun[2])); /* clamped like AngleBetween(): acosf() has a domain error outside [-1, 1], which rounding can produce */ + if (fabsf(*theta) < 1e-5f) + *phi = 0; + else + *phi = atan2f(toSun[1], toSun[0]); +} + +/** + * PerezFunction: + * compute perez function value based on input parameters: returns lvz * num / den, where num uses the given theta and gamma and den is the same expression with cos(theta) = 1 and gamma = sunsky->theta + */ +static float PerezFunction(struct SunSky *sunsky, const float *lam, float theta, float gamma, float lvz) +{ + float den, num; + + den = ((1 + lam[0] * expf(lam[1])) * + (1 + lam[2] * expf(lam[3] * sunsky->theta) + lam[4] * cosf(sunsky->theta) * cosf(sunsky->theta))); + + num = ((1 + lam[0] * expf(lam[1] / cosf(theta))) * + (1 + lam[2] * expf(lam[3] * gamma) + lam[4] * cosf(gamma) * cosf(gamma))); + + return(lvz * num / den); +} + +/** + * InitSunSky: + * this function computes some sun, sky parameters according to input parameters and also initializes some other sun, sky parameters + * parameters: + * sunSky, is a structure that contains information about sun, sky and atmosphere, in this function, most of its values are initialized + * turb, is atmosphere turbidity + * toSun, contains sun direction + * horizon_brightness, controls the brightness of the horizon colors + * spread, controls colors spread at horizon + * sun_brightness, controls sun's brightness + * sun_size, controls sun's size + * back_scatter, controls back scatter light + * */ +void InitSunSky(struct SunSky *sunsky, float turb, const float toSun[3], float horizon_brightness, + float spread, float sun_brightness, float sun_size, float back_scatter, + float skyblendfac, short skyblendtype, float sky_exposure, float sky_colorspace) +{ + float theta2; + float theta3; + float T; + float T2; + float chi; + + sunsky->turbidity = turb; + + sunsky->horizon_brightness = 
horizon_brightness; + sunsky->spread = spread; + sunsky->sun_brightness = sun_brightness; + sunsky->sun_size = sun_size; + sunsky->backscattered_light = back_scatter; + sunsky->skyblendfac = skyblendfac; + sunsky->skyblendtype = skyblendtype; + sunsky->sky_exposure = -sky_exposure; /* stored negated; GetSkyXYZRadiance() evaluates exp(Y * sky_exposure) */ + sunsky->sky_colorspace = sky_colorspace; + + sunsky->toSun[0] = toSun[0]; + sunsky->toSun[1] = toSun[1]; + sunsky->toSun[2] = toSun[2]; + + DirectionToThetaPhi(sunsky->toSun, &sunsky->theta, &sunsky->phi); + + sunsky->sunSolidAngle = 0.25 * M_PI * 1.39 * 1.39 / (150 * 150); /* = 6.7443e-05 */ + + theta2 = sunsky->theta * sunsky->theta; + theta3 = theta2 * sunsky->theta; + T = turb; + T2 = turb * turb; + + chi = (4.0f / 9.0f - T / 120.0f) * ((float)M_PI - 2.0f * sunsky->theta); + sunsky->zenith_Y = (4.0453f * T - 4.9710f) * tanf(chi) - 0.2155f * T + 2.4192f; + sunsky->zenith_Y *= 1000; /* conversion from kcd/m^2 to cd/m^2 */ + + if (sunsky->zenith_Y <= 0) + sunsky->zenith_Y = 1e-6; + + sunsky->zenith_x = + (+0.00165f * theta3 - 0.00374f * theta2 + 0.00208f * sunsky->theta + 0.0f) * T2 + + (-0.02902f * theta3 + 0.06377f * theta2 - 0.03202f * sunsky->theta + 0.00394f) * T + + (+0.11693f * theta3 - 0.21196f * theta2 + 0.06052f * sunsky->theta + 0.25885f); + + sunsky->zenith_y = + (+0.00275f * theta3 - 0.00610f * theta2 + 0.00316f * sunsky->theta + 0.0f) * T2 + + (-0.04214f * theta3 + 0.08970f * theta2 - 0.04153f * sunsky->theta + 0.00515f) * T + + (+0.15346f * theta3 - 0.26756f * theta2 + 0.06669f * sunsky->theta + 0.26688f); + + + sunsky->perez_Y[0] = 0.17872f * T - 1.46303f; + sunsky->perez_Y[1] = -0.35540f * T + 0.42749f; + sunsky->perez_Y[2] = -0.02266f * T + 5.32505f; + sunsky->perez_Y[3] = 0.12064f * T - 2.57705f; + sunsky->perez_Y[4] = -0.06696f * T + 0.37027f; + + sunsky->perez_x[0] = -0.01925f * T - 0.25922f; + sunsky->perez_x[1] = -0.06651f * T + 0.00081f; + sunsky->perez_x[2] = -0.00041f * T + 0.21247f; + sunsky->perez_x[3] = -0.06409f * T - 0.89887f; + sunsky->perez_x[4] = 
-0.00325f * T + 0.04517f; + + sunsky->perez_y[0] = -0.01669f * T - 0.26078f; + sunsky->perez_y[1] = -0.09495f * T + 0.00921f; + sunsky->perez_y[2] = -0.00792f * T + 0.21023f; + sunsky->perez_y[3] = -0.04405f * T - 1.65369f; + sunsky->perez_y[4] = -0.01092f * T + 0.05291f; + + /* suggested by glome in patch [#8063] */ + sunsky->perez_Y[0] *= sunsky->horizon_brightness; + sunsky->perez_x[0] *= sunsky->horizon_brightness; + sunsky->perez_y[0] *= sunsky->horizon_brightness; + + sunsky->perez_Y[1] *= sunsky->spread; + sunsky->perez_x[1] *= sunsky->spread; + sunsky->perez_y[1] *= sunsky->spread; + + sunsky->perez_Y[2] *= sunsky->sun_brightness; + sunsky->perez_x[2] *= sunsky->sun_brightness; + sunsky->perez_y[2] *= sunsky->sun_brightness; + + sunsky->perez_Y[3] *= sunsky->sun_size; + sunsky->perez_x[3] *= sunsky->sun_size; + sunsky->perez_y[3] *= sunsky->sun_size; + + sunsky->perez_Y[4] *= sunsky->backscattered_light; + sunsky->perez_x[4] *= sunsky->backscattered_light; + sunsky->perez_y[4] *= sunsky->backscattered_light; +} + +/** + * GetSkyXYZRadiance: + * this function computes sky radiance according to the view parameters `theta' and `phi' and sunSky values + * parameters: + * sunSky, contains sun and sky parameters + * theta, is the view direction's theta (the sun's own angle is read from sunsky->theta); values above M_PI_2 are faded and then limited to M_PI_2 + * phi, is the view direction's phi (the sun's own angle is read from sunsky->phi) + * color_out, is computed color that shows sky radiance in XYZ color format + * */ +void GetSkyXYZRadiance(struct SunSky *sunsky, float theta, float phi, float color_out[3]) +{ + float gamma; + float x, y, Y, X, Z; + float hfade = 1, nfade = 1; + + + if (theta > (float)M_PI_2) { + hfade = 1.0f - (theta * (float)M_1_PI - 0.5f) * 2.0f; + hfade = hfade * hfade * (3.0f - 2.0f * hfade); + theta = M_PI_2; + } + + if (sunsky->theta > (float)M_PI_2) { + if (theta <= (float)M_PI_2) { + nfade = 1.0f - (0.5f - theta * (float)M_1_PI) * 2.0f; + nfade *= 1.0f - (sunsky->theta * (float)M_1_PI - 0.5f) * 2.0f; + nfade = nfade * nfade * (3.0f - 2.0f * nfade); + } + } + + gamma = AngleBetween(theta, phi, sunsky->theta, 
sunsky->phi); + + /* Compute xyY values */ + x = PerezFunction(sunsky, sunsky->perez_x, theta, gamma, sunsky->zenith_x); + y = PerezFunction(sunsky, sunsky->perez_y, theta, gamma, sunsky->zenith_y); + Y = 6.666666667e-5f * nfade * hfade * PerezFunction(sunsky, sunsky->perez_Y, theta, gamma, sunsky->zenith_Y); + + if (sunsky->sky_exposure != 0.0f) + Y = 1.0 - exp(Y * sunsky->sky_exposure); + + X = (x / y) * Y; /* xyY to XYZ */ + Z = ((1 - x - y) / y) * Y; + + color_out[0] = X; + color_out[1] = Y; + color_out[2] = Z; +} + +/** + * GetSkyXYZRadiancef: + * this function computes sky radiance according to a view direction `varg' and sunSky values + * parameters: + * sunSky, contains sun and sky parameters + * varg, shows direction; it is normalized into a local copy, and a z below 0.001 is raised to 0.001 before renormalizing + * color_out, is computed color that shows sky radiance in XYZ color format + * */ +void GetSkyXYZRadiancef(struct SunSky *sunsky, const float varg[3], float color_out[3]) +{ + float theta, phi; + float v[3]; + + normalize_v3_v3(v, varg); + + if (v[2] < 0.001f) { + v[2] = 0.001f; + normalize_v3(v); + } + + DirectionToThetaPhi(v, &theta, &phi); + GetSkyXYZRadiance(sunsky, theta, phi, color_out); +} + +/** + * ComputeAttenuatedSunlight: + * this function computes attenuated sun light based on sun's theta and atmosphere turbidity + * parameters: + * theta, is sun's theta + * turbidity: is atmosphere turbidity (NOTE(review): declared int, while the caller passes sunSky->turbidity, which InitSunSky assigns from a float - confirm the truncation is intended) + * fTau: contains computed attenuated sun light + * */ +static void ComputeAttenuatedSunlight(float theta, int turbidity, float fTau[3]) +{ + float fBeta; + float fTauR, fTauA; + float m; + float fAlpha; + + int i; + float fLambda[3]; + fLambda[0] = 0.65f; + fLambda[1] = 0.57f; + fLambda[2] = 0.475f; + + fAlpha = 1.3f; + fBeta = 0.04608365822050f * turbidity - 0.04586025928522f; + + m = 1.0f / (cosf(theta) + 0.15f * powf(93.885f - theta / (float)M_PI * 180.0f, -1.253f)); + + for (i = 0; i < 3; i++) { + /* Rayleigh Scattering */ + fTauR = expf(-m * 0.008735f * powf(fLambda[i], (float)(-4.08f))); + + /* Aerosol (water + dust) attenuation */ + fTauA = exp(-m * 
fBeta * powf(fLambda[i], -fAlpha)); + + fTau[i] = fTauR * fTauA; + } +} + +/** + * InitAtmosphere: + * this function initializes the sunSky structure with user input parameters. + * parameters: + * sunSky, contains information about sun, and in this function some atmosphere parameters will be initialized + * sun_intens, shows sun intensity value + * mief, Mie scattering factor, this factor is currently called with 1.0 + * rayf, Rayleigh scattering factor, this factor is currently called with 1.0 + * inscattf, inscatter light factor that ranges from 0.0 to 1.0, 0.0 means no inscatter light and 1.0 means full inscatter light + * extincf, extinction light factor that ranges from 0.0 to 1.0, 0.0 means no extinction and 1.0 means full extinction + * disf, is distance factor, multiplied to pixel's z value to compute each pixel's distance to camera, + * */ +void InitAtmosphere(struct SunSky *sunSky, float sun_intens, float mief, float rayf, + float inscattf, float extincf, float disf) +{ + const float pi = M_PI; + const float n = 1.003f; /* refractive index */ + const float N = 2.545e25; + const float pn = 0.035f; + const float T = 2.0f; + float fTemp, fTemp2, fTemp3, fBeta, fBetaDash; + float c = (6.544f * T - 6.51f) * 1e-17f; + float K[3] = {0.685f, 0.679f, 0.670f}; + float vBetaMieTemp[3]; + + float fLambda[3], fLambda2[3], fLambda4[3]; + float vLambda2[3]; + float vLambda4[3]; + + int i; + + sunSky->atm_SunIntensity = sun_intens; + sunSky->atm_BetaMieMultiplier = mief; + sunSky->atm_BetaRayMultiplier = rayf; + sunSky->atm_InscatteringMultiplier = inscattf; + sunSky->atm_ExtinctionMultiplier = extincf; + sunSky->atm_DistanceMultiplier = disf; + + sunSky->atm_HGg = 0.8; + + fLambda[0] = 1 / 650e-9f; + fLambda[1] = 1 / 570e-9f; + fLambda[2] = 1 / 475e-9f; + for (i = 0; i < 3; i++) { + fLambda2[i] = fLambda[i] * fLambda[i]; + fLambda4[i] = fLambda2[i] * fLambda2[i]; + } + + vLambda2[0] = fLambda2[0]; + vLambda2[1] = fLambda2[1]; + vLambda2[2] = fLambda2[2]; + + vLambda4[0] = fLambda4[0]; + 
vLambda4[1] = fLambda4[1]; + vLambda4[2] = fLambda4[2]; + + /* Rayleigh scattering constants. */ + fTemp = pi * pi * (n * n - 1) * (n * n - 1) * (6 + 3 * pn) / (6 - 7 * pn) / N; + fBeta = 8 * fTemp * pi / 3; + + VEC3OPF(sunSky->atm_BetaRay, vLambda4, *, fBeta); + fBetaDash = fTemp / 2; + VEC3OPF(sunSky->atm_BetaDashRay, vLambda4, *, fBetaDash); + + + /* Mie scattering constants. */ + fTemp2 = 0.434f * c * (2 * pi) * (2 * pi) * 0.5f; + VEC3OPF(sunSky->atm_BetaDashMie, vLambda2, *, fTemp2); + + fTemp3 = 0.434f * c * pi * (2 * pi) * (2 * pi); + + VEC3OPV(vBetaMieTemp, K, *, fLambda); + VEC3OPF(sunSky->atm_BetaMie, vBetaMieTemp, *, fTemp3); + +} + +/** + * AtmospherePixleShader: + * this function applies the atmosphere effect on a pixel color `rgb' at distance `s' + * parameters: + * sunSky, contains information about sun parameters and user values; atm_BetaRay and atm_BetaMie are only read, atm_BetaRM is rewritten on every call + * view, is camera view vector + * s, is distance (scaled here by atm_DistanceMultiplier) + * rgb, contains rendered color value for a pixel, modified in place as rgb * E + I + * */ +void AtmospherePixleShader(struct SunSky *sunSky, float view[3], float s, float rgb[3]) +{ + float costheta; + float Phase_1; + float Phase_2; + float sunColor[3]; + + float E[3]; + float E1[3]; + + + float I[3]; + float fTemp; + float vTemp1[3], vTemp2[3], betaRay[3], betaMie[3]; + + float sunDirection[3]; + + s *= sunSky->atm_DistanceMultiplier; + + sunDirection[0] = sunSky->toSun[0]; + sunDirection[1] = sunSky->toSun[1]; + sunDirection[2] = sunSky->toSun[2]; + + costheta = dot_v3v3(view, sunDirection); /* cos(theta) */ + Phase_1 = 1 + (costheta * costheta); /* Phase_1 */ + + VEC3OPF(betaRay, sunSky->atm_BetaRay, *, sunSky->atm_BetaRayMultiplier); /* scaled into locals: rescaling the stored coefficients here would compound the multiplier on every call */ + VEC3OPF(betaMie, sunSky->atm_BetaMie, *, sunSky->atm_BetaMieMultiplier); + VEC3OPV(sunSky->atm_BetaRM, betaRay, +, betaMie); + + /* e^(-(beta_1 + beta_2) * s) = E1 */ + VEC3OPF(E1, sunSky->atm_BetaRM, *, -s / (float)M_LN2); + E1[0] = exp(E1[0]); + E1[1] = exp(E1[1]); + E1[2] = exp(E1[2]); + + copy_v3_v3(E, E1); + + /* Phase2(theta) = 
(1-g^2)/(1+g-2g*cos(theta))^(3/2) */ + fTemp = 1 + sunSky->atm_HGg - 2 * sunSky->atm_HGg * costheta; + fTemp = fTemp * sqrtf(fTemp); + Phase_2 = (1 - sunSky->atm_HGg * sunSky->atm_HGg) / fTemp; + + VEC3OPF(vTemp1, sunSky->atm_BetaDashRay, *, Phase_1); + VEC3OPF(vTemp2, sunSky->atm_BetaDashMie, *, Phase_2); + + VEC3OPV(vTemp1, vTemp1, +, vTemp2); + FOPVEC3(vTemp2, 1.0f, -, E1); + VEC3OPV(vTemp1, vTemp1, *, vTemp2); + + FOPVEC3(vTemp2, 1.0f, /, sunSky->atm_BetaRM); + + VEC3OPV(I, vTemp1, *, vTemp2); + + VEC3OPF(I, I, *, sunSky->atm_InscatteringMultiplier); + VEC3OPF(E, E, *, sunSky->atm_ExtinctionMultiplier); + + /* scale to color sun */ + ComputeAttenuatedSunlight(sunSky->theta, sunSky->turbidity, sunColor); + VEC3OPV(E, E, *, sunColor); + + VEC3OPF(I, I, *, sunSky->atm_SunIntensity); + + VEC3OPV(rgb, rgb, *, E); + VEC3OPV(rgb, rgb, +, I); +} + +#undef VEC3OPV +#undef VEC3OPF +#undef FOPVEC3 + +/* EOF */ diff --git a/source/blender/render/intern/source/volume_precache.c b/source/blender/render/intern/source/volume_precache.c new file mode 100644 index 00000000000..8e79f309814 --- /dev/null +++ b/source/blender/render/intern/source/volume_precache.c @@ -0,0 +1,855 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): Matt Ebb, Raúl Fernández Hernández (Farsthary). + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/volume_precache.c + * \ingroup render + */ + + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <float.h> + +#include "MEM_guardedalloc.h" + +#include "BLI_blenlib.h" +#include "BLI_math.h" +#include "BLI_task.h" +#include "BLI_threads.h" +#include "BLI_voxel.h" +#include "BLI_utildefines.h" + +#include "BLT_translation.h" + +#include "PIL_time.h" + +#include "RE_shader_ext.h" + +#include "DNA_material_types.h" + +#include "rayintersection.h" +#include "rayobject.h" +#include "render_types.h" +#include "rendercore.h" +#include "renderdatabase.h" +#include "volumetric.h" +#include "volume_precache.h" + +#include "atomic_ops.h" + + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +/* *** utility code to set up an individual raytree for objectinstance, for checking inside/outside *** */ + +/* Recursive test for intersections, from a point inside the mesh, to outside + * Number of intersections (depth) determines if a point is inside or outside the mesh; after each hit the ray restarts from the hit point, at most `limit' times, and the final depth is returned (`offset' is not read here) */ +static int intersect_outside_volume(RayObject *tree, Isect *isect, float *offset, int limit, int depth) +{ + if (limit == 0) return depth; + + if (RE_rayobject_raycast(tree, isect)) { + + isect->start[0] = isect->start[0] + isect->dist*isect->dir[0]; + isect->start[1] = isect->start[1] + isect->dist*isect->dir[1]; + isect->start[2] = isect->start[2] + isect->dist*isect->dir[2]; + + isect->dist = FLT_MAX; + isect->skip = 
RE_SKIP_VLR_NEIGHBOUR; + isect->orig.face= isect->hit.face; + isect->orig.ob= isect->hit.ob; + + return intersect_outside_volume(tree, isect, offset, limit-1, depth+1); + } + else { + return depth; + } +} + +/* Uses ray tracing to check if a point is inside or outside an ObjectInstanceRen: casts from co along (0, 0, 1) with a limit of 20 hits and returns 1 (inside) for an odd hit count, 0 (outside) for an even one */ +static int point_inside_obi(RayObject *tree, ObjectInstanceRen *obi, const float co[3]) +{ + Isect isect= {{0}}; + float dir[3] = {0.0f, 0.0f, 1.0f}; + int final_depth=0, depth=0, limit=20; + + /* set up the isect */ + copy_v3_v3(isect.start, co); + copy_v3_v3(isect.dir, dir); + isect.mode= RE_RAY_MIRROR; + isect.last_hit= NULL; + isect.lay= -1; + + isect.dist = FLT_MAX; + isect.orig.face= NULL; + isect.orig.ob = NULL; + + RE_instance_rotate_ray(obi, &isect); + final_depth = intersect_outside_volume(tree, &isect, dir, limit, depth); + RE_instance_rotate_ray_restore(obi, &isect); + + /* even number of intersections: point is outside + * odd number: point is inside */ + if (final_depth % 2 == 0) return 0; + else return 1; +} + +/* find the bounding box of an objectinstance in global space */ +void global_bounds_obi(Render *re, ObjectInstanceRen *obi, float bbmin[3], float bbmax[3]) +{ + ObjectRen *obr = obi->obr; + VolumePrecache *vp = obi->volume_precache; + VertRen *ver= NULL; + float co[3]; + int a; + + if (vp->bbmin != NULL && vp->bbmax != NULL) { + copy_v3_v3(bbmin, vp->bbmin); + copy_v3_v3(bbmax, vp->bbmax); + return; + } + + vp->bbmin = MEM_callocN(sizeof(float)*3, "volume precache min boundbox corner"); + vp->bbmax = MEM_callocN(sizeof(float)*3, "volume precache max boundbox corner"); + + INIT_MINMAX(bbmin, bbmax); + + for (a=0; a<obr->totvert; a++) { + if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert; + else ver++; + + copy_v3_v3(co, ver->co); + + /* transformed object instance in camera space */ + if (obi->flag & R_TRANSFORMED) + mul_m4_v3(obi->mat, co); + + /* convert to global space */ + mul_m4_v3(re->viewinv, co); + + minmax_v3v3_v3(vp->bbmin, vp->bbmax, co); + } + 
+ copy_v3_v3(bbmin, vp->bbmin); + copy_v3_v3(bbmax, vp->bbmax); + +} + +/* *** light cache filtering *** */ + +static float get_avg_surrounds(float *cache, int *res, int xx, int yy, int zz) +{ + int x, y, z, x_, y_, z_; + int added=0; + float tot=0.0f; + + for (z=-1; z <= 1; z++) { + z_ = zz+z; + if (z_ >= 0 && z_ <= res[2]-1) { + + for (y=-1; y <= 1; y++) { + y_ = yy+y; + if (y_ >= 0 && y_ <= res[1]-1) { + + for (x=-1; x <= 1; x++) { + x_ = xx+x; + if (x_ >= 0 && x_ <= res[0]-1) { + const int64_t i = BLI_VOXEL_INDEX(x_, y_, z_, res); + + if (cache[i] > 0.0f) { + tot += cache[i]; + added++; + } + + } + } + } + } + } + } + + if (added > 0) tot /= added; + + return tot; +} + +/* function to filter the edges of the light cache, where there was no volume originally. + * For each voxel which was originally external to the mesh, it finds the average values of + * the surrounding internal voxels and sets the original external voxel to that average amount. + * Works almost a bit like a 'dilate' filter */ +static void lightcache_filter(VolumePrecache *vp) +{ + int x, y, z; + + for (z=0; z < vp->res[2]; z++) { + for (y=0; y < vp->res[1]; y++) { + for (x=0; x < vp->res[0]; x++) { + /* trigger for outside mesh */ + const int64_t i = BLI_VOXEL_INDEX(x, y, z, vp->res); + + if (vp->data_r[i] < -0.f) + vp->data_r[i] = get_avg_surrounds(vp->data_r, vp->res, x, y, z); + if (vp->data_g[i] < -0.f) + vp->data_g[i] = get_avg_surrounds(vp->data_g, vp->res, x, y, z); + if (vp->data_b[i] < -0.f) + vp->data_b[i] = get_avg_surrounds(vp->data_b, vp->res, x, y, z); + } + } + } +} + +#if 0 +static void lightcache_filter2(VolumePrecache *vp) +{ + int x, y, z; + float *new_r, *new_g, *new_b; + int field_size = vp->res[0]*vp->res[1]*vp->res[2]*sizeof(float); + + new_r = MEM_mallocN(field_size, "temp buffer for light cache filter r channel"); + new_g = MEM_mallocN(field_size, "temp buffer for light cache filter g channel"); + new_b = MEM_mallocN(field_size, "temp buffer for light cache filter b 
channel"); + + memcpy(new_r, vp->data_r, field_size); + memcpy(new_g, vp->data_g, field_size); + memcpy(new_b, vp->data_b, field_size); + + for (z=0; z < vp->res[2]; z++) { + for (y=0; y < vp->res[1]; y++) { + for (x=0; x < vp->res[0]; x++) { + /* trigger for outside mesh */ + const int64_t i = BLI_VOXEL_INDEX(x, y, z, vp->res); + if (vp->data_r[i] < -0.f) + new_r[i] = get_avg_surrounds(vp->data_r, vp->res, x, y, z); + if (vp->data_g[i] < -0.f) + new_g[i] = get_avg_surrounds(vp->data_g, vp->res, x, y, z); + if (vp->data_b[i] < -0.f) + new_b[i] = get_avg_surrounds(vp->data_b, vp->res, x, y, z); + } + } + } + + SWAP(float *, vp->data_r, new_r); + SWAP(float *, vp->data_g, new_g); + SWAP(float *, vp->data_b, new_b); + + if (new_r) { MEM_freeN(new_r); new_r=NULL; } + if (new_g) { MEM_freeN(new_g); new_g=NULL; } + if (new_b) { MEM_freeN(new_b); new_b=NULL; } +} +#endif + +/* has a pad of 1 voxel surrounding the core for boundary simulation */ +BLI_INLINE int64_t ms_I(int x, int y, int z, const int *n) +{ + /* different ordering to light cache */ + return ((int64_t)x * (int64_t)(n[1] + 2) * (int64_t)(n[2] + 2) + + (int64_t)y * (int64_t)(n[2] + 2) + + (int64_t)z); +} + +/* has a pad of 1 voxel surrounding the core for boundary simulation */ +BLI_INLINE int64_t v_I_pad(int x, int y, int z, const int *n) +{ + /* same ordering to light cache, with padding */ + return ((int64_t)z * (int64_t)(n[1] + 2) * (int64_t)(n[0] + 2) + + (int64_t)y * (int64_t)(n[0] + 2) + + (int64_t)x); +} + +BLI_INLINE int64_t lc_to_ms_I(int x, int y, int z, const int *n) +{ + /* converting light cache index to multiple scattering index */ + return ((int64_t)(x - 1) * ((int64_t)n[1] * (int64_t)n[2]) + + (int64_t)(y - 1) * ((int64_t)n[2]) + + (int64_t)(z - 1)); +} + +/* *** multiple scattering approximation *** */ + +/* get the total amount of light energy in the light cache. 
used to normalize after multiple scattering */ +static float total_ss_energy(Render *re, int do_test_break, VolumePrecache *vp) +{ + int x, y, z; + const int *res = vp->res; + float energy=0.f; + + for (z=0; z < res[2]; z++) { + for (y=0; y < res[1]; y++) { + for (x=0; x < res[0]; x++) { + const int64_t i = BLI_VOXEL_INDEX(x, y, z, res); + + if (vp->data_r[i] > 0.f) energy += vp->data_r[i]; + if (vp->data_g[i] > 0.f) energy += vp->data_g[i]; + if (vp->data_b[i] > 0.f) energy += vp->data_b[i]; + } + } + + if (do_test_break && re->test_break(re->tbh)) break; + } + + return energy; +} + +static float total_ms_energy(Render *re, int do_test_break, float *sr, float *sg, float *sb, const int res[3]) +{ + int x, y, z; + float energy=0.f; + + for (z=1;z<=res[2];z++) { + for (y=1;y<=res[1];y++) { + for (x=1;x<=res[0];x++) { + const int64_t i = ms_I(x, y, z, res); + + if (sr[i] > 0.f) energy += sr[i]; + if (sg[i] > 0.f) energy += sg[i]; + if (sb[i] > 0.f) energy += sb[i]; + } + } + + if (do_test_break && re->test_break(re->tbh)) break; + } + + return energy; +} + +/** + * \param n: the unpadded resolution + */ +static void ms_diffuse(Render *re, int do_test_break, const float *x0, float *x, float diff, const int n[3]) +{ + int i, j, k, l; + const float dt = VOL_MS_TIMESTEP; + int64_t size = (int64_t)n[0] * (int64_t)n[1] * (int64_t)n[2]; + const float a = dt * diff * size; + + for (l=0; l<20; l++) { + for (k=1; k<=n[2]; k++) { + for (j=1; j<=n[1]; j++) { + for (i=1; i<=n[0]; i++) { + x[v_I_pad(i, j, k, n)] = + ((x0[v_I_pad(i, j, k, n)]) + ( + (x0[v_I_pad(i - 1, j, k, n)] + + x0[v_I_pad(i + 1, j, k, n)] + + x0[v_I_pad(i, j - 1, k, n)] + + x0[v_I_pad(i, j + 1, k, n)] + + x0[v_I_pad(i, j, k - 1, n)] + + x0[v_I_pad(i, j, k + 1, n)]) * a) / (1 + 6 * a)); + } + } + + if (do_test_break && re->test_break(re->tbh)) break; + } + + if (re->test_break(re->tbh)) break; + } +} + +static void multiple_scattering_diffusion(Render *re, VolumePrecache *vp, Material *ma) +{ + const float diff = 
ma->vol.ms_diff * 0.001f; /* compensate for scaling for a nicer UI range */ + const int simframes = (int)(ma->vol.ms_spread * (float)max_iii(vp->res[0], vp->res[1], vp->res[2])); + const int shade_type = ma->vol.shade_type; + float fac = ma->vol.ms_intensity; + + int x, y, z, m; + const int *n = vp->res; + const int size = (n[0]+2)*(n[1]+2)*(n[2]+2); + const int do_test_break = (size > 100000); + double time, lasttime= PIL_check_seconds_timer(); + float total; + float c=1.0f; + float origf; /* factor for blending in original light cache */ + float energy_ss, energy_ms; + + float *sr0=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer"); + float *sr=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer"); + float *sg0=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer"); + float *sg=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer"); + float *sb0=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer"); + float *sb=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer"); + + total = (float)(n[0]*n[1]*n[2]*simframes); + + energy_ss = total_ss_energy(re, do_test_break, vp); + + /* Scattering as diffusion pass */ + for (m=0; m<simframes; m++) { + /* add sources */ + for (z=1; z<=n[2]; z++) { + for (y=1; y<=n[1]; y++) { + for (x=1; x<=n[0]; x++) { + const int64_t i = lc_to_ms_I(x, y, z, n); //lc index + const int64_t j = ms_I(x, y, z, n); //ms index + + time= PIL_check_seconds_timer(); + c++; + if (vp->data_r[i] > 0.0f) + sr[j] += vp->data_r[i]; + if (vp->data_g[i] > 0.0f) + sg[j] += vp->data_g[i]; + if (vp->data_b[i] > 0.0f) + sb[j] += vp->data_b[i]; + + /* Displays progress every second */ + if (time-lasttime>1.0) { + char str[64]; + BLI_snprintf(str, sizeof(str), IFACE_("Simulating multiple scattering: %d%%"), + (int)(100.0f * (c / total))); + re->i.infostr = str; + re->stats_draw(re->sdh, &re->i); + 
re->i.infostr = NULL; + lasttime= time; + } + } + } + + if (do_test_break && re->test_break(re->tbh)) break; + } + + if (re->test_break(re->tbh)) break; + + SWAP(float *, sr, sr0); + SWAP(float *, sg, sg0); + SWAP(float *, sb, sb0); + + /* main diffusion simulation */ + ms_diffuse(re, do_test_break, sr0, sr, diff, n); + ms_diffuse(re, do_test_break, sg0, sg, diff, n); + ms_diffuse(re, do_test_break, sb0, sb, diff, n); + + if (re->test_break(re->tbh)) break; + } + + /* normalization factor to conserve energy */ + energy_ms = total_ms_energy(re, do_test_break, sr, sg, sb, n); + fac *= (energy_ss / energy_ms); + + /* blend multiple scattering back in the light cache */ + if (shade_type == MA_VOL_SHADE_SHADEDPLUSMULTIPLE) { + /* conserve energy - half single, half multiple */ + origf = 0.5f; + fac *= 0.5f; + } + else { + origf = 0.0f; + } + + for (z=1;z<=n[2];z++) { + for (y=1;y<=n[1];y++) { + for (x=1;x<=n[0];x++) { + const int64_t i = lc_to_ms_I(x, y, z, n); //lc index + const int64_t j = ms_I(x, y, z, n); //ms index + + vp->data_r[i] = origf * vp->data_r[i] + fac * sr[j]; + vp->data_g[i] = origf * vp->data_g[i] + fac * sg[j]; + vp->data_b[i] = origf * vp->data_b[i] + fac * sb[j]; + } + } + + if (do_test_break && re->test_break(re->tbh)) break; + } + + MEM_freeN(sr0); + MEM_freeN(sr); + MEM_freeN(sg0); + MEM_freeN(sg); + MEM_freeN(sb0); + MEM_freeN(sb); +} + + + +#if 0 /* debug stuff */ +static void *vol_precache_part_test(void *data) +{ + VolPrecachePart *pa = data; + + printf("part number: %d\n", pa->num); + printf("done: %d\n", pa->done); + printf("x min: %d x max: %d\n", pa->minx, pa->maxx); + printf("y min: %d y max: %d\n", pa->miny, pa->maxy); + printf("z min: %d z max: %d\n", pa->minz, pa->maxz); + + return NULL; +} +#endif + +/* Iterate over the 3d voxel grid, and fill the voxels with scattering information + * + * It's stored in memory as 3 big float grids next to each other, one for each RGB channel. 
+ * I'm guessing the memory alignment may work out better this way for the purposes + * of doing linear interpolation, but I haven't actually tested this theory! :) + */ +typedef struct VolPrecacheState { + double lasttime; + unsigned int doneparts; + unsigned int totparts; +} VolPrecacheState; + +static void vol_precache_part(TaskPool * __restrict pool, void *taskdata, int UNUSED(threadid)) +{ + VolPrecacheState *state = (VolPrecacheState *)BLI_task_pool_userdata(pool); + VolPrecachePart *pa = (VolPrecachePart *)taskdata; + Render *re = pa->re; + + ObjectInstanceRen *obi = pa->obi; + RayObject *tree = pa->tree; + ShadeInput *shi = pa->shi; + float scatter_col[3] = {0.f, 0.f, 0.f}; + float co[3], cco[3], view[3]; + int x, y, z; + int res[3]; + double time; + + if (re->test_break && re->test_break(re->tbh)) + return; + + //printf("thread id %d\n", threadid); + + res[0]= pa->res[0]; + res[1]= pa->res[1]; + res[2]= pa->res[2]; + + for (z= pa->minz; z < pa->maxz; z++) { + co[2] = pa->bbmin[2] + (pa->voxel[2] * (z + 0.5f)); + + for (y= pa->miny; y < pa->maxy; y++) { + co[1] = pa->bbmin[1] + (pa->voxel[1] * (y + 0.5f)); + + for (x=pa->minx; x < pa->maxx; x++) { + int64_t i; + co[0] = pa->bbmin[0] + (pa->voxel[0] * (x + 0.5f)); + + if (re->test_break && re->test_break(re->tbh)) + break; + + /* convert from world->camera space for shading */ + mul_v3_m4v3(cco, pa->viewmat, co); + + i = BLI_VOXEL_INDEX(x, y, z, res); + + /* don't bother if the point is not inside the volume mesh */ + if (!point_inside_obi(tree, obi, cco)) { + obi->volume_precache->data_r[i] = -1.0f; + obi->volume_precache->data_g[i] = -1.0f; + obi->volume_precache->data_b[i] = -1.0f; + continue; + } + + copy_v3_v3(view, cco); + normalize_v3(view); + vol_get_scattering(shi, scatter_col, cco, view); + + obi->volume_precache->data_r[i] = scatter_col[0]; + obi->volume_precache->data_g[i] = scatter_col[1]; + obi->volume_precache->data_b[i] = scatter_col[2]; + + } + } + } + + unsigned int doneparts = 
atomic_add_and_fetch_u(&state->doneparts, 1); + + time = PIL_check_seconds_timer(); + if (time - state->lasttime > 1.0) { + ThreadMutex *mutex = BLI_task_pool_user_mutex(pool); + + if (BLI_mutex_trylock(mutex)) { + char str[64]; + float ratio = (float)doneparts/(float)state->totparts; + BLI_snprintf(str, sizeof(str), IFACE_("Precaching volume: %d%%"), (int)(100.0f * ratio)); + re->i.infostr = str; + re->stats_draw(re->sdh, &re->i); + re->i.infostr = NULL; + state->lasttime = time; + + BLI_mutex_unlock(mutex); + } + } +} + +static void precache_setup_shadeinput(Render *re, ObjectInstanceRen *obi, Material *ma, ShadeInput *shi) +{ + memset(shi, 0, sizeof(ShadeInput)); + shi->depth= 1; + shi->mask= 1; + shi->mat = ma; + shi->vlr = NULL; + memcpy(&shi->r, &shi->mat->r, 23*sizeof(float)); /* note, keep this synced with render_types.h */ + shi->har= shi->mat->har; + shi->obi= obi; + shi->obr= obi->obr; + shi->lay = re->lay; +} + +static void precache_launch_parts(Render *re, RayObject *tree, ShadeInput *shi, ObjectInstanceRen *obi) +{ + TaskScheduler *task_scheduler; + TaskPool *task_pool; + VolumePrecache *vp = obi->volume_precache; + VolPrecacheState state; + int i=0, x, y, z; + float voxel[3]; + int sizex, sizey, sizez; + float bbmin[3], bbmax[3]; + const int *res; + int minx, maxx; + int miny, maxy; + int minz, maxz; + int totthread = re->r.threads; + int parts[3]; + + if (!vp) return; + + /* currently we just subdivide the box, number of threads per side */ + parts[0] = parts[1] = parts[2] = totthread; + res = vp->res; + + /* setup task scheduler */ + memset(&state, 0, sizeof(state)); + state.doneparts = 0; + state.totparts = parts[0]*parts[1]*parts[2]; + state.lasttime = PIL_check_seconds_timer(); + + task_scheduler = BLI_task_scheduler_create(totthread); + task_pool = BLI_task_pool_create(task_scheduler, &state); + + /* using boundbox in worldspace */ + global_bounds_obi(re, obi, bbmin, bbmax); + sub_v3_v3v3(voxel, bbmax, bbmin); + + voxel[0] /= (float)res[0]; + 
voxel[1] /= (float)res[1]; + voxel[2] /= (float)res[2]; + + for (x=0; x < parts[0]; x++) { + sizex = ceil(res[0] / (float)parts[0]); + minx = x * sizex; + maxx = minx + sizex; + maxx = (maxx>res[0])?res[0]:maxx; + + for (y=0; y < parts[1]; y++) { + sizey = ceil(res[1] / (float)parts[1]); + miny = y * sizey; + maxy = miny + sizey; + maxy = (maxy>res[1])?res[1]:maxy; + + for (z=0; z < parts[2]; z++) { + VolPrecachePart *pa= MEM_callocN(sizeof(VolPrecachePart), "new precache part"); + + sizez = ceil(res[2] / (float)parts[2]); + minz = z * sizez; + maxz = minz + sizez; + maxz = (maxz>res[2])?res[2]:maxz; + + pa->re = re; + pa->num = i; + pa->tree = tree; + pa->shi = shi; + pa->obi = obi; + copy_m4_m4(pa->viewmat, re->viewmat); + + copy_v3_v3(pa->bbmin, bbmin); + copy_v3_v3(pa->voxel, voxel); + copy_v3_v3_int(pa->res, res); + + pa->minx = minx; pa->maxx = maxx; + pa->miny = miny; pa->maxy = maxy; + pa->minz = minz; pa->maxz = maxz; + + BLI_task_pool_push(task_pool, vol_precache_part, pa, true, TASK_PRIORITY_HIGH); + + i++; + } + } + } + + /* work and wait until tasks are done */ + BLI_task_pool_work_and_wait(task_pool); + + /* free */ + BLI_task_pool_free(task_pool); + BLI_task_scheduler_free(task_scheduler); +} + +/* calculate resolution from bounding box in world space */ +static int precache_resolution(Render *re, VolumePrecache *vp, ObjectInstanceRen *obi, int res) +{ + float dim[3], div; + float bbmin[3], bbmax[3]; + + /* bound box in global space */ + global_bounds_obi(re, obi, bbmin, bbmax); + sub_v3_v3v3(dim, bbmax, bbmin); + + div = max_fff(dim[0], dim[1], dim[2]); + dim[0] /= div; + dim[1] /= div; + dim[2] /= div; + + vp->res[0] = ceil(dim[0] * res); + vp->res[1] = ceil(dim[1] * res); + vp->res[2] = ceil(dim[2] * res); + + if ((vp->res[0] < 1) || (vp->res[1] < 1) || (vp->res[2] < 1)) + return 0; + + return 1; +} + +/* Precache a volume into a 3D voxel grid. 
+ * The voxel grid is stored in the ObjectInstanceRen, + * in camera space, aligned with the ObjectRen's bounding box. + * Resolution is defined by the user. + */ +static void vol_precache_objectinstance_threads(Render *re, ObjectInstanceRen *obi, Material *ma) +{ + VolumePrecache *vp; + RayObject *tree; + ShadeInput shi; + + R = *re; + + /* create a raytree with just the faces of the instanced ObjectRen, + * used for checking if the cached point is inside or outside. */ + tree = makeraytree_object(&R, obi); + if (!tree) return; + + vp = MEM_callocN(sizeof(VolumePrecache), "volume light cache"); + obi->volume_precache = vp; + + if (!precache_resolution(re, vp, obi, ma->vol.precache_resolution)) { + MEM_freeN(vp); + vp = NULL; + return; + } + + vp->data_r = MEM_callocN(sizeof(float)*vp->res[0]*vp->res[1]*vp->res[2], "volume light cache data red channel"); + vp->data_g = MEM_callocN(sizeof(float)*vp->res[0]*vp->res[1]*vp->res[2], "volume light cache data green channel"); + vp->data_b = MEM_callocN(sizeof(float)*vp->res[0]*vp->res[1]*vp->res[2], "volume light cache data blue channel"); + if (vp->data_r==NULL || vp->data_g==NULL || vp->data_b==NULL) { + MEM_freeN(vp); + return; + } + + /* Need a shadeinput to calculate scattering */ + precache_setup_shadeinput(re, obi, ma, &shi); + + precache_launch_parts(re, tree, &shi, obi); + + if (tree) { + /* TODO: makeraytree_object creates a tree and saves it on OBI, + * if we free this tree we should also clear other pointers to it */ + //RE_rayobject_free(tree); + //tree= NULL; + } + + if (ELEM(ma->vol.shade_type, MA_VOL_SHADE_MULTIPLE, MA_VOL_SHADE_SHADEDPLUSMULTIPLE)) { + /* this should be before the filtering */ + multiple_scattering_diffusion(re, obi->volume_precache, ma); + } + + lightcache_filter(obi->volume_precache); +} + +static int using_lightcache(Material *ma) +{ + return (((ma->vol.shadeflag & MA_VOL_PRECACHESHADING) && (ma->vol.shade_type == MA_VOL_SHADE_SHADED)) || + (ELEM(ma->vol.shade_type, 
MA_VOL_SHADE_MULTIPLE, MA_VOL_SHADE_SHADEDPLUSMULTIPLE))); +} + +/* loop through all objects (and their associated materials) + * marked for pre-caching in convertblender.c, and pre-cache them */ +void volume_precache(Render *re) +{ + ObjectInstanceRen *obi; + VolumeOb *vo; + + re->i.infostr = IFACE_("Volume preprocessing"); + re->stats_draw(re->sdh, &re->i); + + for (vo= re->volumes.first; vo; vo= vo->next) { + if (using_lightcache(vo->ma)) { + for (obi= re->instancetable.first; obi; obi= obi->next) { + if (obi->obr == vo->obr) { + vol_precache_objectinstance_threads(re, obi, vo->ma); + + if (re->test_break && re->test_break(re->tbh)) + break; + } + } + + if (re->test_break && re->test_break(re->tbh)) + break; + } + } + + re->i.infostr = NULL; + re->stats_draw(re->sdh, &re->i); +} + +void free_volume_precache(Render *re) +{ + ObjectInstanceRen *obi; + + for (obi= re->instancetable.first; obi; obi= obi->next) { + if (obi->volume_precache != NULL) { + MEM_freeN(obi->volume_precache->data_r); + MEM_freeN(obi->volume_precache->data_g); + MEM_freeN(obi->volume_precache->data_b); + MEM_freeN(obi->volume_precache->bbmin); + MEM_freeN(obi->volume_precache->bbmax); + MEM_freeN(obi->volume_precache); + obi->volume_precache = NULL; + } + } + + BLI_freelistN(&re->volumes); +} + +int point_inside_volume_objectinstance(Render *re, ObjectInstanceRen *obi, const float co[3]) +{ + RayObject *tree; + int inside=0; + + tree = makeraytree_object(re, obi); + if (!tree) return 0; + + inside = point_inside_obi(tree, obi, co); + + //TODO: makeraytree_object creates a tree and saves it on OBI, if we free this tree we should also clear other pointers to it + //RE_rayobject_free(tree); + //tree= NULL; + + return inside; +} + diff --git a/source/blender/render/intern/source/volumetric.c b/source/blender/render/intern/source/volumetric.c new file mode 100644 index 00000000000..583353ed8cf --- /dev/null +++ b/source/blender/render/intern/source/volumetric.c @@ -0,0 +1,836 @@ +/* + * ***** 
BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. + * All rights reserved. + * + * The Original Code is: all of this file. + * + * Contributor(s): Matt Ebb, Raul Fernandez Hernandez (Farsthary) + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/render/intern/source/volumetric.c + * \ingroup render + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <float.h> + +#include "BLI_math.h" +#include "BLI_rand.h" +#include "BLI_voxel.h" +#include "BLI_utildefines.h" + +#include "RE_shader_ext.h" + +#include "IMB_colormanagement.h" + +#include "DNA_material_types.h" +#include "DNA_group_types.h" +#include "DNA_lamp_types.h" +#include "DNA_meta_types.h" + + +#include "render_types.h" +#include "pixelshading.h" +#include "rayintersection.h" +#include "rayobject.h" +#include "renderdatabase.h" +#include "shading.h" +#include "shadbuf.h" +#include "texture.h" +#include "volumetric.h" +#include "volume_precache.h" + +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ +/* only to be used here in this file, it's for speed */ +extern struct Render R; +/* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +/* tracing */ +static float vol_get_shadow(ShadeInput *shi, LampRen *lar, const float co[3]) +{ + float visibility = 1.f; + + if (lar->shb) { + float dxco[3] = {0.f, 0.f, 0.f}, dyco[3] = {0.f, 0.f, 0.f}; + + visibility = testshadowbuf(&R, lar->shb, co, dxco, dyco, 1.0, 0.0); + } + else if (lar->mode & LA_SHAD_RAY) { + /* trace shadow manually, no good lamp api atm */ + Isect is; + + copy_v3_v3(is.start, co); + if (lar->type == LA_SUN || lar->type == LA_HEMI) { + is.dir[0] = -lar->vec[0]; + is.dir[1] = -lar->vec[1]; + is.dir[2] = -lar->vec[2]; + is.dist = R.maxdist; + } + else { + sub_v3_v3v3(is.dir, lar->co, is.start); + is.dist = normalize_v3(is.dir); + } + + is.mode = RE_RAY_MIRROR; + is.check = RE_CHECK_VLR_NON_SOLID_MATERIAL; + is.skip = 0; + + if (lar->mode & (LA_LAYER | LA_LAYER_SHADOW)) + is.lay = lar->lay; + else + is.lay = -1; + + is.orig.ob = NULL; + is.orig.face = NULL; + is.last_hit = lar->last_hit[shi->thread]; + + RE_instance_rotate_ray(shi->obi, &is); + + if (RE_rayobject_raycast(R.raytree, &is)) { + RE_instance_rotate_ray_restore(shi->obi, &is); + + visibility = 0.f; + } + + lar->last_hit[shi->thread] = is.last_hit; + } + return visibility; +} + +static int vol_get_bounds(ShadeInput *shi, const float co[3], const float vec[3], float hitco[3], Isect *isect, int intersect_type) +{ + + copy_v3_v3(isect->start, co); + copy_v3_v3(isect->dir, vec); + isect->dist = FLT_MAX; + isect->mode = RE_RAY_MIRROR; + isect->last_hit = NULL; + isect->lay = -1; + isect->check = RE_CHECK_VLR_NONE; + + if (intersect_type == VOL_BOUNDS_DEPTH) { + isect->skip = RE_SKIP_VLR_NEIGHBOUR; + isect->orig.face = (void *)shi->vlr; + isect->orig.ob = (void *)shi->obi; + } + else { // if (intersect_type == VOL_BOUNDS_SS) { + isect->skip = 0; + isect->orig.face = NULL; + isect->orig.ob = NULL; + } + + RE_instance_rotate_ray(shi->obi, isect); + + if (RE_rayobject_raycast(R.raytree, isect)) { + 
RE_instance_rotate_ray_restore(shi->obi, isect); + + hitco[0] = isect->start[0] + isect->dist * isect->dir[0]; + hitco[1] = isect->start[1] + isect->dist * isect->dir[1]; + hitco[2] = isect->start[2] + isect->dist * isect->dir[2]; + return 1; + } + else { + return 0; + } +} + +static void shade_intersection(ShadeInput *shi, float col_r[4], Isect *is) +{ + ShadeInput shi_new; + ShadeResult shr_new; + + memset(&shi_new, 0, sizeof(ShadeInput)); + + shi_new.mask = shi->mask; + shi_new.osatex = shi->osatex; + shi_new.thread = shi->thread; + shi_new.depth = shi->depth + 1; + shi_new.volume_depth = shi->volume_depth + 1; + shi_new.xs = shi->xs; + shi_new.ys = shi->ys; + shi_new.lay = shi->lay; + shi_new.passflag = SCE_PASS_COMBINED; /* result of tracing needs no pass info */ + shi_new.combinedflag = 0xFFFFFF; /* ray trace does all options */ + shi_new.light_override = shi->light_override; + shi_new.mat_override = shi->mat_override; + + copy_v3_v3(shi_new.camera_co, is->start); + + memset(&shr_new, 0, sizeof(ShadeResult)); + + /* hardcoded limit of 100 for now - prevents problems in weird geometry */ + if (shi->volume_depth < 100) { + shade_ray(is, &shi_new, &shr_new); + } + + copy_v3_v3(col_r, shr_new.combined); + col_r[3] = shr_new.alpha; +} + +static void vol_trace_behind(ShadeInput *shi, VlakRen *vlr, const float co[3], float col_r[4]) +{ + Isect isect; + + copy_v3_v3(isect.start, co); + copy_v3_v3(isect.dir, shi->view); + isect.dist = FLT_MAX; + + isect.mode = RE_RAY_MIRROR; + isect.check = RE_CHECK_VLR_NONE; + isect.skip = RE_SKIP_VLR_NEIGHBOUR; + isect.orig.ob = (void *) shi->obi; + isect.orig.face = (void *)vlr; + isect.last_hit = NULL; + isect.lay = -1; + + /* check to see if there's anything behind the volume, otherwise shade the sky */ + RE_instance_rotate_ray(shi->obi, &isect); + + if (RE_rayobject_raycast(R.raytree, &isect)) { + RE_instance_rotate_ray_restore(shi->obi, &isect); + + shade_intersection(shi, col_r, &isect); + } + else { + shadeSkyView(col_r, co, 
shi->view, NULL, shi->thread); + shadeSunView(col_r, shi->view); + } +} + + +/* trilinear interpolation */ +static void vol_get_precached_scattering(Render *re, ShadeInput *shi, float scatter_col[3], const float co[3]) +{ + VolumePrecache *vp = shi->obi->volume_precache; + float bbmin[3], bbmax[3], dim[3]; + float world_co[3], sample_co[3]; + + if (!vp) return; + + /* find sample point in global space bounding box 0.0-1.0 */ + global_bounds_obi(re, shi->obi, bbmin, bbmax); + sub_v3_v3v3(dim, bbmax, bbmin); + mul_v3_m4v3(world_co, re->viewinv, co); + + /* sample_co in 0.0-1.0 */ + sample_co[0] = (world_co[0] - bbmin[0]) / dim[0]; + sample_co[1] = (world_co[1] - bbmin[1]) / dim[1]; + sample_co[2] = (world_co[2] - bbmin[2]) / dim[2]; + + scatter_col[0] = BLI_voxel_sample_triquadratic(vp->data_r, vp->res, sample_co); + scatter_col[1] = BLI_voxel_sample_triquadratic(vp->data_g, vp->res, sample_co); + scatter_col[2] = BLI_voxel_sample_triquadratic(vp->data_b, vp->res, sample_co); +} + +/* Meta object density, brute force for now + * (might be good enough anyway, don't need huge number of metaobs to model volumetric objects */ +static float metadensity(Object *ob, const float co[3]) +{ + float mat[4][4], imat[4][4], dens = 0.f; + MetaBall *mb = (MetaBall *)ob->data; + MetaElem *ml; + + /* transform co to meta-element */ + float tco[3] = {co[0], co[1], co[2]}; + mul_m4_m4m4(mat, R.viewmat, ob->obmat); + invert_m4_m4(imat, mat); + mul_m4_v3(imat, tco); + + for (ml = mb->elems.first; ml; ml = ml->next) { + float bmat[3][3], dist2; + + /* element rotation transform */ + float tp[3] = {ml->x - tco[0], ml->y - tco[1], ml->z - tco[2]}; + quat_to_mat3(bmat, ml->quat); + transpose_m3(bmat); /* rot.only, so inverse == transpose */ + mul_m3_v3(bmat, tp); + + /* MB_BALL default */ + switch (ml->type) { + case MB_ELIPSOID: + tp[0] /= ml->expx; + tp[1] /= ml->expy; + tp[2] /= ml->expz; + break; + case MB_CUBE: + tp[2] = (tp[2] > ml->expz) ? (tp[2] - ml->expz) : ((tp[2] < -ml->expz) ? 
(tp[2] + ml->expz) : 0.f); + /* no break, xy as plane */ + ATTR_FALLTHROUGH; + case MB_PLANE: + tp[1] = (tp[1] > ml->expy) ? (tp[1] - ml->expy) : ((tp[1] < -ml->expy) ? (tp[1] + ml->expy) : 0.f); + /* no break, x as tube */ + ATTR_FALLTHROUGH; + case MB_TUBE: + tp[0] = (tp[0] > ml->expx) ? (tp[0] - ml->expx) : ((tp[0] < -ml->expx) ? (tp[0] + ml->expx) : 0.f); + } + + /* ml->rad2 is not set */ + dist2 = 1.0f - (dot_v3v3(tp, tp) / (ml->rad * ml->rad)); + if (dist2 > 0.f) + dens += (ml->flag & MB_NEGATIVE) ? -ml->s * dist2 * dist2 * dist2 : ml->s * dist2 * dist2 * dist2; + } + + dens -= mb->thresh; + return (dens < 0.f) ? 0.f : dens; +} + +float vol_get_density(struct ShadeInput *shi, const float co[3]) +{ + float density = shi->mat->vol.density; + float density_scale = shi->mat->vol.density_scale; + + if (shi->mat->mapto_textured & MAP_DENSITY) + do_volume_tex(shi, co, MAP_DENSITY, NULL, &density, &R); + + /* if meta-object, modulate by metadensity without increasing it */ + if (shi->obi->obr->ob->type == OB_MBALL) { + const float md = metadensity(shi->obi->obr->ob, co); + if (md < 1.f) density *= md; + } + + return density * density_scale; +} + + +/* Color of light that gets scattered out by the volume */ +/* Uses same physically based scattering parameter as in transmission calculations, + * along with artificial reflection scale/reflection color tint */ +static void vol_get_reflection_color(ShadeInput *shi, float ref_col[3], const float co[3]) +{ + float scatter = shi->mat->vol.scattering; + float reflection = shi->mat->vol.reflection; + copy_v3_v3(ref_col, shi->mat->vol.reflection_col); + + if (shi->mat->mapto_textured & (MAP_SCATTERING + MAP_REFLECTION_COL)) + do_volume_tex(shi, co, MAP_SCATTERING + MAP_REFLECTION_COL, ref_col, &scatter, &R); + + /* only one single float parameter at a time... 
:s */ + if (shi->mat->mapto_textured & (MAP_REFLECTION)) + do_volume_tex(shi, co, MAP_REFLECTION, NULL, &reflection, &R); + + ref_col[0] = reflection * ref_col[0] * scatter; + ref_col[1] = reflection * ref_col[1] * scatter; + ref_col[2] = reflection * ref_col[2] * scatter; +} + +/* compute emission component, amount of radiance to add per segment + * can be textured with 'emit' */ +static void vol_get_emission(ShadeInput *shi, float emission_col[3], const float co[3]) +{ + float emission = shi->mat->vol.emission; + copy_v3_v3(emission_col, shi->mat->vol.emission_col); + + if (shi->mat->mapto_textured & (MAP_EMISSION + MAP_EMISSION_COL)) + do_volume_tex(shi, co, MAP_EMISSION + MAP_EMISSION_COL, emission_col, &emission, &R); + + emission_col[0] = emission_col[0] * emission; + emission_col[1] = emission_col[1] * emission; + emission_col[2] = emission_col[2] * emission; +} + + +/* A combination of scattering and absorption -> known as sigma T. + * This can possibly use a specific scattering color, + * and absorption multiplier factor too, but these parameters are left out for simplicity. + * It's easy enough to get a good wide range of results with just these two parameters. 
*/ +static void vol_get_sigma_t(ShadeInput *shi, float sigma_t[3], const float co[3]) +{ + /* technically absorption, but named transmission color + * since it describes the effect of the coloring *after* absorption. + * The value written out is sigma_t[i] = (1 - transmission_col[i]) plus scattering. */ + float transmission_col[3] = {shi->mat->vol.transmission_col[0], shi->mat->vol.transmission_col[1], shi->mat->vol.transmission_col[2]}; + float scattering = shi->mat->vol.scattering; + + if (shi->mat->mapto_textured & (MAP_SCATTERING + MAP_TRANSMISSION_COL)) + do_volume_tex(shi, co, MAP_SCATTERING + MAP_TRANSMISSION_COL, transmission_col, &scattering, &R); + + sigma_t[0] = (1.0f - transmission_col[0]) + scattering; + sigma_t[1] = (1.0f - transmission_col[1]) + scattering; + sigma_t[2] = (1.0f - transmission_col[2]) + scattering; +} + +/* phase function - determines in which directions the light + * is scattered in the volume relative to incoming direction + * and view direction. + * g is the asymmetry value: g == 0 returns the isotropic constant, any other value uses a + * Schlick approximation evaluated on dot(w, wp). */ +static float vol_get_phasefunc(ShadeInput *UNUSED(shi), float g, const float w[3], const float wp[3]) +{ + const float normalize = 0.25f; // = 1.f/4.f = M_PI/(4.f*M_PI) + + /* normalization constant is 1/4 rather than 1/4pi, since + * Blender's shading system doesn't normalize for + * energy conservation - eg. multiplying by pdf ( 1/pi for a lambert brdf ). + * This means that lambert surfaces in Blender are pi times brighter than they 'should be' + * and therefore, with correct energy conservation, volumes will be darker than other solid objects, + * for the same lighting intensity. + * To correct this, scale up the phase function values by pi + * until Blender's shading system supports this better. 
--matt + */ + + if (g == 0.f) { /* isotropic */ + return normalize * 1.f; + } + else { /* schlick approximation; NOTE(review): the divisor is 0 if k * dot(w, wp) reaches 1 - presumably |k| stays below 1 for valid g; confirm */ + const float k = 1.55f * g - 0.55f * g * g * g; + const float kcostheta = k * dot_v3v3(w, wp); + return normalize * (1.f - k * k) / ((1.f - kcostheta) * (1.f - kcostheta)); + } + + /* not used, but here for reference: */ +#if 0 + switch (phasefunc_type) { + case MA_VOL_PH_MIEHAZY: + return normalize * (0.5f + 4.5f * powf(0.5 * (1.f + costheta), 8.f)); + case MA_VOL_PH_MIEMURKY: + return normalize * (0.5f + 16.5f * powf(0.5 * (1.f + costheta), 32.f)); + case MA_VOL_PH_RAYLEIGH: + return normalize * 3.f / 4.f * (1 + costheta * costheta); + case MA_VOL_PH_HG: + return normalize * (1.f - g * g) / powf(1.f + g * g - 2.f * g * costheta, 1.5f); + case MA_VOL_PH_SCHLICK: + { + const float k = 1.55f * g - 0.55f * g * g * g; + const float kcostheta = k * costheta; + return normalize * (1.f - k * k) / ((1.f - kcostheta) * (1.f - kcostheta)); + } + case MA_VOL_PH_ISOTROPIC: + default: + return normalize * 1.f; + } +#endif +} + +/* Compute transmittance = e^(-attenuation) for a single step: + * multiplies tr in place by exp(-density * stepsize * sigma_t), with sigma_t evaluated at co. */ +static void vol_get_transmittance_seg(ShadeInput *shi, float tr[3], float stepsize, const float co[3], float density) +{ + /* input density = density at co */ + float tau[3] = {0.f, 0.f, 0.f}; + const float stepd = density * stepsize; + float sigma_t[3]; + + vol_get_sigma_t(shi, sigma_t, co); + + /* homogeneous volume within the sampled distance */ + tau[0] += stepd * sigma_t[0]; + tau[1] += stepd * sigma_t[1]; + tau[2] += stepd * sigma_t[2]; + + tr[0] *= expf(-tau[0]); + tr[1] *= expf(-tau[1]); + tr[2] *= expf(-tau[2]); +} + +/* Compute transmittance = e^(-attenuation) along the whole ray from co to endco. + * Marches in steps of vol.stepsize, summing (step length * density * sigma_t) into tau, + * then overwrites tr with exp(-tau). + * NOTE(review): the loop advances by vol.stepsize, so a stepsize of 0 would never terminate; + * presumably the value is clamped elsewhere - confirm. */ +static void vol_get_transmittance(ShadeInput *shi, float tr[3], const float co[3], const float endco[3]) +{ + float p[3] = {co[0], co[1], co[2]}; + float step_vec[3] = {endco[0] - co[0], endco[1] - co[1], endco[2] - co[2]}; + float tau[3] = {0.f, 0.f, 0.f}; + + float t0 = 0.f; + float t1 = normalize_v3(step_vec); + float pt0 = t0; + + 
t0 += shi->mat->vol.stepsize * ((shi->mat->vol.stepsize_type == MA_VOL_STEP_CONSTANT) ? 0.5f : BLI_thread_frand(shi->thread)); + p[0] += t0 * step_vec[0]; + p[1] += t0 * step_vec[1]; + p[2] += t0 * step_vec[2]; + mul_v3_fl(step_vec, shi->mat->vol.stepsize); + + for (; t0 < t1; pt0 = t0, t0 += shi->mat->vol.stepsize) { + const float d = vol_get_density(shi, p); + const float stepd = (t0 - pt0) * d; + float sigma_t[3]; + + vol_get_sigma_t(shi, sigma_t, p); + + tau[0] += stepd * sigma_t[0]; + tau[1] += stepd * sigma_t[1]; + tau[2] += stepd * sigma_t[2]; + + add_v3_v3(p, step_vec); + } + + /* return transmittance */ + tr[0] = expf(-tau[0]); + tr[1] = expf(-tau[1]); + tr[2] = expf(-tau[2]); +} +/* Light scattered towards 'view' at point 'co' from a single lamp, written to lacol. + * lacol starts as the lamp color times its visibility, is optionally multiplied by the shadow + * factor and/or the transmittance through the volume towards the lamp (depending on + * vol.shade_type), and finally by the phase function and the reflection color. + * The layer, energy and visibility early-outs return before lacol is written. */ +static void vol_shade_one_lamp(struct ShadeInput *shi, const float co[3], const float view[3], LampRen *lar, float lacol[3]) +{ + float visifac, lv[3], lampdist; + float tr[3] = {1.0, 1.0, 1.0}; + float hitco[3], *atten_co; + float p, ref_col[3]; + + if (lar->mode & LA_LAYER) if ((lar->lay & shi->obi->lay) == 0) return; + if ((lar->lay & shi->lay) == 0) return; + if (lar->energy == 0.0f) return; + + if ((visifac = lamp_get_visibility(lar, co, lv, &lampdist)) == 0.f) return; + + copy_v3_v3(lacol, &lar->r); + + if (lar->mode & LA_TEXTURE) { + shi->osatex = 0; + do_lamp_tex(lar, lv, shi, lacol, LA_TEXTURE); + } + + mul_v3_fl(lacol, visifac); + + if (ELEM(lar->type, LA_SUN, LA_HEMI)) + copy_v3_v3(lv, lar->vec); + negate_v3(lv); + + if (shi->mat->vol.shade_type == MA_VOL_SHADE_SHADOWED) { + mul_v3_fl(lacol, vol_get_shadow(shi, lar, co)); + } + else if (ELEM(shi->mat->vol.shade_type, MA_VOL_SHADE_SHADED, MA_VOL_SHADE_MULTIPLE, MA_VOL_SHADE_SHADEDPLUSMULTIPLE)) { + Isect is; + + if (shi->mat->vol.shadeflag & MA_VOL_RECV_EXT_SHADOW) { + mul_v3_fl(lacol, vol_get_shadow(shi, lar, co)); + if (IMB_colormanagement_get_luminance(lacol) < 0.001f) return; /* NOTE(review): returns with lacol still holding the partially shaded lamp color (no phase/reflection applied) - confirm this is intended */ + } + + /* find minimum of volume bounds, or lamp coord */ + if (vol_get_bounds(shi, co, lv, hitco, &is, VOL_BOUNDS_SS)) { + float dist = 
len_v3v3(co, hitco); + VlakRen *vlr = (VlakRen *)is.hit.face; + + /* simple internal shadowing: hitting a face with a surface material zeroes the lamp contribution */ + if (vlr->mat->material_type == MA_TYPE_SURFACE) { + lacol[0] = lacol[1] = lacol[2] = 0.0f; + return; + } + + if (ELEM(lar->type, LA_SUN, LA_HEMI)) + /* infinite lights, can never be inside volume */ + atten_co = hitco; + else if (lampdist < dist) { + atten_co = lar->co; + } + else + atten_co = hitco; + + vol_get_transmittance(shi, tr, co, atten_co); + + mul_v3_v3v3(lacol, lacol, tr); + } + else { + /* Point is on the outside edge of the volume, + * therefore no attenuation, full transmission. + * Radiance from lamp remains unchanged */ + } + } + + if (IMB_colormanagement_get_luminance(lacol) < 0.001f) return; /* NOTE(review): lacol keeps its (small) pre-phase value on this path and vol_get_scattering() below still adds it - confirm this is intended */ + + normalize_v3(lv); + p = vol_get_phasefunc(shi, shi->mat->vol.asymmetry, view, lv); + + /* physically based scattering with non-physically based RGB gain */ + vol_get_reflection_color(shi, ref_col, co); + + lacol[0] *= p * ref_col[0]; + lacol[1] *= p * ref_col[1]; + lacol[2] *= p * ref_col[2]; +} + +/* Single scattering only for now. + * Zeroes scatter_col, then adds the vol_shade_one_lamp() result for every entry of + * get_lights(shi) that has a lampren. */ +void vol_get_scattering(ShadeInput *shi, float scatter_col[3], const float co[3], const float view[3]) +{ + ListBase *lights; + GroupObject *go; + LampRen *lar; + + zero_v3(scatter_col); + + lights = get_lights(shi); + for (go = lights->first; go; go = go->next) { + float lacol[3] = {0.f, 0.f, 0.f}; + lar = go->lampren; + + if (lar) { + vol_shade_one_lamp(shi, co, view, lar, lacol); + add_v3_v3(scatter_col, lacol); + } + } +} + + +/* + * The main volumetric integrator, using an emission/absorption/scattering model. 
+ * + * Incoming radiance = + * + * outgoing radiance from behind surface * beam transmittance/attenuation + * + added radiance from all points along the ray due to participating media + * --> radiance for each segment = + * (radiance added by scattering + radiance added by emission) * beam transmittance/attenuation + */ + +/* For ease of use, I've also introduced a 'reflection' and 'reflection color' parameter, which isn't + * physically correct. This works as an RGB tint/gain on out-scattered light, but doesn't affect the light + * that is transmitted through the volume. While having wavelength dependent absorption/scattering is more correct, + * it also makes it harder to control the overall look of the volume since coloring the outscattered light results + * in the inverse color being transmitted through the rest of the volume. + * + * col holds the radiance from behind the volume on entry; on exit col[0..2] is that radiance multiplied + * by the transmittance plus the radiance accumulated along the ray from co to endco, + * and col[3] is 1 - luminance(transmittance). + */ +static void volumeintegrate(struct ShadeInput *shi, float col[4], const float co[3], const float endco[3]) +{ + float radiance[3] = {0.f, 0.f, 0.f}; + float tr[3] = {1.f, 1.f, 1.f}; + float p[3] = {co[0], co[1], co[2]}; + float step_vec[3] = {endco[0] - co[0], endco[1] - co[1], endco[2] - co[2]}; + const float stepsize = shi->mat->vol.stepsize; + + float t0 = 0.f; + float pt0 = t0; + float t1 = normalize_v3(step_vec); /* returns vector length */ + + t0 += stepsize * ((shi->mat->vol.stepsize_type == MA_VOL_STEP_CONSTANT) ? 
0.5f : BLI_thread_frand(shi->thread)); + p[0] += t0 * step_vec[0]; + p[1] += t0 * step_vec[1]; + p[2] += t0 * step_vec[2]; + mul_v3_fl(step_vec, stepsize); + + for (; t0 < t1; pt0 = t0, t0 += stepsize) { + const float density = vol_get_density(shi, p); + + if (density > 0.00001f) { + float scatter_col[3] = {0.f, 0.f, 0.f}, emit_col[3]; + const float stepd = (t0 - pt0) * density; + + /* transmittance component (alpha). + * NOTE(review): sigma_t is evaluated at the ray start 'co', not at the current sample 'p', + * and the length passed is 'stepsize' rather than (t0 - pt0) as used for stepd; + * this looks unintended when scattering/transmission are textured - confirm. */ + vol_get_transmittance_seg(shi, tr, stepsize, co, density); + + if (t0 > t1 * 0.25f) { + /* only use depth cutoff after we've traced a little way into the volume */ + if (IMB_colormanagement_get_luminance(tr) < shi->mat->vol.depth_cutoff) break; + } + + vol_get_emission(shi, emit_col, p); + + if (shi->obi->volume_precache) { + float p2[3]; + + p2[0] = p[0] + (step_vec[0] * 0.5f); + p2[1] = p[1] + (step_vec[1] * 0.5f); + p2[2] = p[2] + (step_vec[2] * 0.5f); + + vol_get_precached_scattering(&R, shi, scatter_col, p2); + } + else + vol_get_scattering(shi, scatter_col, p, shi->view); + + radiance[0] += stepd * tr[0] * (emit_col[0] + scatter_col[0]); + radiance[1] += stepd * tr[1] * (emit_col[1] + scatter_col[1]); + radiance[2] += stepd * tr[2] * (emit_col[2] + scatter_col[2]); + } + add_v3_v3(p, step_vec); + } + + /* multiply original color (from behind volume) with transmittance over entire distance */ + mul_v3_v3v3(col, tr, col); + add_v3_v3(col, radiance); + + /* alpha <-- transmission luminance */ + col[3] = 1.0f - IMB_colormanagement_get_luminance(tr); +} + +/* The main entry point for volume shading. + * inside_volume is compared against VOL_SHADE_INSIDE / VOL_SHADE_OUTSIDE; the result is written + * to shr (combined, alpha, diff and diffshad). */ +static void volume_trace(struct ShadeInput *shi, struct ShadeResult *shr, int inside_volume) +{ + float hitco[3], col[4] = {0.f, 0.f, 0.f, 0.f}; + const float *startco, *endco; + int trace_behind = 1; + const int ztransp = ((shi->depth == 0) && (shi->mat->mode & MA_TRANSP) && (shi->mat->mode & MA_ZTRANSP)); + Isect is; + + /* check for shading an internal face of a volume object directly */ + if (inside_volume == VOL_SHADE_INSIDE) + trace_behind = 0; 
+ else if (inside_volume == VOL_SHADE_OUTSIDE) { + if (shi->flippednor) + inside_volume = VOL_SHADE_INSIDE; + } + + if (ztransp && inside_volume == VOL_SHADE_INSIDE) { + MatInside *mi; + int render_this = 0; + + /* don't render the backfaces of ztransp volume materials. + * + * volume shading renders the internal volume from the + * view intersection of the solid volume to the + * intersection on the other side, as part of the shading of + * the front face. + * + * Because ztransp renders both front and back faces independently + * this will double up, so here we prevent rendering the backface as well, + * which would otherwise render the volume in between the camera and the backface + * --matt */ + + for (mi = R.render_volumes_inside.first; mi; mi = mi->next) { + /* weak: matched by material pointer only, and the scan does not stop at the first match */ + if (mi->ma == shi->mat) render_this = 1; + } + if (!render_this) return; + } + + + if (inside_volume == VOL_SHADE_INSIDE) { + startco = shi->camera_co; + endco = shi->co; + + if (trace_behind) { + if (!ztransp) + /* trace behind the volume object */ + vol_trace_behind(shi, shi->vlr, endco, col); + } + else { + /* we're tracing through the volume between the camera + * and a solid surface, so use that pre-shaded radiance */ + copy_v4_v4(col, shr->combined); + } + + /* shade volume from 'camera' to 1st hit point */ + volumeintegrate(shi, col, startco, endco); + } + /* trace to find a backface, the other side bounds of the volume */ + /* (ray intersect ignores front faces here) */ + else if (vol_get_bounds(shi, shi->co, shi->view, hitco, &is, VOL_BOUNDS_DEPTH)) { + VlakRen *vlr = (VlakRen *)is.hit.face; + + startco = shi->co; + endco = hitco; + + if (!ztransp) { + /* if it's another face in the same material */ + if (vlr->mat == shi->mat) { + /* trace behind the 2nd (raytrace) hit point */ + vol_trace_behind(shi, (VlakRen *)is.hit.face, endco, col); + } + else { + shade_intersection(shi, col, &is); + } + } + + /* shade volume from 1st hit point to 2nd hit point */ + 
volumeintegrate(shi, col, startco, endco); + } + + if (ztransp) + col[3] = col[3] > 1.f ? 1.f : col[3]; + else + col[3] = 1.f; + + copy_v3_v3(shr->combined, col); + shr->alpha = col[3]; + + copy_v3_v3(shr->diff, shr->combined); + copy_v3_v3(shr->diffshad, shr->diff); +} + +/* Traces a shadow through the object, + * pretty much gets the transmission over a ray path. + * shr is cleared first; combined[0..2] receives the transmittance and combined[3] / alpha + * receive 1 - luminance(transmittance). When the normal is not flipped and no volume bounds + * are found, the result is black with alpha 1. */ +void shade_volume_shadow(struct ShadeInput *shi, struct ShadeResult *shr, struct Isect *last_is) +{ + float hitco[3]; + float tr[3] = {1.0, 1.0, 1.0}; + Isect is = {{0}}; + const float *startco, *endco; + + memset(shr, 0, sizeof(ShadeResult)); + + /* if 1st hit normal is facing away from the camera, + * then we're inside the volume already. */ + if (shi->flippednor) { + startco = last_is->start; + endco = shi->co; + } + + /* trace to find a backface, the other side bounds of the volume */ + /* (ray intersect ignores front faces here) */ + else if (vol_get_bounds(shi, shi->co, shi->view, hitco, &is, VOL_BOUNDS_DEPTH)) { + startco = shi->co; + endco = hitco; + } + else { + shr->combined[0] = shr->combined[1] = shr->combined[2] = 0.f; + shr->alpha = shr->combined[3] = 1.f; + return; + } + + vol_get_transmittance(shi, tr, startco, endco); + + + /* if we hit another face in the same volume bounds */ + /* shift raytrace coordinates to the hit point, to avoid shading volume twice */ + /* due to idiosyncrasy in ray_trace_shadow_tra() */ + if (is.hit.ob == shi->obi) { + copy_v3_v3(shi->co, hitco); + last_is->dist += is.dist; + shi->vlr = (VlakRen *)is.hit.face; + } + + + copy_v3_v3(shr->combined, tr); + shr->combined[3] = 1.0f - IMB_colormanagement_get_luminance(tr); + shr->alpha = shr->combined[3]; +} + + +/* delivers a fully filled in ShadeResult, for all passes */ +void shade_volume_outside(ShadeInput *shi, ShadeResult *shr) +{ + memset(shr, 0, sizeof(ShadeResult)); + volume_trace(shi, shr, VOL_SHADE_OUTSIDE); +} + +/* Shades the volume the camera is inside of, using the first entry of R.render_volumes_inside: + * shi->mat / obi / obr are pointed at that entry for volume_trace(VOL_SHADE_INSIDE) and restored + * afterwards; the resulting alpha is added to the incoming shr->alpha and clamped to [0, 1]. + * NOTE(review): the list head is dereferenced without a NULL check - presumably this is only + * called when the list is non-empty; confirm. */ +void shade_volume_inside(ShadeInput *shi, ShadeResult *shr) +{ + MatInside *m; + Material 
*mat_backup; + ObjectInstanceRen *obi_backup; + float prev_alpha = shr->alpha; + + /* XXX: extend to multiple volumes perhaps later */ + mat_backup = shi->mat; + obi_backup = shi->obi; + + m = R.render_volumes_inside.first; + shi->mat = m->ma; + shi->obi = m->obi; + shi->obr = m->obi->obr; + + volume_trace(shi, shr, VOL_SHADE_INSIDE); + + shr->alpha = shr->alpha + prev_alpha; + CLAMP(shr->alpha, 0.0f, 1.0f); + + shi->mat = mat_backup; + shi->obi = obi_backup; + shi->obr = obi_backup->obr; +} diff --git a/source/blender/render/intern/source/voxeldata.c b/source/blender/render/intern/source/voxeldata.c index 0d9f7b197e1..2daa4123536 100644 --- a/source/blender/render/intern/source/voxeldata.c +++ b/source/blender/render/intern/source/voxeldata.c @@ -88,10 +88,10 @@ static size_t vd_resol_size(VoxelData *vd) } static int load_frame_blendervoxel(VoxelData *vd, FILE *fp, int frame) -{ +{ const size_t size = vd_resol_size(vd); size_t offset = sizeof(VoxelDataHeader); - + if (is_vd_res_ok(vd) == false) return 0; @@ -102,7 +102,7 @@ static int load_frame_blendervoxel(VoxelData *vd, FILE *fp, int frame) return 0; if (fread(vd->dataset, sizeof(float), size, fp) != size) return 0; - + vd->cachedframe = frame; vd->ok = 1; return 1; @@ -138,12 +138,12 @@ static int load_frame_raw8(VoxelData *vd, FILE *fp, int frame) vd->dataset = NULL; return 0; } - + for (i = 0; i < size; i++) { vd->dataset[i] = (float)data_c[i] / 255.f; } MEM_freeN(data_c); - + vd->cachedframe = frame; vd->ok = 1; return 1; @@ -160,7 +160,7 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex) if (!ima) return; if (iuser.frames == 0) return; - + ima->source = IMA_SRC_SEQUENCE; iuser.framenr = 1 + iuser.offset; @@ -173,13 +173,13 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex) } if (!ibuf) return; if (!ibuf->rect_float) IMB_float_from_rect(ibuf); - + vd->flag |= TEX_VD_STILL; vd->resol[0] = ibuf->x; vd->resol[1] = ibuf->y; vd->resol[2] = iuser.frames; vd->dataset = 
MEM_mapallocN(sizeof(float) * vd_resol_size(vd), "voxel dataset"); - + for (z = 0; z < iuser.frames; z++) { /* get a new ibuf for each frame */ if (z > 0) { @@ -190,7 +190,7 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex) if (!ibuf->rect_float) IMB_float_from_rect(ibuf); } rf = ibuf->rect_float; - + for (y = 0; y < ibuf->y; y++) { for (x = 0; x < ibuf->x; x++) { /* currently averaged to monchrome */ @@ -198,7 +198,7 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex) rf += 4; } } - + BKE_image_free_anim_ibufs(ima, iuser.framenr); } @@ -211,13 +211,13 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex) static int read_voxeldata_header(FILE *fp, struct VoxelData *vd) { VoxelDataHeader *h = (VoxelDataHeader *)MEM_mallocN(sizeof(VoxelDataHeader), "voxel data header"); - + rewind(fp); if (fread(h, sizeof(VoxelDataHeader), 1, fp) != 1) { MEM_freeN(h); return 0; } - + vd->resol[0] = h->resolX; vd->resol[1] = h->resolY; vd->resol[2] = h->resolZ; @@ -231,16 +231,16 @@ static void init_frame_smoke(VoxelData *vd, int cfra) #ifdef WITH_SMOKE Object *ob; ModifierData *md; - + vd->dataset = NULL; if (vd->object == NULL) return; ob = vd->object; - + /* draw code for smoke */ if ((md = (ModifierData *)modifiers_findByType(ob, eModifierType_Smoke))) { SmokeModifierData *smd = (SmokeModifierData *)md; SmokeDomainSettings *sds = smd->domain; - + if (sds && sds->fluid) { BLI_rw_mutex_lock(sds->fluid_mutex, THREAD_LOCK_READ); @@ -356,7 +356,7 @@ static void init_frame_smoke(VoxelData *vd, int cfra) BLI_rw_mutex_unlock(sds->fluid_mutex); } } - + vd->ok = 1; #else // WITH_SMOKE @@ -371,14 +371,14 @@ static void init_frame_hair(VoxelData *vd, int UNUSED(cfra)) { Object *ob; ModifierData *md; - + vd->dataset = NULL; if (vd->object == NULL) return; ob = vd->object; - + if ((md = (ModifierData *)modifiers_findByType(ob, eModifierType_ParticleSystem))) { ParticleSystemModifierData *pmd = (ParticleSystemModifierData *)md; - + if (pmd->psys && 
pmd->psys->clmd) { vd->ok |= BPH_cloth_solver_get_texture_data(ob, pmd->psys->clmd, vd); } @@ -386,16 +386,16 @@ static void init_frame_hair(VoxelData *vd, int UNUSED(cfra)) } void cache_voxeldata(Tex *tex, int scene_frame) -{ +{ VoxelData *vd = tex->vd; FILE *fp; int curframe; char path[sizeof(vd->source_path)]; - + /* only re-cache if dataset needs updating */ if ((vd->flag & TEX_VD_STILL) || (vd->cachedframe == scene_frame)) if (vd->ok) return; - + /* clear out old cache, ready for new */ if (vd->dataset) { MEM_freeN(vd->dataset); @@ -408,9 +408,9 @@ void cache_voxeldata(Tex *tex, int scene_frame) curframe = vd->still_frame; else curframe = scene_frame; - + BLI_strncpy(path, vd->source_path, sizeof(path)); - + /* each type is responsible for setting to true */ vd->ok = false; @@ -428,7 +428,7 @@ void cache_voxeldata(Tex *tex, int scene_frame) BLI_path_abs(path, BKE_main_blendfile_path_from_global()); fp = BLI_fopen(path, "rb"); if (!fp) return; - + if (read_voxeldata_header(fp, vd)) load_frame_blendervoxel(vd, fp, curframe - 1); @@ -438,7 +438,7 @@ void cache_voxeldata(Tex *tex, int scene_frame) BLI_path_abs(path, BKE_main_blendfile_path_from_global()); fp = BLI_fopen(path, "rb"); if (!fp) return; - + load_frame_raw8(vd, fp, curframe); fclose(fp); return; @@ -448,24 +448,24 @@ void cache_voxeldata(Tex *tex, int scene_frame) void make_voxeldata(struct Render *re) { Tex *tex; - + re->i.infostr = IFACE_("Loading voxel datasets"); re->stats_draw(re->sdh, &re->i); - + /* XXX: should be doing only textures used in this render */ for (tex = re->main->tex.first; tex; tex = tex->id.next) { if (tex->id.us && tex->type == TEX_VOXELDATA) { cache_voxeldata(tex, re->r.cfra); } } - + re->i.infostr = NULL; re->stats_draw(re->sdh, &re->i); - + } int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texres) -{ +{ VoxelData *vd = tex->vd; float co[3], offset[3] = {0.5, 0.5, 0.5}, a; int retval = (vd->data_type == TEX_VD_RGBA_PREMUL) ? 
TEX_RGB : TEX_INT; @@ -476,7 +476,7 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre texres->tin = 0.0f; return 0; } - + /* scale lookup from 0.0-1.0 (original location) to -1.0, 1.0, consistent with image texture tex coords */ /* in implementation this works backwards, bringing sample locations from -1.0, 1.0 * to the range 0.0, 1.0, before looking up in the voxel structure. */ @@ -531,7 +531,7 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre switch (vd->interp_type) { case TEX_VD_NEARESTNEIGHBOR: *result = BLI_voxel_sample_nearest(dataset, vd->resol, co); - break; + break; case TEX_VD_LINEAR: *result = BLI_voxel_sample_trilinear(dataset, vd->resol, co); break; @@ -548,7 +548,7 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre a = texres->tin; texres->tin *= vd->int_multiplier; BRICONT; - + if (vd->data_type == TEX_VD_RGBA_PREMUL) { /* unmultiply */ if (a>0.001f) { @@ -566,6 +566,6 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre texres->ta = texres->tin; BRICONTRGB; - + return retval; } diff --git a/source/blender/render/intern/source/zbuf.c b/source/blender/render/intern/source/zbuf.c index 3837383c4c7..436ee590f5c 100644 --- a/source/blender/render/intern/source/zbuf.c +++ b/source/blender/render/intern/source/zbuf.c @@ -54,10 +54,10 @@ void zbuf_alloc_span(ZSpan *zspan, int rectx, int recty) { memset(zspan, 0, sizeof(ZSpan)); - + zspan->rectx= rectx; zspan->recty= recty; - + zspan->span1= MEM_mallocN(recty*sizeof(float), "zspan"); zspan->span2= MEM_mallocN(recty*sizeof(float), "zspan"); } @@ -85,27 +85,27 @@ static void zbuf_add_to_span(ZSpan *zspan, const float v1[2], const float v2[2]) float *span; float xx1, dx0, xs0; int y, my0, my2; - + if (v1[1]<v2[1]) { minv= v1; maxv= v2; } else { minv= v2; maxv= v1; } - + my0= ceil(minv[1]); my2= floor(maxv[1]); - + if (my2<0 || my0>= zspan->recty) return; - + /* clip top */ if 
(my2>=zspan->recty) my2= zspan->recty-1; /* clip bottom */ if (my0<0) my0= 0; - + if (my0>my2) return; /* if (my0>my2) should still fill in, that way we get spans that skip nicely */ - + xx1= maxv[1]-minv[1]; if (xx1>FLT_EPSILON) { dx0= (minv[0]-maxv[0])/xx1; @@ -115,7 +115,7 @@ static void zbuf_add_to_span(ZSpan *zspan, const float v1[2], const float v2[2]) dx0 = 0.0f; xs0 = min_ff(minv[0], maxv[0]); } - + /* empty span */ if (zspan->maxp1 == NULL) { span= zspan->span1; @@ -158,9 +158,9 @@ static void zbuf_add_to_span(ZSpan *zspan, const float v1[2], const float v2[2]) } } -/*-----------------------------------------------------------*/ +/*-----------------------------------------------------------*/ /* Functions */ -/*-----------------------------------------------------------*/ +/*-----------------------------------------------------------*/ /* scanconvert for strand triangles, calls func for each x, y coordinate and gives UV barycentrics and z */ @@ -170,30 +170,30 @@ void zspan_scanconvert(ZSpan *zspan, void *handle, float *v1, float *v2, float * float u, v, uxd, uyd, vxd, vyd, uy0, vy0, xx1; const float *span1, *span2; int i, j, x, y, sn1, sn2, rectx = zspan->rectx, my0, my2; - + /* init */ zbuf_init_span(zspan); - + /* set spans */ zbuf_add_to_span(zspan, v1, v2); zbuf_add_to_span(zspan, v2, v3); zbuf_add_to_span(zspan, v3, v1); - + /* clipped */ if (zspan->minp2==NULL || zspan->maxp2==NULL) return; - + my0 = max_ii(zspan->miny1, zspan->miny2); my2 = min_ii(zspan->maxy1, zspan->maxy2); - + // printf("my %d %d\n", my0, my2); if (my2<my0) return; - + /* ZBUF DX DY, in floats still */ x1= v1[0]- v2[0]; x2= v2[0]- v3[0]; y1= v1[1]- v2[1]; y2= v2[1]- v3[1]; - + z1= 1.0f; /* (u1 - u2) */ z2= 0.0f; /* (u2 - u3) */ @@ -213,28 +213,28 @@ void zspan_scanconvert(ZSpan *zspan, void *handle, float *v1, float *v2, float * x0= y1*z2-z1*y2; y0= z1*x2-x1*z2; - + xx1= (x0*v1[0] + y0*v1[1])/z0; vxd= -(double)x0/(double)z0; vyd= -(double)y0/(double)z0; vy0= ((double)my2)*vyd + 
(double)xx1; - + /* correct span */ span1= zspan->span1+my2; span2= zspan->span2+my2; - + for (i = 0, y = my2; y >= my0; i++, y--, span1--, span2--) { - + sn1= floor(min_ff(*span1, *span2)); sn2= floor(max_ff(*span1, *span2)); - sn1++; - + sn1++; + if (sn2>=rectx) sn2= rectx-1; if (sn1<0) sn1= 0; - + u = (((double)sn1 * uxd) + uy0) - (i * uyd); v = (((double)sn1 * vxd) + vy0) - (i * vyd); - + for (j = 0, x = sn1; x <= sn2; j++, x++) { func(handle, x, y, u + (j * uxd), v + (j * vxd)); } |