diff options
author | Campbell Barton <ideasman42@gmail.com> | 2013-04-04 17:37:07 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2013-04-04 17:37:07 +0400 |
commit | 83fff218cca47147183c1177de9db1381cfa48e6 (patch) | |
tree | 2576a9f8c61393a346c53eb85077ede1ba2b1d33 /intern | |
parent | 69035e183ba91877489e8aae2d0425b13813ac18 (diff) | |
parent | 19dd08a4828ac1883138b2a65f0b8df1498e0d15 (diff) |
svn merge ^/trunk/blender -r55700:55776
Diffstat (limited to 'intern')
68 files changed, 2130 insertions, 739 deletions
diff --git a/intern/audaspace/ffmpeg/AUD_FFMPEGReader.cpp b/intern/audaspace/ffmpeg/AUD_FFMPEGReader.cpp index c359c5ec3e2..408a4e56f1c 100644 --- a/intern/audaspace/ffmpeg/AUD_FFMPEGReader.cpp +++ b/intern/audaspace/ffmpeg/AUD_FFMPEGReader.cpp @@ -88,7 +88,7 @@ int AUD_FFMPEGReader::decode(AVPacket& packet, AUD_Buffer& buffer) } } else - memcpy(((data_t*)buffer.getBuffer()) + buf_pos, frame->data[1], data_size); + memcpy(((data_t*)buffer.getBuffer()) + buf_pos, frame->data[0], data_size); buf_pos += data_size; } diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 039fd39fc7d..ac0a1d7bdb7 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -205,6 +205,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=1, ) + cls.subsurface_samples = IntProperty( + name="Subsurface Samples", + description="Number of subsurface scattering samples to render for each AA sample", + min=1, max=10000, + default=1, + ) + cls.no_caustics = BoolProperty( name="No Caustics", description="Leave out caustics, resulting in a darker image with less noise", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index ee33bf6572a..d1d007079bb 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -83,6 +83,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): sub.prop(cscene, "transmission_samples", text="Transmission") sub.prop(cscene, "ao_samples", text="AO") sub.prop(cscene, "mesh_light_samples", text="Mesh Light") + sub.prop(cscene, "subsurface_samples", text="Subsurface") class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel): diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index b451764c347..7749f164b90 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -252,7 +252,6 @@ static ShaderNode 
*add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen else if (b_node.is_a(&RNA_ShaderNodeNormal)) { BL::Node::outputs_iterator out_it; b_node.outputs.begin(out_it); - BL::NodeSocket vec_sock(*out_it); NormalNode *norm = new NormalNode(); norm->direction = get_node_output_vector(b_node, "Normal"); @@ -302,6 +301,9 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen else if (b_node.is_a(&RNA_ShaderNodeBsdfDiffuse)) { node = new DiffuseBsdfNode(); } + else if (b_node.is_a(&RNA_ShaderNodeSubsurfaceScattering)) { + node = new SubsurfaceScatteringNode(); + } else if (b_node.is_a(&RNA_ShaderNodeBsdfGlossy)) { BL::ShaderNodeBsdfGlossy b_glossy_node(b_node); GlossyBsdfNode *glossy = new GlossyBsdfNode(); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index f6ff78ab2ac..721eaeefc08 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -19,7 +19,6 @@ #include "background.h" #include "camera.h" #include "film.h" -#include "../render/filter.h" #include "graph.h" #include "integrator.h" #include "light.h" @@ -197,6 +196,7 @@ void BlenderSync::sync_integrator() integrator->transmission_samples = get_int(cscene, "transmission_samples"); integrator->ao_samples = get_int(cscene, "ao_samples"); integrator->mesh_light_samples = get_int(cscene, "mesh_light_samples"); + integrator->subsurface_samples = get_int(cscene, "subsurface_samples"); integrator->progressive = get_boolean(cscene, "progressive"); if(integrator->modified(previntegrator)) @@ -213,18 +213,11 @@ void BlenderSync::sync_film() Film prevfilm = *film; film->exposure = get_float(cscene, "film_exposure"); + film->filter_type = (FilterType)RNA_enum_get(&cscene, "filter_type"); + film->filter_width = (film->filter_type == FILTER_BOX)? 
1.0f: get_float(cscene, "filter_width"); if(film->modified(prevfilm)) film->tag_update(scene); - - Filter *filter = scene->filter; - Filter prevfilter = *filter; - - filter->filter_type = (FilterType)RNA_enum_get(&cscene, "filter_type"); - filter->filter_width = (filter->filter_type == FILTER_BOX)? 1.0f: get_float(cscene, "filter_width"); - - if(filter->modified(prevfilter)) - filter->tag_update(scene); } /* Render Layer */ diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 3223ca91b9e..fd0bed33396 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -216,6 +216,14 @@ public: return mem; } + void copy_at(T *ptr, size_t offset, size_t size) + { + if(size > 0) { + size_t mem_size = size*data_elements*datatype_size(data_type); + memcpy(&data[0] + offset, ptr, mem_size); + } + } + void reference(T *ptr, size_t width, size_t height = 0) { data.clear(); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index e83756b7c8a..fbaba1da094 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -42,6 +42,7 @@ set(SRC_HEADERS kernel_projection.h kernel_random.h kernel_shader.h + kernel_subsurface.h kernel_textures.h kernel_triangle.h kernel_types.h @@ -62,6 +63,7 @@ set(SRC_CLOSURE_HEADERS closure/bsdf_util.h closure/bsdf_ward.h closure/bsdf_westin.h + closure/bssrdf.h closure/emissive.h closure/volume.h ) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index f26aefe7fd3..6403606c2df 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -29,6 +29,7 @@ #include "../closure/bsdf_ward.h" #endif #include "../closure/bsdf_westin.h" +#include "../closure/bssrdf.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h new file mode 100644 index 00000000000..1327fbd011e --- /dev/null +++ 
b/intern/cycles/kernel/closure/bssrdf.h @@ -0,0 +1,154 @@ +/* + * Copyright 2013, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __KERNEL_BSSRDF_H__ +#define __KERNEL_BSSRDF_H__ + +CCL_NAMESPACE_BEGIN + +__device int bssrdf_setup(ShaderClosure *sc) +{ + if(sc->data0 < BSSRDF_MIN_RADIUS) { + /* revert to diffuse BSDF if radius too small */ + sc->data0 = 0.0f; + sc->data1 = 0.0f; + return bsdf_diffuse_setup(sc); + } + else { + /* radius + IOR params */ + sc->data0 = max(sc->data0, 0.0f); + sc->data1 = max(sc->data1, 1.0f); + sc->type = CLOSURE_BSSRDF_ID; + + return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF; + } +} + +/* Simple Cubic BSSRDF falloff */ + +__device float bssrdf_cubic(float ld, float r) +{ + if(ld == 0.0f) + return (r == 0.0f)? 
1.0f: 0.0f; + + return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f); +} + +/* Original BSSRDF falloff function */ + +typedef struct BSSRDFParams { + float eta; /* index of refraction */ + float sigma_t_; /* reduced extinction coefficient */ + float sigma_tr; /* effective extinction coefficient */ + float Fdr; /* diffuse fresnel reflectance */ + float D; /* diffusion constant */ + float A; + float alpha_; /* reduced albedo */ + float zr; /* distance of virtual lightsource above surface */ + float zv; /* distance of virtual lightsource below surface */ + float ld; /* mean free path */ + float ro; /* diffuse reflectance */ +} BSSRDFParams; + +__device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro) +{ + float sq; + + sq = sqrt(3.0f*(1.0f - alpha_)); + return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro; +} + +__device float bssrdf_compute_reduced_albedo(float A, float ro) +{ + const float tolerance = 1e-8; + const int max_iteration_count = 20; + float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1; + int i; + + /* use secant method to compute reduced albedo using Rd function inverse + * with a given reflectance */ + fxn = bssrdf_reduced_albedo_Rd(xn, A, ro); + fxn_1 = bssrdf_reduced_albedo_Rd(xn_1, A, ro); + + for (i= 0; i < max_iteration_count; i++) { + fsub = (fxn - fxn_1); + if (fabsf(fsub) < tolerance) + break; + d = ((xn - xn_1)/fsub)*fxn; + if (fabsf(d) < tolerance) + break; + + xn_1 = xn; + fxn_1 = fxn; + xn = xn - d; + + if (xn > 1.0f) xn = 1.0f; + if (xn_1 > 1.0f) xn_1 = 1.0f; + + fxn = bssrdf_reduced_albedo_Rd(xn, A, ro); + } + + /* avoid division by zero later */ + if (xn <= 0.0f) + xn = 0.00001f; + + return xn; +} + +__device void bssrdf_setup_params(BSSRDFParams *ss, float refl, float radius, float ior) +{ + ss->eta = ior; + ss->Fdr = -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior; + ss->A = (1.0f + ss->Fdr)/(1.0f - ss->Fdr); + ss->ld = radius; + ss->ro = min(refl, 0.999f); + + ss->alpha_ = 
bssrdf_compute_reduced_albedo(ss->A, ss->ro); + + ss->sigma_tr = 1.0f/ss->ld; + ss->sigma_t_ = ss->sigma_tr/sqrtf(3.0f*(1.0f - ss->alpha_)); + + ss->D = 1.0f/(3.0f*ss->sigma_t_); + + ss->zr = 1.0f/ss->sigma_t_; + ss->zv = ss->zr + 4.0f*ss->A*ss->D; +} + +/* exponential falloff function */ + +__device float bssrdf_original(const BSSRDFParams *ss, float r) +{ + if(ss->ld == 0.0f) + return (r == 0.0f)? 1.0f: 0.0f; + + float rr = r*r; + float sr, sv, Rdr, Rdv; + + sr = sqrt(rr + ss->zr*ss->zr); + sv = sqrt(rr + ss->zv*ss->zv); + + Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr); + Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv); + + return ss->alpha_*(1.0f/(4.0f*(float)M_PI))*(Rdr + Rdv); +} + +CCL_NAMESPACE_END + +#endif /* __KERNEL_BSSRDF_H__ */ + diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h index 2b9ebf35d0c..b44e1194672 100644 --- a/intern/cycles/kernel/kernel_bvh.h +++ b/intern/cycles/kernel/kernel_bvh.h @@ -923,6 +923,330 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui #endif } +/* Special ray intersection routines for subsurface scattering. In that case we + * only want to intersect with primitives in the same object, and in case of + * multiple hits we pick a single random primitive as the intersection point. 
*/ + +__device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect, + float3 P, float3 idir, int object, int triAddr, float tmax, int *num_hits, float subsurface_random) +{ + /* compute and check intersection t-value */ + float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); + float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + float3 dir = 1.0f/idir; + + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); + float t = Oz * invDz; + + if(t > 0.0f && t < tmax) { + /* compute and check barycentric u */ + float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; + float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; + float u = Ox + t*Dx; + + if(u >= 0.0f) { + /* compute and check barycentric v */ + float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; + float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; + float v = Oy + t*Dy; + + if(v >= 0.0f && u + v <= 1.0f) { + (*num_hits)++; + + if(subsurface_random * (*num_hits) <= 1.0f) { + /* record intersection */ + isect->prim = triAddr; + isect->object = object; + isect->u = u; + isect->v = v; + isect->t = t; + } + } + } + } +} + +__device_inline int bvh_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, float subsurface_random) +{ + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 idir = bvh_inverse_direction(ray->D); + int object = ~0; + + int num_hits = 0; + + isect->t = tmax; + isect->object = ~0; + isect->prim = ~0; + isect->u = 0.0f; + isect->v = 0.0f; + + /* traversal loop */ + do { + do + { + /* 
traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) + { + bool traverseChild0, traverseChild1, closestChild1; + int nodeAddrChild1; + + bvh_node_intersect(kg, &traverseChild0, &traverseChild1, + &closestChild1, &nodeAddr, &nodeAddrChild1, + P, idir, isect->t, ~0, nodeAddr); + + if(traverseChild0 != traverseChild1) { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + } + else { + if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + else { + /* both children were intersected, push the farther one */ + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + traversalStack[stackPtr] = nodeAddrChild1; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + int primAddr = __float_as_int(leaf.x); + +#ifdef __INSTANCING__ + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + while(primAddr < primAddr2) { + /* only primitives from the same object */ + uint tri_object = (object == ~0)? 
kernel_tex_fetch(__prim_object, primAddr): object; + + if(tri_object == subsurface_object) { + /* intersect ray against primitive */ +#ifdef __HAIR__ + uint segment = kernel_tex_fetch(__prim_segment, primAddr); + if(segment == ~0) /* ignore hair for sss */ +#endif + bvh_triangle_intersect_subsurface(kg, isect, P, idir, object, primAddr, tmax, &num_hits, subsurface_random); + } + + primAddr++; + } +#ifdef __INSTANCING__ + } + else { + /* instance push */ + if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { + object = subsurface_object; + bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax); + + ++stackPtr; + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } +#endif + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#ifdef __INSTANCING__ + if(stackPtr >= 0) { + kernel_assert(object != ~0); + + /* instance pop */ + bvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, tmax); + object = ~0; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +#ifdef __OBJECT_MOTION__ +__device bool bvh_intersect_motion_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, float subsurface_random) +{ + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 idir = bvh_inverse_direction(ray->D); + int object = ~0; + + int num_hits = 0; + + Transform ob_tfm; + + isect->t = tmax; + isect->object = ~0; + isect->prim = ~0; + isect->u = 0.0f; + isect->v = 0.0f; + + /* traversal loop */ + do { + do + { + /* traverse internal nodes */ + 
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) + { + bool traverseChild0, traverseChild1, closestChild1; + int nodeAddrChild1; + + bvh_node_intersect(kg, &traverseChild0, &traverseChild1, + &closestChild1, &nodeAddr, &nodeAddrChild1, + P, idir, isect->t, ~0, nodeAddr); + + if(traverseChild0 != traverseChild1) { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + } + else { + if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + else { + /* both children were intersected, push the farther one */ + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + traversalStack[stackPtr] = nodeAddrChild1; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + int primAddr = __float_as_int(leaf.x); + + if(primAddr >= 0) { + int primAddr2 = __float_as_int(leaf.y); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + while(primAddr < primAddr2) { + /* only primitives from the same object */ + uint tri_object = (object == ~0)? 
kernel_tex_fetch(__prim_object, primAddr): object; + + if(tri_object == subsurface_object) { + /* intersect ray against primitive */ +#ifdef __HAIR__ + uint segment = kernel_tex_fetch(__prim_segment, primAddr); + if(segment == ~0) /* ignore hair for sss */ +#endif + bvh_triangle_intersect_subsurface(kg, isect, P, idir, object, primAddr, tmax, &num_hits, subsurface_random); + } + + primAddr++; + } + } + else { + /* instance push */ + if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { + object = subsurface_object; + object = kernel_tex_fetch(__prim_object, -primAddr-1); + bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + + ++stackPtr; + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + if(stackPtr >= 0) { + kernel_assert(object != ~0); + + /* instance pop */ + bvh_instance_motion_pop(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + object = ~0; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} +#endif + +__device_inline int scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, float subsurface_random) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) + return bvh_intersect_motion_subsurface(kg, ray, isect, subsurface_object, subsurface_random); + else + return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random); +#else + return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random); +#endif +} + +/* Ray offset to avoid self intersection */ + __device_inline float3 ray_offset(float3 P, float3 Ng) { #ifdef __INTERSECTION_REFINE__ @@ -971,6 +1295,10 @@ __device_inline float3 ray_offset(float3 P, float3 Ng) #endif } +/* Refine triangle 
intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. */ + __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) { float3 P = ray->P; diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 02f64cd649a..694ef8bd01d 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -53,7 +53,7 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float /* compute point on plane of focus */ float ft = kernel_data.cam.focaldistance/ray->D.z; - float3 Pfocus = ray->P + ray->D*ft; + float3 Pfocus = ray->D*ft; /* update ray for effect of lens */ ray->P = make_float3(lensuv.x, lensuv.y, 0.0f); @@ -112,11 +112,13 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa /* compute point on plane of focus */ float ft = kernel_data.cam.focaldistance/ray->D.z; - float3 Pfocus = ray->P + ray->D*ft; + float3 Pfocus = ray->D*ft; /* update ray for effect of lens */ ray->P = make_float3(lensuv.x, lensuv.y, 0.0f); ray->D = normalize(Pfocus - ray->P); + + ray->P += Pcamera; } /* transform ray from camera to world */ @@ -224,8 +226,9 @@ __device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, flo float lens_u, float lens_v, float time, Ray *ray) { /* pixel filter */ - float raster_x = x + kernel_tex_interp(__filter_table, filter_u, FILTER_TABLE_SIZE); - float raster_y = y + kernel_tex_interp(__filter_table, filter_v, FILTER_TABLE_SIZE); + int filter_table_offset = kernel_data.film.filter_table_offset; + float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE); + float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE); #ifdef __CAMERA_MOTION__ /* motion blur */ diff --git 
a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 01bb78e8e1c..9972a63bfbb 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -57,19 +57,6 @@ template<typename T> struct texture { } #endif - float interp(float x, int size) - { - kernel_assert(size == width); - - x = clamp(x, 0.0f, 1.0f)*width; - - int index = min((int)x, width-1); - int nindex = min(index+1, width-1); - float t = x - index; - - return (1.0f - t)*data[index] + t*data[nindex]; - } - T *data; int width; }; @@ -157,7 +144,7 @@ typedef texture_image<uchar4> texture_image_uchar4; #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index)) #define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index)) #define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index)) -#define kernel_tex_interp(tex, t, size) (kg->tex.interp(t, size)) +#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size)) #define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? 
kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y)) #define kernel_data (kg->__data) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 9fd065c3cda..a11f8f403cd 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -58,7 +58,6 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; /* Macros to handle different memory storage on different devices */ #define kernel_tex_fetch(t, index) tex1Dfetch(t, index) -#define kernel_tex_interp(t, x, size) tex1D(t, x) #define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) #define kernel_data __data diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index abb2f094f5c..999820891b2 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -45,18 +45,6 @@ /* no assert in opencl */ #define kernel_assert(cond) -/* manual implementation of interpolated 1D lookup */ -__device float kernel_tex_interp_(__global float *data, int width, float x) -{ - x = clamp(x, 0.0f, 1.0f)*width; - - int index = min((int)x, width-1); - int nindex = min(index+1, width-1); - float t = x - index; - - return (1.0f - t)*data[index] + t*data[nindex]; -} - /* make_type definitions with opencl style element initializers */ #ifdef make_float2 #undef make_float2 @@ -114,7 +102,7 @@ __device float kernel_tex_interp_(__global float *data, int width, float x) /* data lookup defines */ #define kernel_data (*kg->data) -#define kernel_tex_interp(t, x, size) kernel_tex_interp_(kg->t, size, x) +#define kernel_tex_lookup(t, x, offset, size) kernel_tex_lookup_(kg->t, offset, size, x) #define kernel_tex_fetch(t, index) kg->t[index] /* define NULL */ diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h index fc2be342e02..5f95e0c2e3b 100644 --- 
a/intern/cycles/kernel/kernel_displace.h +++ b/intern/cycles/kernel/kernel_displace.h @@ -66,8 +66,6 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou out = shader_eval_background(kg, &sd, flag, SHADER_CONTEXT_MAIN); } - shader_release(kg, &sd); - /* write output */ output[i] = make_float4(out.x, out.y, out.z, 0.0f); } diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 9787af535bb..7d7494657e2 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -69,8 +69,6 @@ __device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando, eval *= ls->eval_fac; - shader_release(kg, &sd); - return eval; } @@ -207,7 +205,6 @@ __device_noinline float3 indirect_background(KernelGlobals *kg, Ray *ray, int pa ShaderData sd; shader_setup_from_background(kg, &sd, ray); float3 L = shader_eval_background(kg, &sd, path_flag, SHADER_CONTEXT_EMISSION); - shader_release(kg, &sd); #ifdef __BACKGROUND_MIS__ /* check if background light exists or if we should skip pdf */ diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 529b7b8768f..abf1f5b4cb0 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -88,5 +88,39 @@ typedef struct KernelGlobals { #endif +/* Interpolated lookup table access */ + +__device float lookup_table_read(KernelGlobals *kg, float x, int offset, int size) +{ + x = clamp(x, 0.0f, 1.0f)*(size-1); + + int index = min((int)x, size-1); + int nindex = min(index+1, size-1); + float t = x - index; + + float data0 = kernel_tex_fetch(__lookup_table, index + offset); + if(t == 0.0f) + return data0; + + float data1 = kernel_tex_fetch(__lookup_table, nindex + offset); + return (1.0f - t)*data0 + t*data1; +} + +__device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize) +{ + y = clamp(y, 0.0f, 1.0f)*(ysize-1); + + int 
index = min((int)y, ysize-1); + int nindex = min(index+1, ysize-1); + float t = y - index; + + float data0 = lookup_table_read(kg, x, offset + xsize*index, xsize); + if(t == 0.0f) + return data0; + + float data1 = lookup_table_read(kg, x, offset + xsize*nindex, xsize); + return (1.0f - t)*data0 + t*data1; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 865ba7ca676..7aa1ec365b7 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -37,6 +37,10 @@ #include "kernel_random.h" #include "kernel_passes.h" +#ifdef __SUBSURFACE__ +#include "kernel_subsurface.h" +#endif + CCL_NAMESPACE_BEGIN typedef struct PathState { @@ -149,7 +153,7 @@ __device_inline float path_state_terminate_probability(KernelGlobals *kg, PathSt } /* probalistic termination */ - return average(throughput); + return average(throughput); /* todo: try using max here */ } __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) @@ -212,8 +216,6 @@ __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ra if(ray->t != FLT_MAX) ray->D = normalize_len(Pend - ray->P, &ray->t); - shader_release(kg, &sd); - bounce++; } } @@ -323,10 +325,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, L_transparent += average(holdout_weight*throughput); } - if(sd.flag & SD_HOLDOUT_MASK) { - shader_release(kg, &sd); + if(sd.flag & SD_HOLDOUT_MASK) break; - } } #endif @@ -345,13 +345,29 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, float probability = path_state_terminate_probability(kg, &state, throughput); float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE); - if(terminate >= probability) { - shader_release(kg, &sd); + if(terminate >= probability) break; - } throughput /= probability; +#ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object, 
replacing + * the closures with a diffuse BSDF */ + if(sd.flag & SD_BSSRDF) { + float bssrdf_probability; + ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability); + + /* modify throughput for picking bssrdf or bsdf */ + throughput *= bssrdf_probability; + + /* do bssrdf scatter step if we picked a bssrdf closure */ + if(sc) { + uint lcg_state = lcg_init(rbsdf); + subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); + } + } +#endif + #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { @@ -415,10 +431,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, #endif /* no BSDF? we can stop here */ - if(!(sd.flag & SD_BSDF)) { - shader_release(kg, &sd); + if(!(sd.flag & SD_BSDF)) break; - } /* sample BSDF */ float bsdf_pdf; @@ -432,8 +446,6 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); - shader_release(kg, &sd); - if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) break; @@ -481,7 +493,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, #ifdef __NON_PROGRESSIVE__ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer, - float3 throughput, float throughput_normalize, + float3 throughput, float num_samples_adjust, float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L) { #ifdef __LAMP_MIS__ @@ -554,16 +566,32 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray /* path termination. this is a strange place to put the termination, it's * mainly due to the mixed in MIS that we use. 
gives too many unneeded * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_terminate_probability(kg, &state, throughput*throughput_normalize); + float probability = path_state_terminate_probability(kg, &state, throughput*num_samples_adjust); float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE); - if(terminate >= probability) { - shader_release(kg, &sd); + if(terminate >= probability) break; - } throughput /= probability; +#ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object, replacing + * the closures with a diffuse BSDF */ + if(sd.flag & SD_BSSRDF) { + float bssrdf_probability; + ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability); + + /* modify throughput for picking bssrdf or bsdf */ + throughput *= bssrdf_probability; + + /* do bssrdf scatter step if we picked a bssrdf closure */ + if(sc) { + uint lcg_state = lcg_init(rbsdf); + subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); + } + } +#endif + #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { @@ -628,10 +656,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray #endif /* no BSDF? 
we can stop here */ - if(!(sd.flag & SD_BSDF)) { - shader_release(kg, &sd); + if(!(sd.flag & SD_BSDF)) break; - } /* sample BSDF */ float bsdf_pdf; @@ -645,8 +671,6 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); - shader_release(kg, &sd); - if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) break; @@ -676,6 +700,193 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray } } +__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG *rng, int sample, + ShaderData *sd, float3 throughput, float num_samples_adjust, + float min_ray_pdf, float ray_pdf, PathState state, + int rng_offset, PathRadiance *L, __global float *buffer) +{ +#ifdef __AO__ + /* ambient occlusion */ + if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) { + int num_samples = ceil(kernel_data.integrator.ao_samples*num_samples_adjust); + float num_samples_inv = num_samples_adjust/num_samples; + float ao_factor = kernel_data.background.ao_factor; + float3 ao_N; + float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + + for(int j = 0; j < num_samples; j++) { + /* todo: solve correlation */ + float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U); + float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V); + + float3 ao_D; + float ao_pdf; + + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray light_ray; + float3 ao_shadow; + + light_ray.P = ray_offset(sd->P, sd->Ng); + light_ray.D = ao_D; + light_ray.t = kernel_data.background.ao_distance; +#ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +#endif + + if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow)) + path_radiance_accum_ao(L, throughput*num_samples_inv, ao_bsdf, ao_shadow, state.bounce); + } + } + } 
+#endif + + +#ifdef __EMISSION__ + /* sample illumination from lights to find path contribution */ + if(sd->flag & SD_BSDF_HAS_EVAL) { + Ray light_ray; + BsdfEval L_light; + bool is_lamp; + +#ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +#endif + + /* lamp sampling */ + for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + int num_samples = ceil(num_samples_adjust*light_select_num_samples(kg, i)); + float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); + + if(kernel_data.integrator.pdf_triangles != 0.0f) + num_samples_inv *= 0.5f; + + for(int j = 0; j < num_samples; j++) { + float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U); + float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V); + + if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) { + /* trace shadow ray */ + float3 shadow; + + if(!shadow_blocked(kg, &state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp); + } + } + } + } + + /* mesh light sampling */ + if(kernel_data.integrator.pdf_triangles != 0.0f) { + int num_samples = ceil(num_samples_adjust*kernel_data.integrator.mesh_light_samples); + float num_samples_inv = num_samples_adjust/num_samples; + + if(kernel_data.integrator.num_all_lights) + num_samples_inv *= 0.5f; + + for(int j = 0; j < num_samples; j++) { + float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT); + float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U); + float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V); + + /* only sample triangle lights */ + if(kernel_data.integrator.num_all_lights) + light_t = 0.5f*light_t; + + if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) { + /* 
trace shadow ray */ + float3 shadow; + + if(!shadow_blocked(kg, &state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp); + } + } + } + } + } +#endif + + for(int i = 0; i< sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + if(!CLOSURE_IS_BSDF(sc->type)) + continue; + /* transparency is not handled here, but in outer loop */ + if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) + continue; + + int num_samples; + + if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) + num_samples = kernel_data.integrator.diffuse_samples; + else if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) + num_samples = kernel_data.integrator.glossy_samples; + else + num_samples = kernel_data.integrator.transmission_samples; + + num_samples = ceil(num_samples_adjust*num_samples); + + float num_samples_inv = num_samples_adjust/num_samples; + + for(int j = 0; j < num_samples; j++) { + /* sample BSDF */ + float bsdf_pdf; + BsdfEval bsdf_eval; + float3 bsdf_omega_in; + differential3 bsdf_domega_in; + float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U); + float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V); + int label; + + label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, + &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + + if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) + continue; + + /* modify throughput */ + float3 tp = throughput; + path_radiance_bsdf_bounce(L, &tp, &bsdf_eval, bsdf_pdf, state.bounce, label); + + /* set labels */ + float min_ray_pdf = FLT_MAX; + + if(!(label & LABEL_TRANSPARENT)) + min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf); + + /* modify path state */ + PathState ps = state; + path_state_next(kg, &ps, label); + + /* setup ray */ + Ray bsdf_ray; + + bsdf_ray.P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? 
-sd->Ng: sd->Ng); + bsdf_ray.D = bsdf_omega_in; + bsdf_ray.t = FLT_MAX; +#ifdef __RAY_DIFFERENTIALS__ + bsdf_ray.dP = sd->dP; + bsdf_ray.dD = bsdf_domega_in; +#endif +#ifdef __OBJECT_MOTION__ + bsdf_ray.time = sd->time; +#endif + + kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer, + tp*num_samples_inv, num_samples, + min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } +} + __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer) { /* initialize */ @@ -740,10 +951,8 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam L_transparent += average(holdout_weight*throughput); } - if(sd.flag & SD_HOLDOUT_MASK) { - shader_release(kg, &sd); + if(sd.flag & SD_HOLDOUT_MASK) break; - } } #endif @@ -763,195 +972,47 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam float probability = path_state_terminate_probability(kg, &state, throughput); float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE); - if(terminate >= probability) { - shader_release(kg, &sd); + if(terminate >= probability) break; - } throughput /= probability; } -#ifdef __AO__ - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - int num_samples = kernel_data.integrator.ao_samples; - float num_samples_inv = 1.0f/num_samples; - float ao_factor = kernel_data.background.ao_factor; - float3 ao_N; - float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N); - - for(int j = 0; j < num_samples; j++) { - /* todo: solve correlation */ - float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V); - - float3 ao_D; - float ao_pdf; - 
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - - if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { - Ray light_ray; - float3 ao_shadow; - - light_ray.P = ray_offset(sd.P, sd.Ng); - light_ray.D = ao_D; - light_ray.t = kernel_data.background.ao_distance; -#ifdef __OBJECT_MOTION__ - light_ray.time = sd.time; -#endif - - if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow)) - path_radiance_accum_ao(&L, throughput*num_samples_inv, ao_bsdf, ao_shadow, state.bounce); - } - } - } -#endif - -#ifdef __EMISSION__ - /* sample illumination from lights to find path contribution */ - if(sd.flag & SD_BSDF_HAS_EVAL) { - Ray light_ray; - BsdfEval L_light; - bool is_lamp; - -#ifdef __OBJECT_MOTION__ - light_ray.time = sd.time; -#endif - - /* lamp sampling */ - for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - int num_samples = light_select_num_samples(kg, i); - float num_samples_inv = 1.0f/(num_samples*kernel_data.integrator.num_all_lights); - - if(kernel_data.integrator.pdf_triangles != 0.0f) - num_samples_inv *= 0.5f; - - for(int j = 0; j < num_samples; j++) { - float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U); - float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V); - - if(direct_emission(kg, &sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) { - /* trace shadow ray */ - float3 shadow; +#ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object */ + if(sd.flag & SD_BSSRDF) { + for(int i = 0; i< sd.num_closure; i++) { + ShaderClosure *sc = &sd.closure[i]; - if(!shadow_blocked(kg, &state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(&L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp); - } - } - } - } + if(!CLOSURE_IS_BSSRDF(sc->type)) + continue; - /* mesh light sampling */ - if(kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = 
kernel_data.integrator.mesh_light_samples; + /* set up random number generator */ + uint lcg_state = lcg_init(rbsdf); + int num_samples = kernel_data.integrator.subsurface_samples; float num_samples_inv = 1.0f/num_samples; - if(kernel_data.integrator.num_all_lights) - num_samples_inv *= 0.5f; - + /* do subsurface scatter step with copy of shader data, this will + * replace the BSSRDF with a diffuse BSDF closure */ for(int j = 0; j < num_samples; j++) { - float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT); - float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U); - float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V); - - /* only sample triangle lights */ - if(kernel_data.integrator.num_all_lights) - light_t = 0.5f*light_t; - - if(direct_emission(kg, &sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, &state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(&L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp); - } - } + ShaderData bssrdf_sd = sd; + subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true); + + /* compute lighting with the BSDF closure */ + kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j, + &bssrdf_sd, throughput, num_samples_inv, + ray_pdf, ray_pdf, state, rng_offset, &L, buffer); } } } #endif - for(int i = 0; i< sd.num_closure; i++) { - const ShaderClosure *sc = &sd.closure[i]; - - if(!CLOSURE_IS_BSDF(sc->type)) - continue; - /* transparency is not handled here, but in outer loop */ - if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) - continue; - - int num_samples; - - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) - num_samples = kernel_data.integrator.diffuse_samples; - else if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) - num_samples = kernel_data.integrator.glossy_samples; - else - 
num_samples = kernel_data.integrator.transmission_samples; - - float num_samples_inv = 1.0f/num_samples; - - for(int j = 0; j < num_samples; j++) { - /* sample BSDF */ - float bsdf_pdf; - BsdfEval bsdf_eval; - float3 bsdf_omega_in; - differential3 bsdf_domega_in; - float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V); - int label; - - label = shader_bsdf_sample_closure(kg, &sd, sc, bsdf_u, bsdf_v, &bsdf_eval, - &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); - - if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) - continue; - - /* modify throughput */ - float3 tp = throughput; - path_radiance_bsdf_bounce(&L, &tp, &bsdf_eval, bsdf_pdf, state.bounce, label); - - /* set labels */ - float min_ray_pdf = FLT_MAX; - - if(!(label & LABEL_TRANSPARENT)) - min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf); - - /* modify path state */ - PathState ps = state; - path_state_next(kg, &ps, label); - - /* setup ray */ - Ray bsdf_ray; - - bsdf_ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? 
-sd.Ng: sd.Ng); - bsdf_ray.D = bsdf_omega_in; - bsdf_ray.t = FLT_MAX; -#ifdef __RAY_DIFFERENTIALS__ - bsdf_ray.dP = sd.dP; - bsdf_ray.dD = bsdf_domega_in; -#endif -#ifdef __OBJECT_MOTION__ - bsdf_ray.time = sd.time; -#endif - - kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer, - tp*num_samples_inv, num_samples, - min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, &L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } - } + /* lighting */ + kernel_path_non_progressive_lighting(kg, rng, sample, &sd, throughput, + 1.0f, ray_pdf, ray_pdf, state, rng_offset, &L, buffer); /* continue in case of transparency */ throughput *= shader_bsdf_transparency(kg, &sd); - shader_release(kg, &sd); if(is_zero(throughput)) break; diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 9083b7cbfd7..e2eb8d5db83 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -200,5 +200,19 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng) #endif +__device float lcg_step(uint *rng) +{ + /* implicit mod 2^32 */ + *rng = (1103515245*(*rng) + 12345); + return (float)*rng * (1.0f/(float)0xFFFFFFFF); +} + +__device uint lcg_init(float seed) +{ + uint rng = __float_as_int(seed); + lcg_step(&rng); + return rng; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index df86b352697..444543bf709 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -158,6 +158,103 @@ __device_noinline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, #endif } +/* ShaderData setup from BSSRDF scatter */ + +#ifdef __SUBSURFACE__ +__device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd, + const Intersection *isect, const Ray *ray) 
+{ + bool backfacing = sd->flag & SD_BACKFACING; + + /* object, matrices, time, ray_length stay the same */ + sd->flag = kernel_tex_fetch(__object_flag, sd->object); + sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + +#ifdef __HAIR__ + if(kernel_tex_fetch(__prim_segment, isect->prim) != ~0) { + /* Strand Shader setting*/ + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + + sd->shader = __float_as_int(curvedata.z); + sd->segment = isect->segment; + + float tcorr = isect->t; + if(kernel_data.curve_kernel_data.curveflags & CURVE_KN_POSTINTERSECTCORRECTION) + tcorr = (isect->u < 0)? tcorr + sqrtf(isect->v) : tcorr - sqrtf(isect->v); + + sd->P = bvh_curve_refine(kg, sd, isect, ray, tcorr); + } + else { +#endif + /* fetch triangle data */ + float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); + float3 Ng = make_float3(Ns.x, Ns.y, Ns.z); + sd->shader = __float_as_int(Ns.w); + +#ifdef __HAIR__ + sd->segment = ~0; +#endif + +#ifdef __UV__ + sd->u = isect->u; + sd->v = isect->v; +#endif + + /* vectors */ + sd->P = bvh_triangle_refine(kg, sd, isect, ray); + sd->Ng = Ng; + sd->N = Ng; + + /* smooth normal */ + if(sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); + +#ifdef __DPDU__ + /* dPdu/dPdv */ + triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); +#endif + +#ifdef __HAIR__ + } +#endif + + sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); + +#ifdef __INSTANCING__ + if(isect->object != ~0) { + /* instance transform */ + object_normal_transform(kg, sd, &sd->N); + object_normal_transform(kg, sd, &sd->Ng); +#ifdef __DPDU__ + object_dir_transform(kg, sd, &sd->dPdu); + object_dir_transform(kg, sd, &sd->dPdv); +#endif + } +#endif + + /* backfacing test */ + if(backfacing) { + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; +#ifdef __DPDU__ + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; +#endif + } + + /* should not get used in principle as the shading will only use a 
diffuse + * BSDF, but the shader might still access it */ + sd->I = sd->N; + +#ifdef __RAY_DIFFERENTIALS__ + /* differentials */ + differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); + /* don't modify dP and dI */ +#endif +} +#endif + /* ShaderData setup from position sampled on mesh */ __device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, @@ -283,11 +380,9 @@ __device_noinline void shader_setup_from_sample(KernelGlobals *kg, ShaderData *s /* ShaderData setup for displacement */ -__device_noinline void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, +__device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, int object, int prim, float u, float v) { - /* Note: no OSLShader::init call here, this is done in shader_setup_from_sample! */ - float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); int shader; @@ -418,7 +513,7 @@ __device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const ShaderClosure *sc = &sd->closure[sampled]; if(CLOSURE_IS_BSDF(sc->type)) { - sum += sd->closure[sampled].sample_weight; + sum += sc->sample_weight; if(r <= sum) break; @@ -811,7 +906,7 @@ __device void shader_merge_closures(KernelGlobals *kg, ShaderData *sd) ShaderClosure *scj = &sd->closure[j]; #ifdef __OSL__ - if(!sci->prim && sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) { + if(!sci->prim && !scj->prim && sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) { #else if(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) { #endif @@ -823,18 +918,12 @@ __device void shader_merge_closures(KernelGlobals *kg, ShaderData *sd) memmove(scj, scj+1, size*sizeof(ShaderClosure)); sd->num_closure--; + j--; } } } } #endif -/* Free ShaderData */ - -__device void shader_release(KernelGlobals *kg, ShaderData *sd) -{ - /* nothing to do currently */ -} - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_subsurface.h 
b/intern/cycles/kernel/kernel_subsurface.h new file mode 100644 index 00000000000..5fef9965c7f --- /dev/null +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -0,0 +1,241 @@ +/* + * Copyright 2013, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +CCL_NAMESPACE_BEGIN + +#define BSSRDF_MULTI_EVAL +#define BSSRDF_SKIP_NO_HIT + +__device float bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u) +{ + int table_offset = kernel_data.bssrdf.table_offset; + float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); + + return r*radius; +} + +#ifdef BSSRDF_MULTI_EVAL +__device float bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r) +{ + if(r >= radius) + return 0.0f; + + /* todo: when we use the real BSSRDF this will need to be divided by the maximum + * radius instead of the average radius */ + float t = r/radius; + + int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET; + float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); + + pdf /= radius; + + return pdf; +} +#endif + +__device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability) +{ + /* sum sample 
weights of bssrdf and bsdf */ + float bsdf_sum = 0.0f; + float bssrdf_sum = 0.0f; + + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if(CLOSURE_IS_BSDF(sc->type)) + bsdf_sum += sc->sample_weight; + else if(CLOSURE_IS_BSSRDF(sc->type)) + bssrdf_sum += sc->sample_weight; + } + + /* use bsdf or bssrdf? */ + float r = sd->randb_closure*(bsdf_sum + bssrdf_sum); + + if(r < bsdf_sum) { + /* use bsdf, and adjust randb so we can reuse it for picking a bsdf */ + sd->randb_closure = r/bsdf_sum; + *probability = (bsdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bsdf_sum: 1.0f; + return NULL; + } + + /* use bssrdf */ + r -= bsdf_sum; + sd->randb_closure = 0.0f; /* not needed anymore */ + + float sum = 0.0f; + + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if(CLOSURE_IS_BSSRDF(sc->type)) { + sum += sc->sample_weight; + + if(r <= sum) { +#ifdef BSSRDF_MULTI_EVAL + *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f; +#else + *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/sc->sample_weight: 1.0f; +#endif + return sc; + } + } + } + + /* should never happen */ + *probability = 1.0f; + return NULL; +} + +#ifdef BSSRDF_MULTI_EVAL +__device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all) +{ + /* compute pdf */ + float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f); + float pdf_sum = 0.0f; + float sample_weight_sum = 0.0f; + int num_bssrdf = 0; + + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if(CLOSURE_IS_BSSRDF(sc->type)) { + float sample_weight = (all)? 
1.0f: sc->sample_weight; + + /* compute pdf */ + float pdf = 1.0f; + for(int i = 0; i < num_r; i++) + pdf *= bssrdf_pdf(kg, sc->data0, refl, r[i]); + + eval_sum += sc->weight*pdf; + pdf_sum += sample_weight*pdf; + + sample_weight_sum += sample_weight; + num_bssrdf++; + } + } + + float inv_pdf_sum; + + if(pdf_sum > 0.0f) { + /* in case of non-progressive integrate we sample all bssrdf's once, + * for progressive we pick one, so adjust pdf for that */ + if(all) + inv_pdf_sum = 1.0f/pdf_sum; + else + inv_pdf_sum = sample_weight_sum/pdf_sum; + } + else + inv_pdf_sum = 0.0f; + + float3 weight = eval_sum * inv_pdf_sum; + + return weight; +} +#endif + +/* replace closures with a single diffuse bsdf closure after scatter step */ +__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight) +{ + ShaderClosure *sc = &sd->closure[0]; + sd->num_closure = 1; + + sc->weight = weight; + sc->sample_weight = 1.0f; + sc->data0 = 0.0f; + sc->data1 = 0.0f; + sc->N = sd->N; + sd->flag &= ~SD_CLOSURE_FLAGS; + sd->flag |= bsdf_diffuse_setup(sc); + sd->randb_closure = 0.0f; + + /* todo: evaluate shading to get blurred textures and bump mapping */ + /* shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS); */ +} + +/* subsurface scattering step, from a point on the surface to another nearby point on the same object */ +__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all) +{ + float radius = sc->data0; + float refl = max(average(sc->weight)*3.0f, 0.0f); + float r = 0.0f; + bool hit = false; + float3 weight = make_float3(1.0f, 1.0f, 1.0f); +#ifdef BSSRDF_MULTI_EVAL + float r_attempts[BSSRDF_MAX_ATTEMPTS]; +#endif + int num_attempts; + + /* attempt to find a hit a given number of times before giving up */ + for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) { + /* random numbers for sampling */ + float u1 = lcg_step(lcg_state); + float u2 = 
lcg_step(lcg_state); + float u3 = lcg_step(lcg_state); + float u4 = lcg_step(lcg_state); + float u5 = lcg_step(lcg_state); + float u6 = lcg_step(lcg_state); + + r = bssrdf_sample_distance(kg, radius, refl, u5); +#ifdef BSSRDF_MULTI_EVAL + r_attempts[num_attempts] = r; +#endif + + float3 p1 = sd->P + sample_uniform_sphere(u1, u2)*r; + float3 p2 = sd->P + sample_uniform_sphere(u3, u4)*r; + + /* create ray */ + Ray ray; + ray.P = p1; + ray.D = normalize_len(p2 - p1, &ray.t); + ray.dP = sd->dP; + ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f); + ray.dD.dy = make_float3(0.0f, 0.0f, 0.0f); + ray.time = sd->time; + + /* intersect with the same object. if multiple intersections are + * found it will randomly pick one of them */ + Intersection isect; + if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, u6) == 0) + continue; + + /* setup new shading point */ + shader_setup_from_subsurface(kg, sd, &isect, &ray); + + hit = true; + num_attempts++; + break; + } + + /* evaluate subsurface scattering closures */ +#ifdef BSSRDF_MULTI_EVAL + weight *= subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all); +#else + weight *= sc->weight; +#endif + +#ifdef BSSRDF_SKIP_NO_HIT + if(!hit) + weight = make_float3(0.0f, 0.0f, 0.0f); +#endif + + /* replace closures with a single diffuse BSDF */ + subsurface_scatter_setup_diffuse_bsdf(sd, weight); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index e27de95e7ab..55c6e15ad04 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -66,8 +66,8 @@ KERNEL_TEX(uint4, texture_uint4, __svm_nodes) KERNEL_TEX(uint, texture_uint, __shader_flag) KERNEL_TEX(uint, texture_uint, __object_flag) -/* camera/film */ -KERNEL_TEX(float, texture_float, __filter_table) +/* lookup tables */ +KERNEL_TEX(float, texture_float, __lookup_table) /* sobol */ KERNEL_TEX(uint, texture_uint, __sobol_directions) diff --git 
a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index ddbda9240fb..526609d0506 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -37,6 +37,13 @@ CCL_NAMESPACE_BEGIN #define PARTICLE_SIZE 5 #define TIME_INVALID FLT_MAX +#define BSSRDF_RADIUS_TABLE_SIZE 1024 +#define BSSRDF_REFL_TABLE_SIZE 256 +#define BSSRDF_PDF_TABLE_OFFSET (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE) +#define BSSRDF_LOOKUP_TABLE_SIZE (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE*2) +#define BSSRDF_MIN_RADIUS 1e-8f +#define BSSRDF_MAX_ATTEMPTS 8 + #define TEX_NUM_FLOAT_IMAGES 5 /* device capabilities */ @@ -48,6 +55,7 @@ CCL_NAMESPACE_BEGIN #ifdef WITH_OSL #define __OSL__ #endif +#define __SUBSURFACE__ #endif #ifdef __KERNEL_CUDA__ @@ -423,7 +431,8 @@ typedef enum ShaderContext { SHADER_CONTEXT_INDIRECT = 1, SHADER_CONTEXT_EMISSION = 2, SHADER_CONTEXT_SHADOW = 3, - SHADER_CONTEXT_NUM = 4 + SHADER_CONTEXT_SSS = 4, + SHADER_CONTEXT_NUM = 5 } ShaderContext; /* Shader Data @@ -438,20 +447,23 @@ enum ShaderDataFlag { SD_BSDF = 4, /* have bsdf closure? */ SD_BSDF_HAS_EVAL = 8, /* have non-singular bsdf closure? */ SD_BSDF_GLOSSY = 16, /* have glossy bsdf */ - SD_HOLDOUT = 32, /* have holdout closure? */ - SD_VOLUME = 64, /* have volume closure? */ - SD_AO = 128, /* have ao closure? */ + SD_BSSRDF = 32, /* have bssrdf */ + SD_HOLDOUT = 64, /* have holdout closure? */ + SD_VOLUME = 128, /* have volume closure? */ + SD_AO = 256, /* have ao closure? 
*/ + + SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY|SD_BSSRDF|SD_HOLDOUT|SD_VOLUME|SD_AO), /* shader flags */ - SD_SAMPLE_AS_LIGHT = 256, /* direct light sample */ - SD_HAS_SURFACE_TRANSPARENT = 512, /* has surface transparency */ - SD_HAS_VOLUME = 1024, /* has volume shader */ - SD_HOMOGENEOUS_VOLUME = 2048, /* has homogeneous volume */ + SD_SAMPLE_AS_LIGHT = 512, /* direct light sample */ + SD_HAS_SURFACE_TRANSPARENT = 1024, /* has surface transparency */ + SD_HAS_VOLUME = 2048, /* has volume shader */ + SD_HOMOGENEOUS_VOLUME = 4096, /* has homogeneous volume */ /* object flags */ - SD_HOLDOUT_MASK = 4096, /* holdout for camera rays */ - SD_OBJECT_MOTION = 8192, /* has object motion blur */ - SD_TRANSFORM_APPLIED = 16384 /* vertices have transform applied */ + SD_HOLDOUT_MASK = 8192, /* holdout for camera rays */ + SD_OBJECT_MOTION = 16384, /* has object motion blur */ + SD_TRANSFORM_APPLIED = 32768 /* vertices have transform applied */ }; typedef struct ShaderData { @@ -611,8 +623,9 @@ typedef struct KernelFilm { int pass_shadow; float pass_shadow_scale; - int pass_pad1; - int pass_pad2; + + int filter_table_offset; + int filter_pad; } KernelFilm; typedef struct KernelBackground { @@ -680,6 +693,9 @@ typedef struct KernelIntegrator { int ao_samples; int mesh_light_samples; int use_lamp_mis; + int subsurface_samples; + + int pad1, pad2, pad3; } KernelIntegrator; typedef struct KernelBVH { @@ -711,9 +727,14 @@ typedef struct KernelCurves { float encasing_ratio; int curveflags; int subdivisions; - } KernelCurves; +typedef struct KernelBSSRDF { + int table_offset; + int num_attempts; + int pad1, pad2; +} KernelBSSRDF; + typedef struct KernelData { KernelCamera cam; KernelFilm film; @@ -722,6 +743,7 @@ typedef struct KernelData { KernelIntegrator integrator; KernelBVH bvh; KernelCurves curve_kernel_data; + KernelBSSRDF bssrdf; } KernelData; CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/CMakeLists.txt 
b/intern/cycles/kernel/osl/CMakeLists.txt index 5a27f7823e4..0ce40eda4df 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -18,12 +18,14 @@ set(SRC bsdf_phong_ramp.cpp bsdf_toon.cpp emissive.cpp + osl_bssrdf.cpp osl_closures.cpp osl_services.cpp osl_shader.cpp ) set(HEADER_SRC + osl_bssrdf.h osl_closures.h osl_globals.h osl_services.h diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp new file mode 100644 index 00000000000..ba9b13126ac --- /dev/null +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -0,0 +1,90 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <OpenImageIO/fmath.h> + +#include <OSL/genclosure.h> + +#include "osl_bssrdf.h" +#include "osl_closures.h" + +#include "kernel_types.h" +#include "kernel_montecarlo.h" + +#include "closure/bsdf_diffuse.h" +#include "closure/bssrdf.h" + +CCL_NAMESPACE_BEGIN + +using namespace OSL; + +class BSSRDFClosure : public CBSSRDFClosure { +public: + size_t memsize() const { return sizeof(*this); } + const char *name() const { return "bssrdf_cubic"; } + + void setup() + { + sc.prim = NULL; + sc.data0 = fabsf(average(radius)); + sc.data1 = 1.3f; + + m_shaderdata_flag = bssrdf_setup(&sc); + } + + bool mergeable(const ClosurePrimitive *other) const + { + return false; + } + + void print_on(std::ostream &out) const + { + out << name() << " ((" << sc.N[0] << ", " << sc.N[1] << ", " << sc.N[2] << "))"; + } +}; + +ClosureParam *closure_bssrdf_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(BSSRDFClosure, sc.N), + CLOSURE_FLOAT3_PARAM(BSSRDFClosure, radius), + //CLOSURE_FLOAT_PARAM(BSSRDFClosure, sc.data1), + CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_FINISH_PARAM(BSSRDFClosure) + }; + return params; +} + +CLOSURE_PREPARE(closure_bssrdf_prepare, BSSRDFClosure) + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/osl/osl_bssrdf.h b/intern/cycles/kernel/osl/osl_bssrdf.h new file mode 100644 index 00000000000..54df055405e --- /dev/null +++ b/intern/cycles/kernel/osl/osl_bssrdf.h @@ -0,0 +1,65 @@ +/* + * Adapted 
from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __OSL_BSSRDF_H__ +#define __OSL_BSSRDF_H__ + +#include <OSL/oslclosure.h> +#include <OSL/oslexec.h> +#include <OSL/genclosure.h> + +#include "kernel_types.h" + +#include "util_types.h" + +CCL_NAMESPACE_BEGIN + +class CBSSRDFClosure : public OSL::ClosurePrimitive { +public: + ShaderClosure sc; + float3 radius; + + CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF), + m_shaderdata_flag(0) { } + ~CBSSRDFClosure() { } + + int scattering() const { return LABEL_DIFFUSE; } + int shaderdata_flag() const { return m_shaderdata_flag; } + +protected: + int m_shaderdata_flag; +}; + +CCL_NAMESPACE_END + +#endif /* __OSL_BSSRDF_H__ */ + diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 9e65cda1e8f..9ce11ca1207 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -201,6 +201,8 @@ void OSLShader::register_closures(OSLShadingSystem *ss_) closure_bsdf_diffuse_toon_params(), closure_bsdf_diffuse_toon_prepare); register_closure(ss, "specular_toon", id++, closure_bsdf_specular_toon_params(), closure_bsdf_specular_toon_prepare); + register_closure(ss, "bssrdf_cubic", id++, + closure_bssrdf_params(), closure_bssrdf_prepare); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index daccc03ede2..d0e25bb2b0c 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -51,6 +51,7 @@ OSL::ClosureParam *closure_bsdf_diffuse_ramp_params(); OSL::ClosureParam *closure_bsdf_phong_ramp_params(); OSL::ClosureParam *closure_bsdf_diffuse_toon_params(); OSL::ClosureParam *closure_bsdf_specular_toon_params(); +OSL::ClosureParam *closure_bssrdf_params(); void closure_emission_prepare(OSL::RendererServices *, int id, void *data); void closure_background_prepare(OSL::RendererServices *, int id, void *data); @@ -60,6 +61,7 @@ void 
closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *da void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_diffuse_toon_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_specular_toon_prepare(OSL::RendererServices *, int id, void *data); +void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data); enum { AmbientOcclusion = 100 diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index a32c526a2be..555edf598f1 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -21,6 +21,7 @@ #include "kernel_globals.h" #include "kernel_object.h" +#include "osl_bssrdf.h" #include "osl_closures.h" #include "osl_globals.h" #include "osl_services.h" @@ -201,7 +202,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy, } break; } - case OSL::ClosurePrimitive::Holdout: + case OSL::ClosurePrimitive::Holdout: { sc.sample_weight = 0.0f; sc.type = CLOSURE_HOLDOUT_ID; sc.prim = NULL; @@ -211,7 +212,43 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy, sd->flag |= SD_HOLDOUT; } break; - case OSL::ClosurePrimitive::BSSRDF: + } + case OSL::ClosurePrimitive::BSSRDF: { + CBSSRDFClosure *bssrdf = (CBSSRDFClosure *)prim; + float sample_weight = fabsf(average(weight)); + + if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) { + sc.sample_weight = sample_weight; + + sc.type = bssrdf->sc.type; + sc.N = bssrdf->sc.N; + sc.data1 = bssrdf->sc.data1; + sc.prim = NULL; + + /* create one closure for each color channel */ + if(fabsf(weight.x) > 0.0f) { + sc.weight = make_float3(weight.x, 0.0f, 0.0f); + sc.data0 = bssrdf->radius.x; + sd->closure[sd->num_closure++] = sc; + sd->flag |= bssrdf->shaderdata_flag(); + } + + if(fabsf(weight.y) > 0.0f) { + sc.weight = make_float3(0.0f, weight.y, 0.0f); + sc.data0 = bssrdf->radius.y; + sd->closure[sd->num_closure++] = 
sc; + sd->flag |= bssrdf->shaderdata_flag(); + } + + if(fabsf(weight.z) > 0.0f) { + sc.weight = make_float3(0.0f, 0.0f, weight.z); + sc.data0 = bssrdf->radius.z; + sd->closure[sd->num_closure++] = sc; + sd->flag |= bssrdf->shaderdata_flag(); + } + } + break; + } case OSL::ClosurePrimitive::Debug: break; /* not implemented */ case OSL::ClosurePrimitive::Background: diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index acae46f1615..0cff264d8e1 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -55,6 +55,7 @@ set(SRC_OSL node_separate_rgb.osl node_set_normal.osl node_sky_texture.osl + node_subsurface_scattering.osl node_tangent.osl node_texture_coordinate.osl node_translucent_bsdf.osl diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl new file mode 100644 index 00000000000..5c25c44ec8f --- /dev/null +++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl @@ -0,0 +1,33 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "stdosl.h" + +shader node_subsurface_scattering( + color Color = 0.8, + float Scale = 1.0, + vector Radius = vector(0.1, 0.1, 0.1), + float IOR = 1.3, + normal Normal = N, + output closure color BSSRDF = 0) +{ + float eta = max(IOR, 1.0 + 1e-5); + + BSSRDF = Color * bssrdf_cubic(N, Scale * Radius); +} + diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index f340eaff95f..010d6ddd200 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -461,6 +461,7 @@ closure color emission() BUILTIN; closure color background() BUILTIN; closure color holdout() BUILTIN; closure color ambient_occlusion() BUILTIN; +closure color bssrdf_cubic(normal N, vector radius) BUILTIN; // Renderer state int raytype (string typename) BUILTIN; diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index b5bd2b42cb4..72e6a047158 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -25,6 +25,7 @@ __device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type, if(type == CLOSURE_BSDF_SHARP_GLASS_ID) { if(refract) { sc->data0 = eta; + sc->data1 = 0.0f; sd->flag |= bsdf_refraction_setup(sc); } else @@ -58,6 +59,9 @@ __device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, Clo if(sd->num_closure < MAX_CLOSURE) { sc->weight *= mix_weight; sc->type = type; +#ifdef __OSL__ + sc->prim = NULL; +#endif sd->num_closure++; return sc; } @@ -79,6 +83,9 @@ __device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float m sc->weight = weight; sc->sample_weight = sample_weight; sd->num_closure++; +#ifdef __OSL__ + sc->prim = NULL; +#endif return sc; } @@ -125,10 +132,13 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st float roughness = param1; if(roughness == 0.0f) { + sc->data0 = 0.0f; + sc->data1 = 0.0f; sd->flag |= bsdf_diffuse_setup(sc); } 
else { sc->data0 = roughness; + sc->data1 = 0.0f; sd->flag |= bsdf_oren_nayar_setup(sc); } } @@ -138,6 +148,8 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); if(sc) { + sc->data0 = 0.0f; + sc->data1 = 0.0f; sc->N = N; sd->flag |= bsdf_translucent_setup(sc); } @@ -147,6 +159,8 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); if(sc) { + sc->data0 = 0.0f; + sc->data1 = 0.0f; sc->N = N; sd->flag |= bsdf_transparent_setup(sc); } @@ -164,6 +178,7 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st if(sc) { sc->N = N; sc->data0 = param1; + sc->data1 = 0.0f; /* setup bsdf */ if(type == CLOSURE_BSDF_REFLECTION_ID) @@ -302,10 +317,73 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st /* sigma */ sc->data0 = clamp(param1, 0.0f, 1.0f); + sc->data1 = 0.0f; sd->flag |= bsdf_ashikhmin_velvet_setup(sc); } break; } +#ifdef __SUBSURFACE__ + case CLOSURE_BSSRDF_ID: { + ShaderClosure *sc = &sd->closure[sd->num_closure]; + float3 weight = sc->weight * mix_weight; + float sample_weight = fabsf(average(weight)); + + if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) { + /* radius * scale */ + float3 radius = stack_load_float3(stack, data_node.w)*param1; + /* index of refraction */ + float eta = fmaxf(param2, 1.0f + 1e-5f); + + /* create one closure per color channel */ + if(fabsf(weight.x) > 0.0f) { + sc->weight = make_float3(weight.x, 0.0f, 0.0f); + sc->sample_weight = sample_weight; + sc->data0 = radius.x; + sc->data1 = eta; +#ifdef __OSL__ + sc->prim = NULL; +#endif + sc->N = N; + sd->flag |= bssrdf_setup(sc); + + sd->num_closure++; + sc++; + } + + if(fabsf(weight.y) > 0.0f) { + sc->weight = make_float3(0.0f, weight.y, 0.0f); + sc->sample_weight = sample_weight; + sc->data0 = radius.y; + sc->data1 = eta; +#ifdef 
__OSL__ + sc->prim = NULL; +#endif + sc->N = N; + sd->flag |= bssrdf_setup(sc); + + sd->num_closure++; + sc++; + } + + if(fabsf(weight.z) > 0.0f) { + sc->weight = make_float3(0.0f, 0.0f, weight.z); + sc->sample_weight = sample_weight; + sc->data0 = radius.z; + sc->data1 = eta; +#ifdef __OSL__ + sc->prim = NULL; +#endif + sc->N = N; + sd->flag |= bssrdf_setup(sc); + + sd->num_closure++; + sc++; + } + } + + break; + } +#endif default: break; } diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 57177eec48f..70d73f98498 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -346,12 +346,11 @@ typedef enum ClosureType { CLOSURE_BSDF_TRANSPARENT_ID, - CLOSURE_BSSRDF_CUBIC_ID, + CLOSURE_BSSRDF_ID, CLOSURE_EMISSION_ID, CLOSURE_DEBUG_ID, CLOSURE_BACKGROUND_ID, CLOSURE_HOLDOUT_ID, - CLOSURE_SUBSURFACE_ID, CLOSURE_AMBIENT_OCCLUSION_ID, CLOSURE_VOLUME_ID, @@ -366,6 +365,7 @@ typedef enum ClosureType { #define CLOSURE_IS_BSDF_DIFFUSE(type) (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_OREN_NAYAR_ID) #define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_PHONG_RAMP_ID) #define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) +#define CLOSURE_IS_BSSRDF(type) (type == CLOSURE_BSSRDF_ID) #define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_ISOTROPIC_ID) #define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID) #define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID) diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index d67a686d1e8..e06364c6715 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -17,10 +17,10 @@ set(SRC attribute.cpp background.cpp buffers.cpp + bssrdf.cpp camera.cpp film.cpp # film_response.cpp (code unused) - filter.cpp graph.cpp image.cpp 
integrator.cpp @@ -37,6 +37,7 @@ set(SRC shader.cpp sobol.cpp svm.cpp + tables.cpp tile.cpp ) @@ -44,10 +45,10 @@ set(SRC_HEADERS attribute.h background.h buffers.h + bssrdf.h camera.h film.h # film_response.h (code unused) - filter.h graph.h image.h integrator.h @@ -63,6 +64,7 @@ set(SRC_HEADERS shader.h sobol.h svm.h + tables.h tile.h ) diff --git a/intern/cycles/render/bssrdf.cpp b/intern/cycles/render/bssrdf.cpp new file mode 100644 index 00000000000..f3f889f071a --- /dev/null +++ b/intern/cycles/render/bssrdf.cpp @@ -0,0 +1,141 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "bssrdf.h" + +#include "util_algorithm.h" +#include "util_math.h" +#include "util_types.h" + +#include "kernel_types.h" +#include "kernel_montecarlo.h" + +#include "closure/bsdf_diffuse.h" +#include "closure/bssrdf.h" + +CCL_NAMESPACE_BEGIN + +/* Cumulative density function utilities */ + +static float cdf_lookup_inverse(const vector<float>& table, float2 range, float x) +{ + int index = upper_bound(table.begin(), table.end(), x) - table.begin(); + + if(index == 0) + return range[0]; + else if(index == table.size()) + return range[1]; + else + index--; + + float t = (x - table[index])/(table[index+1] - table[index]); + float y = ((index + t)/(table.size() - 1)); + + return y*(range[1] - range[0]) + range[0]; +} + +static void cdf_invert(vector<float>& to, float2 to_range, const vector<float>& from, float2 from_range) +{ + float step = 1.0f/(float)(to.size() - 1); + + for(int i = 0; i < to.size(); i++) { + float x = (i*step)*(from_range[1] - from_range[0]) + from_range[0]; + to[i] = cdf_lookup_inverse(from, to_range, x); + } +} + +/* BSSRDF */ + +static float bssrdf_lookup_table_max_radius(const BSSRDFParams *ss) +{ + /* todo: adjust when we use the real BSSRDF */ + return ss->ld; +} + +static void bssrdf_lookup_table_create(const BSSRDFParams *ss, vector<float>& sample_table, vector<float>& pdf_table) +{ + const int size = BSSRDF_RADIUS_TABLE_SIZE; + vector<float> cdf(size); + vector<float> pdf(size); + float step = 1.0f/(float)(size - 1); + float max_radius = bssrdf_lookup_table_max_radius(ss); + float pdf_sum = 0.0f; + + /* compute the probability density function */ + for(int i = 0; i < pdf.size(); i++) { + float x = (i*step)*max_radius; + pdf[i] = bssrdf_cubic(ss->ld, x); + pdf_sum += pdf[i]; + } + + /* adjust for area covered by each distance */ + for(int i = 0; i < pdf.size(); i++) { + float x = (i*step)*max_radius; + pdf[i] *= 2*M_PI_F*x; + } + + /* normalize pdf, we multiply in reflectance later */ + if(pdf_sum > 0.0f) + for(int i = 0; i < 
pdf.size(); i++) + pdf[i] /= pdf_sum; + + /* sum to account for sampling which uses overlapping sphere */ + for(int i = pdf.size() - 2; i >= 0; i--) + pdf[i] = pdf[i] + pdf[i+1]; + + /* compute the cumulative density function */ + cdf[0] = 0.0f; + + for(int i = 1; i < size; i++) + cdf[i] = cdf[i-1] + 0.5f*(pdf[i-1] + pdf[i])*step*max_radius; + + /* invert cumulative density function for importance sampling */ + float2 cdf_range = make_float2(0.0f, cdf[size - 1]); + float2 table_range = make_float2(0.0f, max_radius); + + cdf_invert(sample_table, table_range, cdf, cdf_range); + + /* copy pdf table */ + for(int i = 0; i < pdf.size(); i++) + pdf_table[i] = pdf[i]; +} + +void bssrdf_table_build(vector<float>& table) +{ + vector<float> sample_table(BSSRDF_RADIUS_TABLE_SIZE); + vector<float> pdf_table(BSSRDF_RADIUS_TABLE_SIZE); + + table.resize(BSSRDF_LOOKUP_TABLE_SIZE); + + /* create a 2D lookup table, for reflection x sample radius */ + for(int i = 0; i < BSSRDF_REFL_TABLE_SIZE; i++) { + float refl = (float)i/(float)(BSSRDF_REFL_TABLE_SIZE-1); + float ior = 1.3f; + float radius = 1.0f; + + BSSRDFParams ss; + bssrdf_setup_params(&ss, refl, radius, ior); + bssrdf_lookup_table_create(&ss, sample_table, pdf_table); + + memcpy(&table[i*BSSRDF_RADIUS_TABLE_SIZE], &sample_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float)); + memcpy(&table[BSSRDF_PDF_TABLE_OFFSET + i*BSSRDF_RADIUS_TABLE_SIZE], &pdf_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float)); + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/render/bssrdf.h b/intern/cycles/render/bssrdf.h new file mode 100644 index 00000000000..975ac0b46ec --- /dev/null +++ b/intern/cycles/render/bssrdf.h @@ -0,0 +1,31 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BSSRDF_H__ +#define __BSSRDF_H__ + +#include "util_vector.h" + +CCL_NAMESPACE_BEGIN + +void bssrdf_table_build(vector<float>& table); + +CCL_NAMESPACE_END + +#endif /* __BSSRDF_H__ */ + diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index fdf25ca7908..9fc6e867166 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -22,9 +22,12 @@ #include "integrator.h" #include "mesh.h" #include "scene.h" +#include "tables.h" #include "util_algorithm.h" +#include "util_debug.h" #include "util_foreach.h" +#include "util_math.h" CCL_NAMESPACE_BEGIN @@ -171,12 +174,84 @@ bool Pass::contains(const vector<Pass>& passes, PassType type) return false; } +/* Pixel Filter */ + +static float filter_func_box(float v, float width) +{ + return (float)1; +} + +static float filter_func_gaussian(float v, float width) +{ + v *= (float)2/width; + return (float)expf((float)-2*v*v); +} + +static vector<float> filter_table(FilterType type, float width) +{ + const int filter_table_size = FILTER_TABLE_SIZE-1; + vector<float> filter_table_cdf(filter_table_size+1); + vector<float> filter_table(filter_table_size+1); + float (*filter_func)(float, float) = NULL; + int i, half_size = filter_table_size/2; + + switch(type) { + case FILTER_BOX: + filter_func = filter_func_box; + break; + case FILTER_GAUSSIAN: + filter_func = filter_func_gaussian; + break; + default: + assert(0); + } + + /* compute cumulative distribution function */ + filter_table_cdf[0] = 0.0f; + + for(i = 
0; i < filter_table_size; i++) { + float x = i*width*0.5f/(filter_table_size-1); + float y = filter_func(x, width); + filter_table_cdf[i+1] += filter_table_cdf[i] + fabsf(y); + } + + for(i = 0; i <= filter_table_size; i++) + filter_table_cdf[i] /= filter_table_cdf[filter_table_size]; + + /* create importance sampling table */ + for(i = 0; i <= half_size; i++) { + float x = i/(float)half_size; + int index = upper_bound(filter_table_cdf.begin(), filter_table_cdf.end(), x) - filter_table_cdf.begin(); + float t; + + if(index < filter_table_size+1) { + t = (x - filter_table_cdf[index])/(filter_table_cdf[index+1] - filter_table_cdf[index]); + } + else { + t = 0.0f; + index = filter_table_size; + } + + float y = ((index + t)/(filter_table_size))*width; + + filter_table[half_size+i] = 0.5f*(1.0f + y); + filter_table[half_size-i] = 0.5f*(1.0f - y); + } + + return filter_table; +} + /* Film */ Film::Film() { exposure = 0.8f; Pass::add(PASS_COMBINED, passes); + + filter_type = FILTER_BOX; + filter_width = 1.0f; + filter_table_offset = TABLE_OFFSET_INVALID; + need_update = true; } @@ -184,10 +259,12 @@ Film::~Film() { } -void Film::device_update(Device *device, DeviceScene *dscene) +void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) { if(!need_update) return; + + device_free(device, dscene, scene); KernelFilm *kfilm = &dscene->data.film; @@ -284,17 +361,28 @@ void Film::device_update(Device *device, DeviceScene *dscene) kfilm->pass_stride = align_up(kfilm->pass_stride, 4); + /* update filter table */ + vector<float> table = filter_table(filter_type, filter_width); + filter_table_offset = scene->lookup_tables->add_table(dscene, table); + kfilm->filter_table_offset = (int)filter_table_offset; + need_update = false; } -void Film::device_free(Device *device, DeviceScene *dscene) +void Film::device_free(Device *device, DeviceScene *dscene, Scene *scene) { + if(filter_table_offset != TABLE_OFFSET_INVALID) { + 
scene->lookup_tables->remove_table(filter_table_offset); + filter_table_offset = TABLE_OFFSET_INVALID; + } } bool Film::modified(const Film& film) { return !(exposure == film.exposure - && Pass::equals(passes, film.passes)); + && Pass::equals(passes, film.passes) + && filter_type == film.filter_type + && filter_width == film.filter_width); } void Film::tag_passes_update(Scene *scene, const vector<Pass>& passes_) diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 52d1a8428f8..bc1619c3f2d 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -30,6 +30,11 @@ class Device; class DeviceScene; class Scene; +typedef enum FilterType { + FILTER_BOX, + FILTER_GAUSSIAN +} FilterType; + class Pass { public: PassType type; @@ -47,13 +52,18 @@ class Film { public: float exposure; vector<Pass> passes; + + FilterType filter_type; + float filter_width; + size_t filter_table_offset; + bool need_update; Film(); ~Film(); - void device_update(Device *device, DeviceScene *dscene); - void device_free(Device *device, DeviceScene *dscene); + void device_update(Device *device, DeviceScene *dscene, Scene *scene); + void device_free(Device *device, DeviceScene *dscene, Scene *scene); bool modified(const Film& film); void tag_passes_update(Scene *scene, const vector<Pass>& passes_); diff --git a/intern/cycles/render/filter.cpp b/intern/cycles/render/filter.cpp deleted file mode 100644 index 0bd4fb4d579..00000000000 --- a/intern/cycles/render/filter.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2011, Blender Foundation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "camera.h" -#include "device.h" -#include "filter.h" -#include "scene.h" - -#include "kernel_types.h" - -#include "util_algorithm.h" -#include "util_debug.h" -#include "util_math.h" - -CCL_NAMESPACE_BEGIN - -Filter::Filter() -{ - filter_type = FILTER_BOX; - filter_width = 1.0f; - need_update = true; -} - -Filter::~Filter() -{ -} - -static float filter_func_box(float v, float width) -{ - return (float)1; -} - -static float filter_func_gaussian(float v, float width) -{ - v *= (float)2/width; - return (float)expf((float)-2*v*v); -} - -static vector<float> filter_table(FilterType type, float width) -{ - const int filter_table_size = FILTER_TABLE_SIZE-1; - vector<float> filter_table_cdf(filter_table_size+1); - vector<float> filter_table(filter_table_size+1); - float (*filter_func)(float, float) = NULL; - int i, half_size = filter_table_size/2; - - switch(type) { - case FILTER_BOX: - filter_func = filter_func_box; - break; - case FILTER_GAUSSIAN: - filter_func = filter_func_gaussian; - break; - default: - assert(0); - } - - /* compute cumulative distribution function */ - filter_table_cdf[0] = 0.0f; - - for(i = 0; i < filter_table_size; i++) { - float x = i*width*0.5f/(filter_table_size-1); - float y = filter_func(x, width); - filter_table_cdf[i+1] += filter_table_cdf[i] + fabsf(y); - } - - for(i = 0; i <= filter_table_size; i++) - filter_table_cdf[i] /= filter_table_cdf[filter_table_size]; - - /* create importance sampling table */ - for(i = 0; i <= half_size; i++) { - float x = 
i/(float)half_size; - int index = upper_bound(filter_table_cdf.begin(), filter_table_cdf.end(), x) - filter_table_cdf.begin(); - float t; - - if(index < filter_table_size+1) { - t = (x - filter_table_cdf[index])/(filter_table_cdf[index+1] - filter_table_cdf[index]); - } - else { - t = 0.0f; - index = filter_table_size; - } - - float y = ((index + t)/(filter_table_size))*width; - - filter_table[half_size+i] = 0.5f*(1.0f + y); - filter_table[half_size-i] = 0.5f*(1.0f - y); - } - - return filter_table; -} - -void Filter::device_update(Device *device, DeviceScene *dscene) -{ - if(!need_update) - return; - - device_free(device, dscene); - - /* update __filter_table */ - vector<float> table = filter_table(filter_type, filter_width); - - dscene->filter_table.copy(&table[0], table.size()); - device->tex_alloc("__filter_table", dscene->filter_table, true); - - need_update = false; -} - -void Filter::device_free(Device *device, DeviceScene *dscene) -{ - device->tex_free(dscene->filter_table); - dscene->filter_table.clear(); -} - -bool Filter::modified(const Filter& filter) -{ - return !(filter_type == filter.filter_type && - filter_width == filter.filter_width); -} - -void Filter::tag_update(Scene *scene) -{ - need_update = true; -} - -CCL_NAMESPACE_END - diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index c6b9ae08508..46043cf85d2 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -187,6 +187,7 @@ public: virtual bool has_surface_emission() { return false; } virtual bool has_surface_transparent() { return false; } + virtual bool has_surface_bssrdf() { return false; } vector<ShaderInput*> inputs; vector<ShaderOutput*> outputs; diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 699e6979990..00039170733 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -54,6 +54,7 @@ Integrator::Integrator() transmission_samples = 1; ao_samples = 1; 
mesh_light_samples = 1; + subsurface_samples = 1; progressive = true; need_update = true; @@ -108,6 +109,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->transmission_samples = transmission_samples; kintegrator->ao_samples = ao_samples; kintegrator->mesh_light_samples = mesh_light_samples; + kintegrator->subsurface_samples = subsurface_samples; /* sobol directions table */ int max_samples = 1; @@ -163,6 +165,7 @@ bool Integrator::modified(const Integrator& integrator) transmission_samples == integrator.transmission_samples && ao_samples == integrator.ao_samples && mesh_light_samples == integrator.mesh_light_samples && + subsurface_samples == integrator.subsurface_samples && motion_blur == integrator.motion_blur); } diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 8fb341182b7..9867e310d4d 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -54,6 +54,7 @@ public: int transmission_samples; int ao_samples; int mesh_light_samples; + int subsurface_samples; bool progressive; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index a4ffc2518fb..b4ff6e3152b 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -1262,16 +1262,19 @@ void ProxyNode::compile(OSLCompiler& compiler) /* BSDF Closure */ -BsdfNode::BsdfNode() -: ShaderNode("bsdf") +BsdfNode::BsdfNode(bool scattering_) +: ShaderNode("subsurface_scattering"), scattering(scattering_) { - closure = ccl::CLOSURE_BSDF_DIFFUSE_ID; + closure = ccl::CLOSURE_BSSRDF_ID; add_input("Color", SHADER_SOCKET_COLOR, make_float3(0.8f, 0.8f, 0.8f)); add_input("Normal", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL); add_input("SurfaceMixWeight", SHADER_SOCKET_FLOAT, 0.0f, ShaderInput::USE_SVM); - add_output("BSDF", SHADER_SOCKET_CLOSURE); + if(scattering) + add_output("BSSRDF", SHADER_SOCKET_CLOSURE); + else + add_output("BSDF", SHADER_SOCKET_CLOSURE); } 
void BsdfNode::compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3) @@ -1313,7 +1316,8 @@ void BsdfNode::compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput * (param3)? param3->stack_offset: SVM_STACK_INVALID); } else { - compiler.add_node(NODE_CLOSURE_BSDF, normal_in->stack_offset); + compiler.add_node(NODE_CLOSURE_BSDF, normal_in->stack_offset, SVM_STACK_INVALID, + (param3)? param3->stack_offset: SVM_STACK_INVALID); } } @@ -1548,6 +1552,29 @@ void TransparentBsdfNode::compile(OSLCompiler& compiler) compiler.add(this, "node_transparent_bsdf"); } +/* Subsurface Scattering Closure */ + +SubsurfaceScatteringNode::SubsurfaceScatteringNode() +: BsdfNode(true) +{ + name = "subsurface_scattering"; + closure = CLOSURE_BSSRDF_ID; + + add_input("Scale", SHADER_SOCKET_FLOAT, 0.01f); + add_input("Radius", SHADER_SOCKET_VECTOR, make_float3(0.1f, 0.1f, 0.1f)); + add_input("IOR", SHADER_SOCKET_FLOAT, 1.3f); +} + +void SubsurfaceScatteringNode::compile(SVMCompiler& compiler) +{ + BsdfNode::compile(compiler, input("Scale"), input("IOR"), input("Radius")); +} + +void SubsurfaceScatteringNode::compile(OSLCompiler& compiler) +{ + compiler.add(this, "node_subsurface_scattering"); +} + /* Emissive Closure */ EmissionNode::EmissionNode() diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 1efe4ae076d..0d9f84327d0 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -198,11 +198,13 @@ public: class BsdfNode : public ShaderNode { public: - SHADER_NODE_CLASS(BsdfNode) + BsdfNode(bool scattering = false); + SHADER_NODE_BASE_CLASS(BsdfNode); void compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3 = NULL); ClosureType closure; + bool scattering; }; class WardBsdfNode : public BsdfNode { @@ -257,6 +259,12 @@ public: static ShaderEnum distribution_enum; }; +class SubsurfaceScatteringNode : public BsdfNode { +public: + 
SHADER_NODE_CLASS(SubsurfaceScatteringNode) + bool has_surface_bssrdf() { return true; } +}; + class EmissionNode : public ShaderNode { public: SHADER_NODE_CLASS(EmissionNode) diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index f5585babf5c..cefb6315725 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -73,7 +73,7 @@ void OSLShaderManager::device_update(Device *device, DeviceScene *dscene, Scene if(!need_update) return; - device_free(device, dscene); + device_free(device, dscene, scene); /* determine which shaders are in use */ device_update_shaders_used(scene); @@ -114,11 +114,11 @@ void OSLShaderManager::device_update(Device *device, DeviceScene *dscene, Scene device_update_common(device, dscene, scene, progress); } -void OSLShaderManager::device_free(Device *device, DeviceScene *dscene) +void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene) { OSLGlobals *og = (OSLGlobals*)device->osl_memory(); - device_free_common(device, dscene); + device_free_common(device, dscene, scene); /* clear shader engine */ og->use = false; @@ -328,6 +328,7 @@ const char *OSLShaderManager::shader_load_bytecode(const string& hash, const str OSLShaderInfo info; info.has_surface_emission = (bytecode.find("\"emission\"") != string::npos); info.has_surface_transparent = (bytecode.find("\"transparent\"") != string::npos); + info.has_surface_bssrdf = (bytecode.find("\"bssrdf\"") != string::npos); loaded_shaders[hash] = info; return loaded_shaders.find(hash)->first.c_str(); @@ -511,6 +512,8 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) current_shader->has_surface_emission = true; if(info->has_surface_transparent) current_shader->has_surface_transparent = true; + if(info->has_surface_bssrdf) + current_shader->has_surface_bssrdf = true; } } @@ -671,6 +674,8 @@ void OSLCompiler::generate_nodes(const set<ShaderNode*>& nodes) current_shader->has_surface_emission = true; 
if(node->has_surface_transparent()) current_shader->has_surface_transparent = true; + if(node->has_surface_bssrdf()) + current_shader->has_surface_bssrdf = true; } else nodes_done = false; @@ -736,6 +741,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader) shader->has_surface = false; shader->has_surface_emission = false; shader->has_surface_transparent = false; + shader->has_surface_bssrdf = false; shader->has_volume = false; shader->has_displacement = false; diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h index 4b4ed6cba00..2d3996df0eb 100644 --- a/intern/cycles/render/osl.h +++ b/intern/cycles/render/osl.h @@ -50,11 +50,13 @@ class ShaderOutput; struct OSLShaderInfo { OSLShaderInfo() - : has_surface_emission(false), has_surface_transparent(false) + : has_surface_emission(false), has_surface_transparent(false), + has_surface_bssrdf(false) {} bool has_surface_emission; bool has_surface_transparent; + bool has_surface_bssrdf; }; /* Shader Manage */ @@ -69,7 +71,7 @@ public: bool use_osl() { return true; } void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); - void device_free(Device *device, DeviceScene *dscene); + void device_free(Device *device, DeviceScene *dscene, Scene *scene); /* osl compile and query */ static bool osl_compile(const string& inputfile, const string& outputfile); diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 7b82a91cae8..a6dca62ffd0 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -20,19 +20,19 @@ #include "background.h" #include "camera.h" +#include "curves.h" #include "device.h" #include "film.h" -#include "filter.h" #include "integrator.h" #include "light.h" -#include "shader.h" #include "mesh.h" #include "object.h" +#include "osl.h" #include "particles.h" -#include "curves.h" #include "scene.h" +#include "shader.h" #include "svm.h" -#include "osl.h" +#include "tables.h" #include "util_foreach.h" 
#include "util_progress.h" @@ -46,7 +46,7 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) memset(&dscene.data, 0, sizeof(dscene.data)); camera = new Camera(); - filter = new Filter(); + lookup_tables = new LookupTables(); film = new Film(); background = new Background(); light_manager = new LightManager(); @@ -93,14 +93,13 @@ void Scene::free_memory(bool final) if(device) { camera->device_free(device, &dscene); - filter->device_free(device, &dscene); - film->device_free(device, &dscene); + film->device_free(device, &dscene, this); background->device_free(device, &dscene); integrator->device_free(device, &dscene); object_manager->device_free(device, &dscene); mesh_manager->device_free(device, &dscene); - shader_manager->device_free(device, &dscene); + shader_manager->device_free(device, &dscene, this); light_manager->device_free(device, &dscene); particle_system_manager->device_free(device, &dscene); @@ -108,10 +107,12 @@ void Scene::free_memory(bool final) if(!params.persistent_data || final) image_manager->device_free(device, &dscene); + + lookup_tables->device_free(device, &dscene); } if(final) { - delete filter; + delete lookup_tables; delete camera; delete film; delete background; @@ -188,13 +189,8 @@ void Scene::device_update(Device *device_, Progress& progress) if(progress.get_cancel()) return; - progress.set_status("Updating Filter"); - filter->device_update(device, &dscene); - - if(progress.get_cancel()) return; - progress.set_status("Updating Film"); - film->device_update(device, &dscene); + film->device_update(device, &dscene, this); if(progress.get_cancel()) return; @@ -203,6 +199,11 @@ void Scene::device_update(Device *device_, Progress& progress) if(progress.get_cancel()) return; + progress.set_status("Updating Lookup Tables"); + lookup_tables->device_update(device, &dscene); + + if(progress.get_cancel()) return; + progress.set_status("Updating Device", "Writing constant memory"); device->const_copy_to("__data", 
&dscene.data, sizeof(dscene.data)); } @@ -247,7 +248,7 @@ bool Scene::need_reset() || object_manager->need_update || mesh_manager->need_update || light_manager->need_update - || filter->need_update + || lookup_tables->need_update || integrator->need_update || shader_manager->need_update || particle_system_manager->need_update @@ -261,7 +262,6 @@ void Scene::reset() /* ensure all objects are updated */ camera->tag_update(); - filter->tag_update(this); film->tag_update(this); background->tag_update(this); integrator->tag_update(this); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index fc6b538af03..545a765cc22 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -39,10 +39,10 @@ class Camera; class Device; class DeviceInfo; class Film; -class Filter; class Integrator; class Light; class LightManager; +class LookupTables; class Mesh; class MeshManager; class Object; @@ -99,8 +99,8 @@ public: device_vector<uint> shader_flag; device_vector<uint> object_flag; - /* filter */ - device_vector<float> filter_table; + /* lookup tables */ + device_vector<float> lookup_table; /* integrator */ device_vector<uint> sobol_directions; @@ -155,7 +155,7 @@ class Scene { public: /* data */ Camera *camera; - Filter *filter; + LookupTables *lookup_tables; Film *film; Background *background; Integrator *integrator; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index b9b49bf2989..c7f39b4151a 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -16,6 +16,7 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ +#include "bssrdf.h" #include "device.h" #include "graph.h" #include "light.h" @@ -25,6 +26,7 @@ #include "scene.h" #include "shader.h" #include "svm.h" +#include "tables.h" #include "util_foreach.h" @@ -46,6 +48,7 @@ Shader::Shader() has_surface = false; has_surface_transparent = false; has_surface_emission = false; + has_surface_bssrdf = false; has_volume = false; has_displacement = false; @@ -115,6 +118,7 @@ void Shader::tag_used(Scene *scene) ShaderManager::ShaderManager() { need_update = true; + bssrdf_table_offset = TABLE_OFFSET_INVALID; } ShaderManager::~ShaderManager() @@ -196,7 +200,8 @@ void ShaderManager::device_update_shaders_used(Scene *scene) void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - device_free_common(device, dscene); + device->tex_free(dscene->shader_flag); + dscene->shader_flag.clear(); if(scene->shaders.size() == 0) return; @@ -204,6 +209,7 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc uint shader_flag_size = scene->shaders.size()*4; uint *shader_flag = dscene->shader_flag.resize(shader_flag_size); uint i = 0; + bool has_surface_bssrdf = false; foreach(Shader *shader, scene->shaders) { uint flag = 0; @@ -216,6 +222,8 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc flag |= SD_HAS_VOLUME; if(shader->homogeneous_volume) flag |= SD_HOMOGENEOUS_VOLUME; + if(shader->has_surface_bssrdf) + has_surface_bssrdf = true; shader_flag[i++] = flag; shader_flag[i++] = shader->pass_id; @@ -224,10 +232,32 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc } device->tex_alloc("__shader_flag", dscene->shader_flag); + + /* bssrdf lookup table */ + KernelBSSRDF *kbssrdf = &dscene->data.bssrdf; + + if(has_surface_bssrdf && bssrdf_table_offset == TABLE_OFFSET_INVALID) { + vector<float> table; + + bssrdf_table_build(table); + bssrdf_table_offset = scene->lookup_tables->add_table(dscene, 
table); + + kbssrdf->table_offset = (int)bssrdf_table_offset; + kbssrdf->num_attempts = BSSRDF_MAX_ATTEMPTS; + } + else if(!has_surface_bssrdf && bssrdf_table_offset != TABLE_OFFSET_INVALID) { + scene->lookup_tables->remove_table(bssrdf_table_offset); + bssrdf_table_offset = TABLE_OFFSET_INVALID; + } } -void ShaderManager::device_free_common(Device *device, DeviceScene *dscene) +void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene) { + if(bssrdf_table_offset != TABLE_OFFSET_INVALID) { + scene->lookup_tables->remove_table(bssrdf_table_offset); + bssrdf_table_offset = TABLE_OFFSET_INVALID; + } + device->tex_free(dscene->shader_flag); dscene->shader_flag.clear(); } diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index b38e098e3cb..2a9f1198467 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -75,6 +75,7 @@ public: bool has_surface_transparent; bool has_volume; bool has_displacement; + bool has_surface_bssrdf; /* requested mesh attributes */ AttributeRequestSet attributes; @@ -116,11 +117,11 @@ public: /* device update */ virtual void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) = 0; - virtual void device_free(Device *device, DeviceScene *dscene) = 0; + virtual void device_free(Device *device, DeviceScene *dscene, Scene *scene) = 0; void device_update_shaders_used(Scene *scene); void device_update_common(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); - void device_free_common(Device *device, DeviceScene *dscene); + void device_free_common(Device *device, DeviceScene *dscene, Scene *scene); /* get globally unique id for a type of attribute */ uint get_attribute_id(ustring name); @@ -138,6 +139,8 @@ protected: typedef unordered_map<ustring, uint, ustringHash> AttributeIDMap; AttributeIDMap unique_attribute_id; + + size_t bssrdf_table_offset; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/svm.cpp 
b/intern/cycles/render/svm.cpp index 5cb11a4ec1a..ea2fe4991db 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -50,7 +50,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene return; /* test if we need to update */ - device_free(device, dscene); + device_free(device, dscene, scene); /* determine which shaders are in use */ device_update_shaders_used(scene); @@ -99,9 +99,9 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene need_update = false; } -void SVMShaderManager::device_free(Device *device, DeviceScene *dscene) +void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene) { - device_free_common(device, dscene); + device_free_common(device, dscene, scene); device->tex_free(dscene->svm_nodes); dscene->svm_nodes.clear(); @@ -486,6 +486,8 @@ void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done) current_shader->has_surface_emission = true; if(node->has_surface_transparent()) current_shader->has_surface_transparent = true; + if(node->has_surface_bssrdf()) + current_shader->has_surface_bssrdf = true; /* end node is added outside of this */ } @@ -546,6 +548,8 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don current_shader->has_surface_emission = true; if(node->has_surface_transparent()) current_shader->has_surface_transparent = true; + if(node->has_surface_bssrdf()) + current_shader->has_surface_bssrdf = true; } done.insert(node); @@ -654,6 +658,7 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in shader->has_surface = false; shader->has_surface_emission = false; shader->has_surface_transparent = false; + shader->has_surface_bssrdf = false; shader->has_volume = false; shader->has_displacement = false; diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h index c1ce619e12a..e09144a4e76 100644 --- a/intern/cycles/render/svm.h +++ 
b/intern/cycles/render/svm.h @@ -48,7 +48,7 @@ public: void reset(Scene *scene); void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); - void device_free(Device *device, DeviceScene *dscene); + void device_free(Device *device, DeviceScene *dscene, Scene *scene); }; /* Graph Compiler */ diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp new file mode 100644 index 00000000000..c7c86f68960 --- /dev/null +++ b/intern/cycles/render/tables.cpp @@ -0,0 +1,110 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "device.h" +#include "scene.h" +#include "tables.h" + +#include "util_debug.h" + +CCL_NAMESPACE_BEGIN + +/* Lookup Tables */ + +LookupTables::LookupTables() +{ + need_update = true; +} + +LookupTables::~LookupTables() +{ + assert(lookup_tables.size() == 0); +} + +void LookupTables::device_update(Device *device, DeviceScene *dscene) +{ + if(!need_update) + return; + + device->tex_alloc("__lookup_table", dscene->lookup_table); + + need_update = false; +} + +void LookupTables::device_free(Device *device, DeviceScene *dscene) +{ + device->tex_free(dscene->lookup_table); + dscene->lookup_table.clear(); +} + +static size_t round_up_to_multiple(size_t size, size_t chunk) +{ + return ((size + chunk - 1)/chunk) * chunk; +} + +size_t LookupTables::add_table(DeviceScene *dscene, vector<float>& data) +{ + assert(data.size() > 0); + + need_update = true; + + Table new_table; + new_table.offset = 0; + new_table.size = round_up_to_multiple(data.size(), TABLE_CHUNK_SIZE); + + /* find space to put lookup table */ + list<Table>::iterator table; + + for(table = lookup_tables.begin(); table != lookup_tables.end(); table++) { + if(new_table.offset + new_table.size <= table->offset) { + lookup_tables.insert(table, new_table); + break; + } + else + new_table.offset = table->offset + table->size; + } + + if(table == lookup_tables.end()) { + /* add at the end */ + lookup_tables.push_back(new_table); + dscene->lookup_table.resize(new_table.offset + new_table.size); + } + + /* copy table data and return offset */ + dscene->lookup_table.copy_at(&data[0], new_table.offset, data.size()); + return new_table.offset; +} + +void LookupTables::remove_table(size_t offset) +{ + need_update = true; + + list<Table>::iterator table; + + for(table = lookup_tables.begin(); table != lookup_tables.end(); table++) { + if(table->offset == offset) { + lookup_tables.erase(table); + break; + } + } + + assert(table != lookup_tables.end()); +} + +CCL_NAMESPACE_END + diff --git 
a/intern/cycles/render/filter.h b/intern/cycles/render/tables.h index 5df7bb8fd14..605efd3747f 100644 --- a/intern/cycles/render/filter.h +++ b/intern/cycles/render/tables.h @@ -16,8 +16,10 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#ifndef __FILTER_H__ -#define __FILTER_H__ +#ifndef __TABLES_H__ +#define __TABLES_H__ + +#include <util_list.h> CCL_NAMESPACE_BEGIN @@ -25,29 +27,30 @@ class Device; class DeviceScene; class Scene; -typedef enum FilterType { - FILTER_BOX, - FILTER_GAUSSIAN -} FilterType; +enum { TABLE_CHUNK_SIZE = 256 }; +enum { TABLE_OFFSET_INVALID = -1 }; -class Filter { +class LookupTables { public: - /* pixel filter */ - FilterType filter_type; - float filter_width; + struct Table { + size_t offset; + size_t size; + }; + bool need_update; + list<Table> lookup_tables; - Filter(); - ~Filter(); + LookupTables(); + ~LookupTables(); void device_update(Device *device, DeviceScene *dscene); void device_free(Device *device, DeviceScene *dscene); - bool modified(const Filter& filter); - void tag_update(Scene *scene); + size_t add_table(DeviceScene *dscene, vector<float>& data); + void remove_table(size_t offset); }; CCL_NAMESPACE_END -#endif /* __FILTER_H__ */ +#endif /* __TABLES_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index c37fa1a4dc6..f2e814527fd 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1151,14 +1151,7 @@ __device float safe_logf(float a, float b) __device float safe_divide(float a, float b) { - float result; - - if(b == 0.0f) - result = 0.0f; - else - result = a/b; - - return result; + return (b != 0.0f)? 
a/b: 0.0f; } /* Ray Intersection */ diff --git a/intern/ffmpeg/ffmpeg_compat.h b/intern/ffmpeg/ffmpeg_compat.h index 69e2e014761..aaedbe2dccd 100644 --- a/intern/ffmpeg/ffmpeg_compat.h +++ b/intern/ffmpeg/ffmpeg_compat.h @@ -171,6 +171,20 @@ void av_update_cur_dts(AVFormatContext *s, AVStream *ref_st, int64_t timestamp) } #endif +#if ((LIBAVCODEC_VERSION_MAJOR < 54) || (LIBAVCODEC_VERSION_MAJOR == 54 && LIBAVCODEC_VERSION_MINOR < 28)) +static inline +void avcodec_free_frame(AVFrame **frame) +{ + /* don't need to do anything with old AVFrame + * since it does not have malloced members */ + (void)frame; +} +#endif + +#if ((LIBAVCODEC_VERSION_MAJOR > 54) || (LIBAVCODEC_VERSION_MAJOR == 54 && LIBAVCODEC_VERSION_MINOR >= 13)) +#define FFMPEG_HAVE_FRAME_CHANNEL_LAYOUT +#endif + #ifndef FFMPEG_HAVE_AVIO #define AVIO_FLAG_WRITE URL_WRONLY #define avio_open url_fopen diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm index 795b5200ae2..61b853d79b0 100644 --- a/intern/ghost/intern/GHOST_WindowCocoa.mm +++ b/intern/ghost/intern/GHOST_WindowCocoa.mm @@ -454,7 +454,7 @@ GHOST_WindowCocoa::GHOST_WindowCocoa( GHOST_TDrawingContextType type, const bool stereoVisual, const GHOST_TUns16 numOfAASamples ) : - GHOST_Window(width, height, state, GHOST_kDrawingContextTypeNone, stereoVisual, numOfAASamples), + GHOST_Window(width, height, state, GHOST_kDrawingContextTypeNone, stereoVisual, false, numOfAASamples), m_customCursor(0) { NSOpenGLPixelFormatAttribute pixelFormatAttrsWindow[40]; diff --git a/intern/ghost/intern/GHOST_WindowNULL.h b/intern/ghost/intern/GHOST_WindowNULL.h index e3d092101b0..f595fa7d794 100644 --- a/intern/ghost/intern/GHOST_WindowNULL.h +++ b/intern/ghost/intern/GHOST_WindowNULL.h @@ -53,7 +53,7 @@ public: const bool stereoVisual, const GHOST_TUns16 numOfAASamples ) : - GHOST_Window(width, height, state, type, stereoVisual, numOfAASamples), + GHOST_Window(width, height, state, type, stereoVisual, false, numOfAASamples), 
m_system(system) { setTitle(title); diff --git a/intern/ghost/intern/GHOST_WindowWin32.cpp b/intern/ghost/intern/GHOST_WindowWin32.cpp index b1a9ca52605..2b1bdfa78f0 100644 --- a/intern/ghost/intern/GHOST_WindowWin32.cpp +++ b/intern/ghost/intern/GHOST_WindowWin32.cpp @@ -130,7 +130,7 @@ GHOST_WindowWin32::GHOST_WindowWin32( int msPixelFormat) : GHOST_Window(width, height, state, GHOST_kDrawingContextTypeNone, - stereoVisual, numOfAASamples), + stereoVisual, false, numOfAASamples), m_system(system), m_hDC(0), m_hGlRc(0), diff --git a/intern/opencolorio/CMakeLists.txt b/intern/opencolorio/CMakeLists.txt index 5680ce79762..30a74baa646 100644 --- a/intern/opencolorio/CMakeLists.txt +++ b/intern/opencolorio/CMakeLists.txt @@ -44,6 +44,7 @@ set(SRC if(WITH_OPENCOLORIO) add_definitions( -DWITH_OCIO + -DGLEW_STATIC ) list(APPEND INC_SYS diff --git a/intern/opencolorio/SConscript b/intern/opencolorio/SConscript index 73a8fd7a2e0..7f050f25cae 100644 --- a/intern/opencolorio/SConscript +++ b/intern/opencolorio/SConscript @@ -30,7 +30,7 @@ Import('env') sources = env.Glob('*.cc') incs = '. 
../guardedalloc ../../source/blender/blenlib' -defs = [] +defs = [ 'GLEW_STATIC' ] if env['WITH_BF_OCIO']: defs.append('WITH_OCIO') diff --git a/intern/opencolorio/fallback_impl.cc b/intern/opencolorio/fallback_impl.cc index 47c648e9cba..37f624e1f8b 100644 --- a/intern/opencolorio/fallback_impl.cc +++ b/intern/opencolorio/fallback_impl.cc @@ -381,8 +381,9 @@ void FallbackImpl::matrixTransformScale(float * , float * , const float *) { } -void FallbackImpl::setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor) +bool FallbackImpl::setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor, bool predivide) { + return false; } void FallbackImpl::finishGLSLDraw(OCIO_GLSLDrawState *state) diff --git a/intern/opencolorio/ocio_capi.cc b/intern/opencolorio/ocio_capi.cc index 73d8af295f2..c8db2c2b531 100644 --- a/intern/opencolorio/ocio_capi.cc +++ b/intern/opencolorio/ocio_capi.cc @@ -283,9 +283,9 @@ void OCIO_matrixTransformScale(float * m44, float * offset4, const float *scale4 impl->matrixTransformScale(m44, offset4, scale4f); } -void OCIO_setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor) +int OCIO_setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor, int predivide) { - impl->setupGLSLDraw(state_r, processor); + return (int) impl->setupGLSLDraw(state_r, processor, (bool) predivide); } void OCIO_finishGLSLDraw(struct OCIO_GLSLDrawState *state) diff --git a/intern/opencolorio/ocio_capi.h b/intern/opencolorio/ocio_capi.h index 3c42e0a1a1e..3632a0da1c6 100644 --- a/intern/opencolorio/ocio_capi.h +++ b/intern/opencolorio/ocio_capi.h @@ -121,7 +121,7 @@ void OCIO_matrixTransformRelease(OCIO_MatrixTransformRcPtr *mt); void OCIO_matrixTransformScale(float * m44, float * offset4, const float * scale4); -void OCIO_setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor); +int OCIO_setupGLSLDraw(struct OCIO_GLSLDrawState 
**state_r, OCIO_ConstProcessorRcPtr *processor, int predivide); void OCIO_finishGLSLDraw(struct OCIO_GLSLDrawState *state); void OCIO_freeOGLState(struct OCIO_GLSLDrawState *state); diff --git a/intern/opencolorio/ocio_impl.cc b/intern/opencolorio/ocio_impl.cc index 8803814ce3f..05c29fd5854 100644 --- a/intern/opencolorio/ocio_impl.cc +++ b/intern/opencolorio/ocio_impl.cc @@ -551,228 +551,3 @@ void OCIOImpl::matrixTransformScale(float * m44, float * offset4, const float *s { MatrixTransform::Scale(m44, offset4, scale4f); } - -/* **** OpenGL drawing routines using GLSL for color space transform ***** */ - -/* Some of the GLSL transform related functions below are adopted from - * ociodisplay utility of OpenColorIO project which are originally - * - * Copyright (c) 2003-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved. - */ - -typedef struct OCIO_GLSLDrawState { - bool lut3d_texture_allocated; /* boolean flag indicating whether - * lut texture is allocated - */ - - GLuint lut3d_texture; /* OGL texture ID for 3D LUT */ - - float *lut3d; /* 3D LUT table */ - - /* Cache */ - std::string lut3dcacheid; - std::string shadercacheid; - - /* GLSL stuff */ - GLuint fragShader; - GLuint program; - - /* Previous OpenGL state. 
*/ - GLint last_texture, last_texture_unit; -} OCIO_GLSLDrawState; - -static const char * g_fragShaderText = "" -"\n" -"uniform sampler2D tex1;\n" -"uniform sampler3D tex2;\n" -"\n" -"void main()\n" -"{\n" -" vec4 col = texture2D(tex1, gl_TexCoord[0].st);\n" -" gl_FragColor = OCIODisplay(col, tex2);\n" -"}\n"; - -static GLuint compileShaderText(GLenum shaderType, const char *text) -{ - GLuint shader; - GLint stat; - - shader = glCreateShader(shaderType); - glShaderSource(shader, 1, (const GLchar **) &text, NULL); - glCompileShader(shader); - glGetShaderiv(shader, GL_COMPILE_STATUS, &stat); - - if (!stat) { - GLchar log[1000]; - GLsizei len; - glGetShaderInfoLog(shader, 1000, &len, log); - return 0; - } - - return shader; -} - -static GLuint linkShaders(GLuint fragShader) -{ - if (!fragShader) - return 0; - - GLuint program = glCreateProgram(); - - if (fragShader) - glAttachShader(program, fragShader); - - glLinkProgram(program); - - /* check link */ - { - GLint stat; - glGetProgramiv(program, GL_LINK_STATUS, &stat); - if (!stat) { - GLchar log[1000]; - GLsizei len; - glGetProgramInfoLog(program, 1000, &len, log); - fprintf(stderr, "Shader link error:\n%s\n", log); - return 0; - } - } - - return program; -} - -static OCIO_GLSLDrawState *allocateOpenGLState(void) -{ - OCIO_GLSLDrawState *state; - - /* Allocate memory for state. */ - state = (OCIO_GLSLDrawState *) MEM_callocN(sizeof(OCIO_GLSLDrawState), - "OCIO OpenGL State struct"); - - /* Call constructors on new memory. 
*/ - new (&state->lut3dcacheid) std::string(""); - new (&state->shadercacheid) std::string(""); - - return state; -} - -/* Ensure LUT texture and array are allocated */ -static void ensureLUT3DAllocated(OCIO_GLSLDrawState *state) -{ - int num_3d_entries = 3 * LUT3D_EDGE_SIZE * LUT3D_EDGE_SIZE * LUT3D_EDGE_SIZE; - - if (state->lut3d_texture_allocated) - return; - - glGenTextures(1, &state->lut3d_texture); - - state->lut3d = (float *) MEM_callocN(sizeof(float) * num_3d_entries, "OCIO GPU 3D LUT"); - - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_3D, state->lut3d_texture); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB16F_ARB, - LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE, - 0, GL_RGB,GL_FLOAT, &state->lut3d); - - state->lut3d_texture_allocated = true; -} - -/** - * Setup OpenGL contexts for a transform defined by processor using GLSL - * All LUT allocating baking and shader compilation happens here. - * - * Once this function is called, callee could start drawing images - * using regular 2D texture. - * - * When all drawing is finished, finishGLSLDraw shall be called to - * restore OpenGL context to it's pre-GLSL draw state. - */ -void OCIOImpl::setupGLSLDraw(OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor) -{ - ConstProcessorRcPtr ocio_processor = *(ConstProcessorRcPtr *) processor; - - /* Create state if needed. 
*/ - OCIO_GLSLDrawState *state; - if (!*state_r) - *state_r = allocateOpenGLState(); - state = *state_r; - - glGetIntegerv(GL_TEXTURE_2D, &state->last_texture); - glGetIntegerv(GL_ACTIVE_TEXTURE, &state->last_texture_unit); - - ensureLUT3DAllocated(state); - - /* Step 1: Create a GPU Shader Description */ - GpuShaderDesc shaderDesc; - shaderDesc.setLanguage(GPU_LANGUAGE_GLSL_1_0); - shaderDesc.setFunctionName("OCIODisplay"); - shaderDesc.setLut3DEdgeLen(LUT3D_EDGE_SIZE); - - /* Step 2: Compute the 3D LUT */ - std::string lut3dCacheID = ocio_processor->getGpuLut3DCacheID(shaderDesc); - if (lut3dCacheID != state->lut3dcacheid) { - state->lut3dcacheid = lut3dCacheID; - ocio_processor->getGpuLut3D(state->lut3d, shaderDesc); - - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_3D, state->lut3d_texture); - glTexSubImage3D(GL_TEXTURE_3D, 0, 0, 0, 0, - LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE, - GL_RGB, GL_FLOAT, state->lut3d); - } - - /* Step 3: Compute the Shader */ - std::string shaderCacheID = ocio_processor->getGpuShaderTextCacheID(shaderDesc); - if (state->program == 0 || shaderCacheID != state->shadercacheid) { - state->shadercacheid = shaderCacheID; - - std::ostringstream os; - os << ocio_processor->getGpuShaderText(shaderDesc) << "\n"; - os << g_fragShaderText; - - if (state->fragShader) - glDeleteShader(state->fragShader); - state->fragShader = compileShaderText(GL_FRAGMENT_SHADER, os.str().c_str()); - - if (state->program) - glDeleteProgram(state->program); - - state->program = linkShaders(state->fragShader); - } - - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_3D, state->lut3d_texture); - - glActiveTexture(GL_TEXTURE0); - - glUseProgram(state->program); - glUniform1i(glGetUniformLocation(state->program, "tex1"), 0); - glUniform1i(glGetUniformLocation(state->program, "tex2"), 1); -} - -void OCIOImpl::finishGLSLDraw(OCIO_GLSLDrawState *state) -{ - glActiveTexture(state->last_texture_unit); - glBindTexture(GL_TEXTURE_2D, 
state->last_texture); - glUseProgram(0); -} - -void OCIOImpl::freeGLState(struct OCIO_GLSLDrawState *state) -{ - using std::string; - - if (state->lut3d_texture_allocated) - glDeleteTextures(1, &state->lut3d_texture); - - if (state->lut3d) - MEM_freeN(state->lut3d); - - state->lut3dcacheid.~string(); - state->shadercacheid.~string(); - - MEM_freeN(state); -} diff --git a/intern/opencolorio/ocio_impl.h b/intern/opencolorio/ocio_impl.h index 2a1f88be5f4..a328470ccb5 100644 --- a/intern/opencolorio/ocio_impl.h +++ b/intern/opencolorio/ocio_impl.h @@ -96,7 +96,7 @@ public: virtual void matrixTransformScale(float * m44, float * offset4, const float * scale4) = 0; - virtual void setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor) = 0; + virtual bool setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor, bool predivide) = 0; virtual void finishGLSLDraw(struct OCIO_GLSLDrawState *state) = 0; virtual void freeGLState(struct OCIO_GLSLDrawState *state_r) = 0; }; @@ -169,7 +169,7 @@ public: void matrixTransformScale(float * m44, float * offset4, const float * scale4); - void setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor); + bool setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor, bool predivide); void finishGLSLDraw(struct OCIO_GLSLDrawState *state); void freeGLState(struct OCIO_GLSLDrawState *state_r); }; @@ -243,7 +243,7 @@ public: void matrixTransformScale(float * m44, float * offset4, const float * scale4); - void setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor); + bool setupGLSLDraw(struct OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor, bool predivide); void finishGLSLDraw(struct OCIO_GLSLDrawState *state); void freeGLState(struct OCIO_GLSLDrawState *state_r); }; diff --git a/intern/opencolorio/ocio_impl_glsl.cc b/intern/opencolorio/ocio_impl_glsl.cc index 
c79593779cf..9343a13e888 100644 --- a/intern/opencolorio/ocio_impl_glsl.cc +++ b/intern/opencolorio/ocio_impl_glsl.cc @@ -77,15 +77,33 @@ typedef struct OCIO_GLSLDrawState { GLint last_texture, last_texture_unit; } OCIO_GLSLDrawState; -static const char * g_fragShaderText = "" +/* Hardcoded to do alpha predivide before color space conversion */ +static const char *g_fragShaderText = "" "\n" "uniform sampler2D tex1;\n" "uniform sampler3D tex2;\n" +"uniform bool predivide;\n" "\n" "void main()\n" "{\n" " vec4 col = texture2D(tex1, gl_TexCoord[0].st);\n" -" gl_FragColor = OCIODisplay(col, tex2);\n" +" if (predivide == false || col[3] == 1.0f || col[3] == 0.0f) {\n" +" gl_FragColor = OCIODisplay(col, tex2);\n" +" } else {\n" +" float alpha = col[3];\n" +" float inv_alpha = 1.0f / alpha;\n" +"\n" +" col[0] *= inv_alpha;\n" +" col[1] *= inv_alpha;\n" +" col[2] *= inv_alpha;\n" +"\n" +" gl_FragColor = OCIODisplay(col, tex2);\n" +"\n" +" col[0] *= alpha;\n" +" col[1] *= alpha;\n" +" col[2] *= alpha;\n" +" }\n" +"\n" "}\n"; static GLuint compileShaderText(GLenum shaderType, const char *text) @@ -187,7 +205,7 @@ static void ensureLUT3DAllocated(OCIO_GLSLDrawState *state) * When all drawing is finished, finishGLSLDraw shall be called to * restore OpenGL context to it's pre-GLSL draw state. 
*/ -void OCIOImpl::setupGLSLDraw(OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor) +bool OCIOImpl::setupGLSLDraw(OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor, bool predivide) { ConstProcessorRcPtr ocio_processor = *(ConstProcessorRcPtr *) processor; @@ -232,22 +250,36 @@ void OCIOImpl::setupGLSLDraw(OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRc if (state->fragShader) glDeleteShader(state->fragShader); + state->fragShader = compileShaderText(GL_FRAGMENT_SHADER, os.str().c_str()); - if (state->program) - glDeleteProgram(state->program); + if (state->fragShader) { + if (state->program) + glDeleteProgram(state->program); - state->program = linkShaders(state->fragShader); + state->program = linkShaders(state->fragShader); + } } - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_3D, state->lut3d_texture); + if (state->program) { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_3D, state->lut3d_texture); + + glActiveTexture(GL_TEXTURE0); + + glUseProgram(state->program); + glUniform1i(glGetUniformLocation(state->program, "tex1"), 0); + glUniform1i(glGetUniformLocation(state->program, "tex2"), 1); + glUniform1i(glGetUniformLocation(state->program, "predivide"), predivide); - glActiveTexture(GL_TEXTURE0); + return true; + } + else { + glActiveTexture(state->last_texture_unit); + glBindTexture(GL_TEXTURE_2D, state->last_texture); - glUseProgram(state->program); - glUniform1i(glGetUniformLocation(state->program, "tex1"), 0); - glUniform1i(glGetUniformLocation(state->program, "tex2"), 1); + return false; + } } void OCIOImpl::finishGLSLDraw(OCIO_GLSLDrawState *state) @@ -267,6 +299,12 @@ void OCIOImpl::freeGLState(struct OCIO_GLSLDrawState *state) if (state->lut3d) MEM_freeN(state->lut3d); + if (state->program) + glDeleteProgram(state->program); + + if (state->fragShader) + glDeleteShader(state->fragShader); + state->lut3dcacheid.~string(); state->shadercacheid.~string(); |