diff options
author | Lukas Tönne <lukas.toenne@gmail.com> | 2016-08-09 17:37:15 +0300 |
---|---|---|
committer | Lukas Tönne <lukas.toenne@gmail.com> | 2016-08-09 17:37:15 +0300 |
commit | 4a801f6c6f0421ffd515c12422dd197441440520 (patch) | |
tree | 5415e844b460bb2aa07b9467c72e13abc2e61228 /intern/cycles/kernel | |
parent | 49c63d46db8c055152d9e431e89405f9b51a4bbe (diff) | |
parent | 02719521d2e25abcc8ffcccc086d3a651986f52f (diff) |
Merge branch 'master' into object_nodes
Diffstat (limited to 'intern/cycles/kernel')
81 files changed, 2809 insertions, 1932 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index bd3969b2889..7bef247d3bd 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -87,6 +87,7 @@ set(SRC_KERNELS_CPU_HEADERS ) set(SRC_CLOSURE_HEADERS + closure/alloc.h closure/bsdf.h closure/bsdf_ashikhmin_velvet.h closure/bsdf_diffuse.h @@ -140,6 +141,7 @@ set(SRC_SVM_HEADERS svm/svm_noisetex.h svm/svm_normal.h svm/svm_ramp.h + svm/svm_ramp_util.h svm/svm_sepcomb_hsv.h svm/svm_sepcomb_vector.h svm/svm_sky.h @@ -160,7 +162,9 @@ set(SRC_GEOM_HEADERS geom/geom_motion_curve.h geom/geom_motion_triangle.h geom/geom_object.h + geom/geom_patch.h geom/geom_primitive.h + geom/geom_subd_triangle.h geom/geom_triangle.h geom/geom_triangle_intersect.h geom/geom_volume.h @@ -241,12 +245,20 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_debug_flags "") endif() - set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") + set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE}) + set(cuda_nvcc_version ${CUDA_VERSION}) + + if(DEFINED CUDA_NVCC8_EXECUTABLE AND ((${arch} STREQUAL "sm_60") OR (${arch} STREQUAL "sm_61"))) + set(cuda_nvcc_command ${CUDA_NVCC8_EXECUTABLE}) + set(cuda_nvcc_version "80") + endif() + + set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}") set(cuda_math_flags "--use_fast_math") add_custom_command( OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} + COMMAND ${cuda_nvcc_command} -arch=${arch} ${CUDA_NVCC_FLAGS} -m${CUDA_BITS} @@ -263,7 +275,6 @@ if(WITH_CYCLES_CUDA_BINARIES) -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC - DEPENDS ${cuda_sources}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) @@ -271,6 +282,9 @@ if(WITH_CYCLES_CUDA_BINARIES) unset(cuda_extra_flags) unset(cuda_debug_flags) + + unset(cuda_nvcc_command) + unset(cuda_nvcc_version) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h 
b/intern/cycles/kernel/bvh/bvh_shadow_all.h index b27afaa9869..e9eeff31ecc 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -37,11 +37,16 @@ * */ -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) { /* todo: * - likely and unlikely for if() statements @@ -254,9 +259,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* shadow ray early termination */ if(hit) { - /* Update number of hits now, so we do proper check on max bounces. */ - (*num_hits)++; - /* detect if this surface has a shader with transparent shadows */ /* todo: optimize so primitive visibility flag indicates if @@ -283,15 +285,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, return true; } /* if maximum number of hits reached, block all light */ - else if(*num_hits >= max_hits) { + else if(*num_hits == max_hits) { return true; } + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; #if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; #endif - /* Move on to next entry in intersections array */ - isect_array++; + + isect_array->t = isect_t; } prim_addr++; diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h index 18978efcfa3..d9623c94b2e 100644 --- a/intern/cycles/kernel/bvh/bvh_subsurface.h +++ b/intern/cycles/kernel/bvh/bvh_subsurface.h @@ -35,12 +35,17 @@ * */ -ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +void 
BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + SubsurfaceIntersection *ss_isect, + int subsurface_object, + uint *lcg_state, + int max_hits) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index 68a11b65ad7..b1a52968a26 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -40,16 +40,21 @@ * */ -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, - float difl, - float extmax + , uint *lcg_state, + float difl, + float extmax #endif - ) + ) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h index 03499e94347..107373c17dc 100644 --- a/intern/cycles/kernel/bvh/bvh_volume.h +++ b/intern/cycles/kernel/bvh/bvh_volume.h @@ -36,10 +36,15 @@ * */ -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h index d7f6bf86c71..1f6515c9862 100644 --- a/intern/cycles/kernel/bvh/bvh_volume_all.h +++ b/intern/cycles/kernel/bvh/bvh_volume_all.h @@ -36,11 +36,16 @@ * */ -ccl_device uint 
BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) @@ -201,12 +206,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, object, prim_addr); if(hit) { - /* Update number of hits now, so we do proper check on max bounces. */ + /* Move on to next entry in intersections array. */ + isect_array++; num_hits++; #if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; #endif - if(num_hits >= max_hits) { + isect_array->t = isect_t; + if(num_hits == max_hits) { #if BVH_FEATURE(BVH_INSTANCING) # if BVH_FEATURE(BVH_MOTION) float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); @@ -220,9 +227,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, #endif /* BVH_FEATURE(BVH_INSTANCING) */ return num_hits; } - /* Move on to next entry in intersections array */ - isect_array++; - isect_array->t = isect_t; } } break; @@ -247,12 +251,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, object, prim_addr); if(hit) { - /* Update number of hits now, so we do proper check on max bounces. */ + /* Move on to next entry in intersections array. 
*/ + isect_array++; num_hits++; # if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; # endif - if(num_hits >= max_hits) { + isect_array->t = isect_t; + if(num_hits == max_hits) { # if BVH_FEATURE(BVH_INSTANCING) # if BVH_FEATURE(BVH_MOTION) float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); @@ -266,9 +272,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, # endif /* BVH_FEATURE(BVH_INSTANCING) */ return num_hits; } - /* Move on to next entry in intersections array */ - isect_array++; - isect_array->t = isect_t; } } break; diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index eb98eaf7455..3a728b388eb 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -337,9 +337,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Shadow ray early termination. */ if(hit) { - /* Update number of hits now, so we do proper check on max bounces. */ - (*num_hits)++; - /* detect if this surface has a shader with transparent shadows */ /* todo: optimize so primitive visibility flag indicates if @@ -366,15 +363,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, return true; } /* if maximum number of hits reached, block all light */ - else if(*num_hits >= max_hits) { + else if(*num_hits == max_hits) { return true; } + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; #if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; #endif - /* Move on to next entry in intersections array */ - isect_array++; + isect_array->t = isect_t; } diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h index 90cad9d91c0..4d3028b37bf 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume_all.h +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -268,12 +268,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Intersect ray against primitive. 
*/ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, prim_addr); if(hit) { - /* Update number of hits now, so we do proper check on max bounces. */ + /* Move on to next entry in intersections array. */ + isect_array++; num_hits++; #if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; #endif - if(num_hits >= max_hits) { + isect_array->t = isect_t; + if(num_hits == max_hits) { #if BVH_FEATURE(BVH_INSTANCING) # if BVH_FEATURE(BVH_MOTION) float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); @@ -287,9 +289,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, #endif /* BVH_FEATURE(BVH_INSTANCING) */ return num_hits; } - /* Move on to next entry in intersections array */ - isect_array++; - isect_array->t = isect_t; } } break; @@ -307,12 +306,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Intersect ray against primitive. */ hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); if(hit) { - /* Update number of hits now, so we do proper check on max bounces. */ + /* Move on to next entry in intersections array. 
*/ + isect_array++; num_hits++; # if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; # endif - if(num_hits >= max_hits) { + isect_array->t = isect_t; + if(num_hits == max_hits) { # if BVH_FEATURE(BVH_INSTANCING) # if BVH_FEATURE(BVH_MOTION) float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); @@ -326,9 +327,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, # endif /* BVH_FEATURE(BVH_INSTANCING) */ return num_hits; } - /* Move on to next entry in intersections array */ - isect_array++; - isect_array->t = isect_t; } } break; diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h new file mode 100644 index 00000000000..b7abc1ec507 --- /dev/null +++ b/intern/cycles/kernel/closure/alloc.h @@ -0,0 +1,90 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight) +{ + kernel_assert(size <= sizeof(ShaderClosure)); + + int num_closure = ccl_fetch(sd, num_closure); + int num_closure_extra = ccl_fetch(sd, num_closure_extra); + if(num_closure + num_closure_extra >= MAX_CLOSURE) + return NULL; + + ShaderClosure *sc = &ccl_fetch(sd, closure)[num_closure]; + + sc->type = type; + sc->weight = weight; + + ccl_fetch(sd, num_closure)++; + + return sc; +} + +ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size) +{ + /* Allocate extra space for closure that need more parameters. We allocate + * in chunks of sizeof(ShaderClosure) starting from the end of the closure + * array. + * + * This lets us keep the same fast array iteration over closures, as we + * found linked list iteration and iteration with skipping to be slower. */ + int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); + int num_closure = ccl_fetch(sd, num_closure); + int num_closure_extra = ccl_fetch(sd, num_closure_extra) + num_extra; + + if(num_closure + num_closure_extra > MAX_CLOSURE) { + /* Remove previous closure. */ + ccl_fetch(sd, num_closure)--; + ccl_fetch(sd, num_closure_extra)++; + return NULL; + } + + ccl_fetch(sd, num_closure_extra) = num_closure_extra; + return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - num_closure_extra); +} + +ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight) +{ + ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + + if(!sc) + return NULL; + + float sample_weight = fabsf(average(weight)); + sc->sample_weight = sample_weight; + return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? 
sc : NULL; +} + +#ifdef __OSL__ +ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data) +{ + ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + + if(!sc) + return NULL; + + memcpy(sc, data, size); + + float sample_weight = fabsf(average(weight)); + sc->weight = weight; + sc->sample_weight = sample_weight; + return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index f318a61f3a3..1e7fbdb5450 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -36,15 +36,18 @@ CCL_NAMESPACE_BEGIN -ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device_inline int bsdf_sample(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { int label; -#ifdef __OSL__ - if(kg->osl && sc->prim) - return OSLShader::bsdf_sample(sd, sc, randu, randv, *eval, *omega_in, *domega_in, *pdf); -#endif - switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: @@ -56,14 +59,16 @@ ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosur label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; - /*case CLOSURE_BSDF_PHONG_RAMP_ID: +#ifdef __OSL__ + case CLOSURE_BSDF_PHONG_RAMP_ID: label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, 
Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break;*/ + break; +#endif case CLOSURE_BSDF_TRANSLUCENT_ID: label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); @@ -139,15 +144,19 @@ ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosur return label; } -ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf) +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif +float3 bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + const float3 omega_in, + float *pdf) { float3 eval; -#ifdef __OSL__ - if(kg->osl && sc->prim) - return OSLShader::bsdf_eval(sd, sc, omega_in, *pdf); -#endif - if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) { switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: @@ -158,12 +167,14 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosu case CLOSURE_BSDF_OREN_NAYAR_ID: eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; - /*case CLOSURE_BSDF_PHONG_RAMP_ID: +#ifdef __OSL__ + case CLOSURE_BSDF_PHONG_RAMP_ID: eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); - break;*/ + break; +#endif case CLOSURE_BSDF_TRANSLUCENT_ID: eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; @@ -296,15 +307,7 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosu ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) { -/* ToDo: do we want to blur volume closures? 
*/ - -#ifdef __OSL__ - if(kg->osl && sc->prim) { - OSLShader::bsdf_blur(sc, roughness); - return; - } -#endif - + /* ToDo: do we want to blur volume closures? */ #ifdef __SVM__ switch(sc->type) { case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: @@ -331,5 +334,48 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) #endif } +ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b) +{ +#ifdef __SVM__ + switch(a->type) { + case CLOSURE_BSDF_TRANSPARENT_ID: + return true; + case CLOSURE_BSDF_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_ID: + case CLOSURE_BSDF_TRANSLUCENT_ID: + return bsdf_diffuse_merge(a, b); + case CLOSURE_BSDF_OREN_NAYAR_ID: + return bsdf_oren_nayar_merge(a, b); + case CLOSURE_BSDF_REFLECTION_ID: + case CLOSURE_BSDF_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: + return bsdf_microfacet_merge(a, b); + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: + return bsdf_ashikhmin_velvet_merge(a, b); + case CLOSURE_BSDF_DIFFUSE_TOON_ID: + case CLOSURE_BSDF_GLOSSY_TOON_ID: + return bsdf_toon_merge(a, b); + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: + return bsdf_hair_merge(a, b); +#ifdef __VOLUME__ + case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: + return volume_henyey_greenstein_merge(a, b); +#endif + default: + return false; + } +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index 8d7d533d6f8..9929246ae5c 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ 
b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -31,28 +31,30 @@ Other than that, the implementation directly follows the paper. CCL_NAMESPACE_BEGIN -ccl_device int bsdf_ashikhmin_shirley_setup(ShaderClosure *sc) +ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf) { - sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); - sc->data1 = sc->data0; + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = bsdf->alpha_x; - sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; + bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_ashikhmin_shirley_aniso_setup(ShaderClosure *sc) +ccl_device int bsdf_ashikhmin_shirley_aniso_setup(MicrofacetBsdf *bsdf) { - sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); - sc->data1 = clamp(sc->data1, 1e-4f, 1.0f); + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); - sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID; + bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness) { - sc->data0 = fmaxf(roughness, sc->data0); /* clamp roughness */ - sc->data1 = fmaxf(roughness, sc->data1); + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float roughness) @@ -60,16 +62,21 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough return 2.0f / (roughness*roughness) - 2.0f; } -ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device_inline float3 bsdf_ashikhmin_shirley_eval_reflect( + const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const 
MicrofacetBsdf*)sc; + float3 N = bsdf->N; float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */ float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */ float out = 0.0f; - if(fmaxf(sc->data0, sc->data1) <= 1e-4f) + if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) return make_float3(0.0f, 0.0f, 0.0f); if(NdotI > 0.0f && NdotO > 0.0f) { @@ -82,8 +89,8 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */ /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */ - float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0); - float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1); + float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); + float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); if(n_x == n_y) { /* isotropic */ @@ -97,12 +104,18 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c else { /* anisotropic */ float3 X, Y; - make_orthonormals_tangent(N, sc->T, &X, &Y); + make_orthonormals_tangent(N, bsdf->T, &X, &Y); float HdotX = dot(H, X); float HdotY = dot(H, Y); - float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN); - float lobe = powf(HdotN, e); + float lobe; + if(HdotN < 1.0f) { + float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN); + lobe = powf(HdotN, e); + } + else { + lobe = 1.0f; + } float norm = sqrtf((n_x + 1.0f)*(n_y + 1.0f)) / (8.0f * M_PI_F); out = NdotO * norm * lobe * pump; @@ -128,13 +141,14 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, f ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 
*omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float3 N = bsdf->N; float NdotI = dot(N, I); if(NdotI > 0.0f) { - float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0); - float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1); + float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); + float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); /* get x,y basis on the surface for anisotropy */ float3 X, Y; @@ -142,7 +156,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, if(n_x == n_y) make_orthonormals(N, &X, &Y); else - make_orthonormals_tangent(N, sc->T, &X, &Y); + make_orthonormals_tangent(N, bsdf->T, &X, &Y); /* sample spherical coords for h in tangent space */ float phi; @@ -193,7 +207,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, /* reflect I on H to get omega_in */ *omega_in = -I + (2.0f * HdotI) * H; - if(fmaxf(sc->data0, sc->data1) <= 1e-4f) { + if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) { /* Some high number for MIS. 
*/ *pdf = 1e6f; *eval = make_float3(1e6f, 1e6f, 1e6f); diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h index f1a26650078..7e0f5a7ec75 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h @@ -35,20 +35,38 @@ CCL_NAMESPACE_BEGIN -ccl_device int bsdf_ashikhmin_velvet_setup(ShaderClosure *sc) +typedef ccl_addr_space struct VelvetBsdf { + SHADER_CLOSURE_BASE; + + float sigma; + float invsigma2; + float3 N; +} VelvetBsdf; + +ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf) { - float sigma = fmaxf(sc->data0, 0.01f); - sc->data0 = 1.0f/(sigma * sigma); /* m_invsigma2 */ + float sigma = fmaxf(bsdf->sigma, 0.01f); + bsdf->invsigma2 = 1.0f/(sigma * sigma); - sc->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID; + bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_ashikhmin_velvet_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const VelvetBsdf *bsdf_a = (const VelvetBsdf*)a; + const VelvetBsdf *bsdf_b = (const VelvetBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && + (bsdf_a->sigma == bsdf_b->sigma); +} + ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float m_invsigma2 = sc->data0; - float3 N = sc->N; + const VelvetBsdf *bsdf = (const VelvetBsdf*)sc; + float m_invsigma2 = bsdf->invsigma2; + float3 N = bsdf->N; float cosNO = dot(N, I); float cosNI = dot(N, omega_in); @@ -90,8 +108,9 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, c ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float m_invsigma2 = sc->data0; - float3 N = sc->N; + const VelvetBsdf *bsdf = 
(const VelvetBsdf*)sc; + float m_invsigma2 = bsdf->invsigma2; + float3 N = bsdf->N; // we are viewing the surface from above - send a ray out with uniform // distribution over the hemisphere diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index 4b29bb096d1..dcd187f9305 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -35,17 +35,31 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct DiffuseBsdf { + SHADER_CLOSURE_BASE; + float3 N; +} DiffuseBsdf; + /* DIFFUSE */ -ccl_device int bsdf_diffuse_setup(ShaderClosure *sc) +ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf) { - sc->type = CLOSURE_BSDF_DIFFUSE_ID; + bsdf->type = CLOSURE_BSDF_DIFFUSE_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const DiffuseBsdf *bsdf_a = (const DiffuseBsdf*)a; + const DiffuseBsdf *bsdf_b = (const DiffuseBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N)); +} + ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float3 N = sc->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; + float3 N = bsdf->N; float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; *pdf = cos_pi; @@ -59,7 +73,8 @@ ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc, const floa ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float3 N = sc->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; + float3 N = bsdf->N; // distribution over the hemisphere sample_cos_hemisphere(N, randu, randv, omega_in, pdf); @@ -80,9 +95,9 @@ ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I, /* TRANSLUCENT */ -ccl_device int 
bsdf_translucent_setup(ShaderClosure *sc) +ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf) { - sc->type = CLOSURE_BSDF_TRANSLUCENT_ID; + bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } @@ -93,7 +108,8 @@ ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, const f ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float3 N = sc->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; + float3 N = bsdf->N; float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F; *pdf = cos_pi; @@ -102,7 +118,8 @@ ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float3 N = sc->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; + float3 N = bsdf->N; // we are viewing the surface from the right side - send a ray out with cosine // distribution over the hemisphere diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index e0287e7655a..2d982a95fe4 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -35,7 +35,16 @@ CCL_NAMESPACE_BEGIN -ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const float3 colors[8], float pos) +#ifdef __OSL__ + +typedef ccl_addr_space struct DiffuseRampBsdf { + SHADER_CLOSURE_BASE; + + float3 N; + float3 *colors; +} DiffuseRampBsdf; + +ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos) { int MAXCOLORS = 8; @@ -49,11 +58,9 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; } -ccl_device int 
bsdf_diffuse_ramp_setup(ShaderClosure *sc) +ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf) { - sc->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID; - sc->data0 = 0.0f; - sc->data1 = 0.0f; + bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } @@ -61,29 +68,31 @@ ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness) { } -ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float3 N = sc->N; + const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc; + float3 N = bsdf->N; float cos_pi = fmaxf(dot(N, omega_in), 0.0f); *pdf = cos_pi * M_1_PI_F; - return bsdf_diffuse_ramp_get_color(sc, colors, cos_pi) * M_1_PI_F; + return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F; } -ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, const float3 colors[8], float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float3 N = sc->N; + const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc; + float3 N = bsdf->N; // distribution over the hemisphere sample_cos_hemisphere(N, randu, randv, omega_in, pdf); if(dot(Ng, *omega_in) > 0.0f) { - *eval = 
bsdf_diffuse_ramp_get_color(sc, colors, *pdf * M_PI_F) * M_1_PI_F; + *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F; #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; @@ -95,6 +104,8 @@ ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, const float3 co return LABEL_REFLECT|LABEL_DIFFUSE; } +#endif /* __OSL__ */ + CCL_NAMESPACE_END #endif /* __BSDF_DIFFUSE_RAMP_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index 1e81617a7d3..bede5f45e7e 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -35,29 +35,49 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct HairBsdf { + SHADER_CLOSURE_BASE; -ccl_device int bsdf_hair_reflection_setup(ShaderClosure *sc) + float3 T; + float roughness1; + float roughness2; + float offset; +} HairBsdf; + +ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf) { - sc->type = CLOSURE_BSDF_HAIR_REFLECTION_ID; - sc->data0 = clamp(sc->data0, 0.001f, 1.0f); - sc->data1 = clamp(sc->data1, 0.001f, 1.0f); + bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID; + bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); + bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc) +ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf) { - sc->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID; - sc->data0 = clamp(sc->data0, 0.001f, 1.0f); - sc->data1 = clamp(sc->data1, 0.001f, 1.0f); + bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID; + bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); + bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_hair_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const HairBsdf *bsdf_a = (const HairBsdf*)a; + const HairBsdf *bsdf_b = 
(const HairBsdf*)b; + + return (isequal_float3(bsdf_a->T, bsdf_b->T)) && + (bsdf_a->roughness1 == bsdf_b->roughness1) && + (bsdf_a->roughness2 == bsdf_b->roughness2) && + (bsdf_a->offset == bsdf_b->offset); +} + ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float offset = sc->data2; - float3 Tg = sc->T; - float roughness1 = sc->data0; - float roughness2 = sc->data1; + const HairBsdf *bsdf = (const HairBsdf*)sc; + float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); @@ -107,10 +127,11 @@ ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, co ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float offset = sc->data2; - float3 Tg = sc->T; - float roughness1 = sc->data0; - float roughness2 = sc->data1; + const HairBsdf *bsdf = (const HairBsdf*)sc; + float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); @@ -148,10 +169,11 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float offset = sc->data2; - float3 Tg = sc->T; - float roughness1 = sc->data0; - float roughness2 = sc->data1; + const HairBsdf *bsdf = (const HairBsdf*)sc; + float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); float3 locx = cross(locy, Tg); 
@@ -198,10 +220,11 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float offset = sc->data2; - float3 Tg = sc->T; - float roughness1 = sc->data0; - float roughness2 = sc->data1; + const HairBsdf *bsdf = (const HairBsdf*)sc; + float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); float3 locx = cross(locy, Tg); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 7bf7c2806d4..9da73f66da0 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -35,6 +35,19 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct MicrofacetExtra { + float3 color; +} MicrofacetExtra; + +typedef ccl_addr_space struct MicrofacetBsdf { + SHADER_CLOSURE_BASE; + + float alpha_x, alpha_y, ior; + MicrofacetExtra *extra; + float3 T; + float3 N; +} MicrofacetBsdf; + /* Beckmann and GGX microfacet importance sampling. */ ccl_device_inline void microfacet_beckmann_sample_slopes( @@ -233,48 +246,66 @@ ccl_device_inline float3 microfacet_sample_stretched( * Anisotropy is only supported for reflection currently, but adding it for * transmission is just a matter of copying code from reflection if needed. 
*/ -ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf) { - sc->data0 = saturate(sc->data0); /* alpha_x */ - sc->data1 = sc->data0; /* alpha_y */ + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - sc->type = CLOSURE_BSDF_MICROFACET_GGX_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc) +ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf*)a; + const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && + (bsdf_a->alpha_x == bsdf_b->alpha_x) && + (bsdf_a->alpha_y == bsdf_b->alpha_y) && + (isequal_float3(bsdf_a->T, bsdf_b->T)) && + (bsdf_a->ior == bsdf_b->ior) && + ((!bsdf_a->extra && !bsdf_b->extra) || + ((bsdf_a->extra && bsdf_b->extra) && + (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)))); +} + +ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf) { - sc->data0 = saturate(sc->data0); /* alpha_x */ - sc->data1 = saturate(sc->data1); /* alpha_y */ + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = saturate(bsdf->alpha_y); - sc->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf) { - sc->data0 = saturate(sc->data0); /* alpha_x */ - sc->data1 = sc->data0; /* alpha_y */ + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - sc->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness) { - 
sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */ - sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */ + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float alpha_x = sc->data0; - float alpha_y = sc->data1; - bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + float3 N = bsdf->N; if(m_refractive || alpha_x*alpha_y <= 1e-7f) return make_float3(0.0f, 0.0f, 0.0f); @@ -305,7 +336,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons else { /* anisotropic */ float3 X, Y, Z = N; - make_orthonormals_tangent(Z, sc->T, &X, &Y); + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); /* distribution */ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); @@ -361,11 +392,12 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float alpha_x = sc->data0; - float alpha_y = sc->data1; - float m_eta = sc->data2; - bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + float m_eta = bsdf->ior; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + float3 N = bsdf->N; if(!m_refractive || alpha_x*alpha_y <= 1e-7f) return make_float3(0.0f, 0.0f, 0.0f); @@ -415,10 +447,11 @@ ccl_device float3 
bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float alpha_x = sc->data0; - float alpha_y = sc->data1; - bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + float3 N = bsdf->N; float cosNO = dot(N, I); if(cosNO > 0) { @@ -427,7 +460,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure if(alpha_x == alpha_y) make_orthonormals(Z, &X, &Y); else - make_orthonormals_tangent(Z, sc->T, &X, &Y); + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); /* importance sampling with distribution of visible normals. vectors are * transformed to local space before and after */ @@ -522,7 +555,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure #ifdef __RAY_DIFFERENTIALS__ float3 dRdx, dRdy, dTdx, dTdy; #endif - float m_eta = sc->data2, fresnel; + float m_eta = bsdf->ior, fresnel; bool inside; fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, @@ -582,37 +615,39 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure * Microfacet Models for Refraction through Rough Surfaces * B. Walter, S. R. Marschner, H. Li, K. E. 
Torrance, EGSR 2007 */ -ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf) { - sc->data0 = saturate(sc->data0); /* alpha_x */ - sc->data1 = sc->data0; /* alpha_y */ + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_beckmann_aniso_setup(MicrofacetBsdf *bsdf) { - sc->data0 = saturate(sc->data0); /* alpha_x */ - sc->data1 = saturate(sc->data1); /* alpha_y */ + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = saturate(bsdf->alpha_y); - sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf) { - sc->data0 = saturate(sc->data0); /* alpha_x */ - sc->data1 = sc->data0; /* alpha_y */ + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness) { - sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */ - sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */ + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } ccl_device_inline float bsdf_beckmann_G1(float alpha, float cos_n) @@ -647,10 +682,11 @@ ccl_device_inline float bsdf_beckmann_aniso_G1(float alpha_x, float alpha_y, flo ccl_device float3 
bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float alpha_x = sc->data0; - float alpha_y = sc->data1; - bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + float3 N = bsdf->N; if(m_refractive || alpha_x*alpha_y <= 1e-7f) return make_float3(0.0f, 0.0f, 0.0f); @@ -682,7 +718,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, else { /* anisotropic */ float3 X, Y, Z = N; - make_orthonormals_tangent(Z, sc->T, &X, &Y); + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); /* distribution */ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); @@ -722,11 +758,12 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float alpha_x = sc->data0; - float alpha_y = sc->data1; - float m_eta = sc->data2; - bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + float m_eta = bsdf->ior; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + float3 N = bsdf->N; if(!m_refractive || alpha_x*alpha_y <= 1e-7f) return make_float3(0.0f, 0.0f, 0.0f); @@ -773,10 +810,11 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, 
float3 *domega_in_dy, float *pdf) { - float alpha_x = sc->data0; - float alpha_y = sc->data1; - bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + float3 N = bsdf->N; float cosNO = dot(N, I); if(cosNO > 0) { @@ -785,7 +823,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl if(alpha_x == alpha_y) make_orthonormals(Z, &X, &Y); else - make_orthonormals_tangent(Z, sc->T, &X, &Y); + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); /* importance sampling with distribution of visible normals. vectors are * transformed to local space before and after */ @@ -872,7 +910,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl #ifdef __RAY_DIFFERENTIALS__ float3 dRdx, dRdy, dTdx, dTdy; #endif - float m_eta = sc->data2, fresnel; + float m_eta = bsdf->ior, fresnel; bool inside; fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 51b12fe4e45..df848c3d179 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -42,7 +42,7 @@ ccl_device_inline float D_ggx_aniso(const float3 wm, const float2 alpha) /* Sample slope distribution (based on page 14 of the supplemental implementation). 
*/ ccl_device_inline float2 mf_sampleP22_11(const float cosI, const float2 randU) { - if(cosI > 0.9999f) { + if(cosI > 0.9999f || cosI < 1e-6f) { const float r = sqrtf(randU.x / (1.0f - randU.x)); const float phi = M_2PI_F * randU.y; return make_float2(r*cosf(phi), r*sinf(phi)); @@ -117,7 +117,7 @@ ccl_device_inline float3 mf_eval_phase_glossy(const float3 w, const float lambda if(dotW_WH < 0.0f) return make_float3(0.0f, 0.0f, 0.0f); - float phase = max(0.0f, dotW_WH) * 0.25f / (pArea * dotW_WH); + float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f); if(alpha.x == alpha.y) phase *= D_ggx(wh, alpha.x); else @@ -200,9 +200,9 @@ ccl_device_inline float mf_lambda(const float3 w, const float2 alpha) if(w.z > 0.9999f) return 0.0f; else if(w.z < -0.9999f) - return -1.0f; + return -0.9999f; - const float inv_wz2 = 1.0f / (w.z*w.z); + const float inv_wz2 = 1.0f / max(w.z*w.z, 1e-7f); const float2 wa = make_float2(w.x, w.y)*alpha; float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2); if(w.z <= 0.0f) @@ -271,7 +271,10 @@ ccl_device_inline float mf_ggx_albedo(float r) ccl_device_inline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha) { - return 0.25f * D_ggx(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, make_float2(alpha, alpha))) * wi.z) + (1.0f - mf_ggx_albedo(alpha)) * wo.z; + float D = D_ggx(normalize(wi+wo), alpha); + float lambda = mf_lambda(wi, make_float2(alpha, alpha)); + float albedo = mf_ggx_albedo(alpha); + return 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f) + (1.0f - albedo) * wo.z; } ccl_device_inline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha) @@ -325,40 +328,42 @@ ccl_device_inline float mf_glass_pdf(const float3 wi, const float3 wo, const flo ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness) { - sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */ - sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */ + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + + 
bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } /* === Closure implementations === */ /* Multiscattering GGX Glossy closure */ -ccl_device int bsdf_microfacet_multi_ggx_common_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf) { - sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); /* alpha */ - sc->data1 = clamp(sc->data1, 1e-4f, 1.0f); - sc->custom1 = saturate(sc->custom1); /* color */ - sc->custom2 = saturate(sc->custom2); - sc->custom3 = saturate(sc->custom3); + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); + bsdf->extra->color.x = saturate(bsdf->extra->color.x); + bsdf->extra->color.y = saturate(bsdf->extra->color.y); + bsdf->extra->color.z = saturate(bsdf->extra->color.z); - sc->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG|SD_BSDF_HAS_CUSTOM; + return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; } -ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf) { - if(is_zero(sc->T)) - sc->T = make_float3(1.0f, 0.0f, 0.0f); + if(is_zero(bsdf->T)) + bsdf->T = make_float3(1.0f, 0.0f, 0.0f); - return bsdf_microfacet_multi_ggx_common_setup(sc); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device int bsdf_microfacet_multi_ggx_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf) { - sc->data1 = sc->data0; + bsdf->alpha_y = bsdf->alpha_x; - return bsdf_microfacet_multi_ggx_common_setup(sc); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { @@ -367,11 +372,12 @@ ccl_device float3 
bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *s } ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { - bool is_aniso = (sc->data0 != sc->data1); + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y); float3 X, Y, Z; - Z = sc->N; + Z = bsdf->N; if(is_aniso) - make_orthonormals_tangent(Z, sc->T, &X, &Y); + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); else make_orthonormals(Z, &X, &Y); @@ -379,94 +385,115 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); if(is_aniso) - *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(sc->data0, sc->data1)); + *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y)); else - *pdf = mf_ggx_pdf(localI, localO, sc->data0); - return mf_eval_glossy(localI, localO, true, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, NULL, NULL); + *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x); + return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL); } ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state) { - bool is_aniso = (sc->data0 != sc->data1); + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y); float3 X, Y, Z; - Z = sc->N; + Z = bsdf->N; if(is_aniso) - make_orthonormals_tangent(Z, sc->T, &X, &Y); + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); else make_orthonormals(Z, &X, &Y); float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); 
float3 localO; - *eval = mf_sample_glossy(localI, &localO, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, NULL, NULL); + *eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL); if(is_aniso) - *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(sc->data0, sc->data1)); + *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y)); else - *pdf = mf_ggx_pdf(localI, localO, sc->data0); + *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x); *eval *= *pdf; *omega_in = X*localO.x + Y*localO.y + Z*localO.z; +#ifdef __RAY_DIFFERENTIALS__ + *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; + *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; +#endif return LABEL_REFLECT|LABEL_GLOSSY; } /* Multiscattering GGX Glass closure */ -ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ShaderClosure *sc) +ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf) { - sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); /* alpha */ - sc->data1 = sc->data0; - sc->data2 = max(0.0f, sc->data2); /* ior */ - sc->custom1 = saturate(sc->custom1); /* color */ - sc->custom2 = saturate(sc->custom2); - sc->custom3 = saturate(sc->custom3); + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = max(0.0f, bsdf->ior); + bsdf->extra->color.x = saturate(bsdf->extra->color.x); + bsdf->extra->color.y = saturate(bsdf->extra->color.y); + bsdf->extra->color.z = saturate(bsdf->extra->color.z); - sc->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG|SD_BSDF_HAS_CUSTOM; + return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; } ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { + const MicrofacetBsdf *bsdf = (const 
MicrofacetBsdf*)sc; float3 X, Y, Z; - Z = sc->N; + Z = bsdf->N; make_orthonormals(Z, &X, &Y); float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); - *pdf = mf_glass_pdf(localI, localO, sc->data0, sc->data2); - return mf_eval_glass(localI, localO, false, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, sc->data2); + *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); + return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior); } ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; float3 X, Y, Z; - Z = sc->N; + Z = bsdf->N; make_orthonormals(Z, &X, &Y); float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); - *pdf = mf_glass_pdf(localI, localO, sc->data0, sc->data2); - return mf_eval_glass(localI, localO, true, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, sc->data2); + *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); + return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior); } ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state) { + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; float3 X, Y, Z; - Z = sc->N; + Z = bsdf->N; make_orthonormals(Z, &X, &Y); float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); float3 localO; - *eval = 
mf_sample_glass(localI, &localO, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, sc->data2); - *pdf = mf_glass_pdf(localI, localO, sc->data0, sc->data2); + *eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior); + *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); *eval *= *pdf; *omega_in = X*localO.x + Y*localO.y + Z*localO.z; - if(localO.z*localI.z > 0.0f) + if(localO.z*localI.z > 0.0f) { +#ifdef __RAY_DIFFERENTIALS__ + *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; + *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; +#endif return LABEL_REFLECT|LABEL_GLOSSY; - else + } + else { +#ifdef __RAY_DIFFERENTIALS__ + float cosI = dot(Z, I); + float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI*cosI))), 1e-7f); + *domega_in_dx = -(bsdf->ior * dIdx) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z; + *domega_in_dy = -(bsdf->ior * dIdy) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z; +#endif + return LABEL_TRANSMIT|LABEL_GLOSSY; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h index afd4a8da62a..6ebe2f6a751 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h @@ -25,11 +25,18 @@ * energy is used. In combination with MIS, that is enough to produce an unbiased result, although * the balance heuristic isn't necessarily optimal anymore. 
*/ -ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint* lcg_state +ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)( + float3 wi, + float3 wo, + const bool wo_outside, + const float3 color, + const float alpha_x, + const float alpha_y, + ccl_addr_space uint *lcg_state #ifdef MF_MULTI_GLASS - , const float eta + , const float eta #elif defined(MF_MULTI_GLOSSY) - , float3 *n, float3 *k + , float3 *n, float3 *k #endif ) { diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 61b7cb11b02..cb342a026ef 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -19,39 +19,59 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct OrenNayarBsdf { + SHADER_CLOSURE_BASE; + + float3 N; + float roughness; + float a; + float b; +} OrenNayarBsdf; + ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 n, float3 v, float3 l) { + const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc; float nl = max(dot(n, l), 0.0f); float nv = max(dot(n, v), 0.0f); float t = dot(l, v) - nl * nv; if(t > 0.0f) t /= max(nl, nv) + FLT_MIN; - float is = nl * (sc->data0 + sc->data1 * t); + float is = nl * (bsdf->a + bsdf->b * t); return make_float3(is, is, is); } -ccl_device int bsdf_oren_nayar_setup(ShaderClosure *sc) +ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf) { - float sigma = sc->data0; + float sigma = bsdf->roughness; - sc->type = CLOSURE_BSDF_OREN_NAYAR_ID; + bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID; sigma = saturate(sigma); float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma); - sc->data0 = 1.0f * div; - sc->data1 = sigma * div; + bsdf->a = 1.0f * div; + bsdf->b = sigma * div; return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_oren_nayar_merge(const ShaderClosure *a, const ShaderClosure *b) 
+{ + const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf*)a; + const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && + (bsdf_a->roughness == bsdf_b->roughness); +} + ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - if(dot(sc->N, omega_in) > 0.0f) { + const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc; + if(dot(bsdf->N, omega_in) > 0.0f) { *pdf = 0.5f * M_1_PI_F; - return bsdf_oren_nayar_get_intensity(sc, sc->N, I, omega_in); + return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in); } else { *pdf = 0.0f; @@ -66,15 +86,16 @@ ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc, const f ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - sample_uniform_hemisphere(sc->N, randu, randv, omega_in, pdf); + const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc; + sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf); if(dot(Ng, *omega_in) > 0.0f) { - *eval = bsdf_oren_nayar_get_intensity(sc, sc->N, I, *omega_in); + *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in); #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the bounce - *domega_in_dx = (2.0f * dot(sc->N, dIdx)) * sc->N - dIdx; - *domega_in_dy = (2.0f * dot(sc->N, dIdy)) * sc->N - dIdy; + *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif } else { diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 1ab15eee954..e152a8780db 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -35,7 +35,17 @@ CCL_NAMESPACE_BEGIN -ccl_device float3 
bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float3 colors[8], float pos) +#ifdef __OSL__ + +typedef ccl_addr_space struct PhongRampBsdf { + SHADER_CLOSURE_BASE; + + float3 N; + float exponent; + float3 *colors; +} PhongRampBsdf; + +ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos) { int MAXCOLORS = 8; @@ -49,57 +59,54 @@ ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; } -ccl_device int bsdf_phong_ramp_setup(ShaderClosure *sc) +ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf) { - sc->type = CLOSURE_BSDF_PHONG_RAMP_ID; - sc->data0 = max(sc->data0, 0.0f); - sc->data1 = 0.0f; + bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID; + bsdf->exponent = max(bsdf->exponent, 0.0f); return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device void bsdf_phong_ramp_blur(ShaderClosure *sc, float roughness) -{ -} - -ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float m_exponent = sc->data0; - float cosNI = dot(sc->N, omega_in); - float cosNO = dot(sc->N, I); + const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc; + float m_exponent = bsdf->exponent; + float cosNI = dot(bsdf->N, omega_in); + float cosNO = dot(bsdf->N, I); if(cosNI > 0 && cosNO > 0) { // reflect the view vector - float3 R = (2 * cosNO) * sc->N - I; + float3 R = (2 * cosNO) * bsdf->N - I; float cosRI = dot(R, omega_in); if(cosRI > 0) { float cosp = powf(cosRI, m_exponent); float common = 0.5f * M_1_PI_F * cosp; float out = cosNI * (m_exponent + 2) * common; *pdf = (m_exponent + 1) * common; - return bsdf_phong_ramp_get_color(sc, colors, cosp) * out; + return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; } } return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 
bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colors[8], float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float cosNO = dot(sc->N, I); - float m_exponent = sc->data0; + const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc; + float cosNO = dot(bsdf->N, I); + float m_exponent = bsdf->exponent; if(cosNO > 0) { // reflect the view vector - float3 R = (2 * cosNO) * sc->N - I; + float3 R = (2 * cosNO) * bsdf->N - I; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(sc->N, dIdx)) * sc->N - dIdx; - *domega_in_dy = (2 * dot(sc->N, dIdy)) * sc->N - dIdy; + *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif float3 T, B; @@ -114,7 +121,7 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo if(dot(Ng, *omega_in) > 0.0f) { // common terms for pdf and eval - float cosNI = dot(sc->N, *omega_in); + float cosNI = dot(bsdf->N, *omega_in); // make sure the direction we chose is still in the right hemisphere if(cosNI > 0) { @@ -122,13 +129,14 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo float common = 0.5f * M_1_PI_F * cosp; *pdf = (m_exponent + 1) * common; float out = cosNI * (m_exponent + 2) * common; - *eval = bsdf_phong_ramp_get_color(sc, colors, cosp) * out; + *eval = 
bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; } } } return LABEL_REFLECT|LABEL_GLOSSY; } +#endif /* __OSL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h index 303f4c9ce34..1d21614ecee 100644 --- a/intern/cycles/kernel/closure/bsdf_reflection.h +++ b/intern/cycles/kernel/closure/bsdf_reflection.h @@ -37,9 +37,9 @@ CCL_NAMESPACE_BEGIN /* REFLECTION */ -ccl_device int bsdf_reflection_setup(ShaderClosure *sc) +ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf) { - sc->type = CLOSURE_BSDF_REFLECTION_ID; + bsdf->type = CLOSURE_BSDF_REFLECTION_ID; return SD_BSDF; } @@ -55,7 +55,8 @@ ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc, const f ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float3 N = bsdf->N; // only one direction is possible float cosNO = dot(N, I); diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h index c78a4b67134..050a4e76fa9 100644 --- a/intern/cycles/kernel/closure/bsdf_refraction.h +++ b/intern/cycles/kernel/closure/bsdf_refraction.h @@ -37,9 +37,9 @@ CCL_NAMESPACE_BEGIN /* REFRACTION */ -ccl_device int bsdf_refraction_setup(ShaderClosure *sc) +ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf) { - sc->type = CLOSURE_BSDF_REFRACTION_ID; + bsdf->type = CLOSURE_BSDF_REFRACTION_ID; return SD_BSDF; } @@ -55,8 +55,9 @@ ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc, const f ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float 
*pdf) { - float m_eta = sc->data0; - float3 N = sc->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + float m_eta = bsdf->ior; + float3 N = bsdf->N; float3 R, T; #ifdef __RAY_DIFFERENTIALS__ diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h index e5b6ab93a64..28e775bcbc8 100644 --- a/intern/cycles/kernel/closure/bsdf_toon.h +++ b/intern/cycles/kernel/closure/bsdf_toon.h @@ -35,17 +35,35 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct ToonBsdf { + SHADER_CLOSURE_BASE; + + float3 N; + float size; + float smooth; +} ToonBsdf; + /* DIFFUSE TOON */ -ccl_device int bsdf_diffuse_toon_setup(ShaderClosure *sc) +ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf) { - sc->type = CLOSURE_BSDF_DIFFUSE_TOON_ID; - sc->data0 = saturate(sc->data0); - sc->data1 = saturate(sc->data1); + bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID; + bsdf->size = saturate(bsdf->size); + bsdf->smooth = saturate(bsdf->smooth); return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_toon_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const ToonBsdf *bsdf_a = (const ToonBsdf*)a; + const ToonBsdf *bsdf_b = (const ToonBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && + (bsdf_a->size == bsdf_b->size) && + (bsdf_a->smooth == bsdf_b->smooth); +} + ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle) { float is; @@ -67,9 +85,10 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth) ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float max_angle = sc->data0*M_PI_2_F; - float smooth = sc->data1*M_PI_2_F; - float angle = safe_acosf(fmaxf(dot(sc->N, omega_in), 0.0f)); + const ToonBsdf *bsdf = (const ToonBsdf*)sc; + float max_angle = bsdf->size*M_PI_2_F; + float smooth = bsdf->smooth*M_PI_2_F; + float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f)); float3 eval = 
bsdf_toon_get_intensity(max_angle, smooth, angle); @@ -90,21 +109,22 @@ ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc, const ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float max_angle = sc->data0*M_PI_2_F; - float smooth = sc->data1*M_PI_2_F; + const ToonBsdf *bsdf = (const ToonBsdf*)sc; + float max_angle = bsdf->size*M_PI_2_F; + float smooth = bsdf->smooth*M_PI_2_F; float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); float angle = sample_angle*randu; if(sample_angle > 0.0f) { - sample_uniform_cone(sc->N, sample_angle, randu, randv, omega_in, pdf); + sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf); if(dot(Ng, *omega_in) > 0.0f) { *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the bounce - *domega_in_dx = (2.0f * dot(sc->N, dIdx)) * sc->N - dIdx; - *domega_in_dy = (2.0f * dot(sc->N, dIdy)) * sc->N - dIdy; + *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif } else @@ -117,25 +137,26 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, floa /* GLOSSY TOON */ -ccl_device int bsdf_glossy_toon_setup(ShaderClosure *sc) +ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf) { - sc->type = CLOSURE_BSDF_GLOSSY_TOON_ID; - sc->data0 = saturate(sc->data0); - sc->data1 = saturate(sc->data1); + bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID; + bsdf->size = saturate(bsdf->size); + bsdf->smooth = saturate(bsdf->smooth); return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - float max_angle = sc->data0*M_PI_2_F; - float smooth = 
sc->data1*M_PI_2_F; - float cosNI = dot(sc->N, omega_in); - float cosNO = dot(sc->N, I); + const ToonBsdf *bsdf = (const ToonBsdf*)sc; + float max_angle = bsdf->size*M_PI_2_F; + float smooth = bsdf->smooth*M_PI_2_F; + float cosNI = dot(bsdf->N, omega_in); + float cosNO = dot(bsdf->N, I); if(cosNI > 0 && cosNO > 0) { /* reflect the view vector */ - float3 R = (2 * cosNO) * sc->N - I; + float3 R = (2 * cosNO) * bsdf->N - I; float cosRI = dot(R, omega_in); float angle = safe_acosf(fmaxf(cosRI, 0.0f)); @@ -157,13 +178,14 @@ ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc, const ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float max_angle = sc->data0*M_PI_2_F; - float smooth = sc->data1*M_PI_2_F; - float cosNO = dot(sc->N, I); + const ToonBsdf *bsdf = (const ToonBsdf*)sc; + float max_angle = bsdf->size*M_PI_2_F; + float smooth = bsdf->smooth*M_PI_2_F; + float cosNO = dot(bsdf->N, I); if(cosNO > 0) { /* reflect the view vector */ - float3 R = (2 * cosNO) * sc->N - I; + float3 R = (2 * cosNO) * bsdf->N - I; float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); float angle = sample_angle*randu; @@ -171,15 +193,15 @@ ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf); if(dot(Ng, *omega_in) > 0.0f) { - float cosNI = dot(sc->N, *omega_in); + float cosNI = dot(bsdf->N, *omega_in); /* make sure the direction we chose is still in the right hemisphere */ if(cosNI > 0) { *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(sc->N, dIdx)) * sc->N - dIdx; - *domega_in_dy = (2 * dot(sc->N, dIdy)) * sc->N - dIdy; + *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2 * 
dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif } else diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index c24720cefbe..35c95768b69 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -19,6 +19,17 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct Bssrdf { + SHADER_CLOSURE_BASE; + + float radius; + float sharpness; + float d; + float texture_blur; + float albedo; + float3 N; +} Bssrdf; + /* Planar Truncated Gaussian * * Note how this is different from the typical gaussian, this one integrates @@ -28,11 +39,12 @@ CCL_NAMESPACE_BEGIN /* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */ #define GAUSS_TRUNCATE 12.46f -ccl_device float bssrdf_gaussian_eval(ShaderClosure *sc, float r) +ccl_device float bssrdf_gaussian_eval(const ShaderClosure *sc, float r) { /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm * = 1 - exp(-Rm*Rm/(2*v)) */ - const float v = sc->data0*sc->data0*(0.25f*0.25f); + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float v = bssrdf->radius*bssrdf->radius*(0.25f*0.25f); const float Rm = sqrtf(v*GAUSS_TRUNCATE); if(r >= Rm) @@ -41,7 +53,7 @@ ccl_device float bssrdf_gaussian_eval(ShaderClosure *sc, float r) return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v); } -ccl_device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r) +ccl_device float bssrdf_gaussian_pdf(const ShaderClosure *sc, float r) { /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE); @@ -49,12 +61,12 @@ ccl_device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r) return bssrdf_gaussian_eval(sc, r) * (1.0f/(area_truncated)); } -ccl_device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, float *h) +ccl_device void bssrdf_gaussian_sample(const ShaderClosure *sc, float xi, float *r, float *h) { /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v)) * r = sqrt(-2*v*logf(xi)) */ - - 
const float v = sc->data0*sc->data0*(0.25f*0.25f); + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float v = bssrdf->radius*bssrdf->radius*(0.25f*0.25f); const float Rm = sqrtf(v*GAUSS_TRUNCATE); /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ @@ -75,12 +87,13 @@ ccl_device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, fl * far as I can tell has no closed form solution. So we get an iterative solution * instead with newton-raphson. */ -ccl_device float bssrdf_cubic_eval(ShaderClosure *sc, float r) +ccl_device float bssrdf_cubic_eval(const ShaderClosure *sc, float r) { - const float sharpness = sc->T.x; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float sharpness = bssrdf->sharpness; if(sharpness == 0.0f) { - const float Rm = sc->data0; + const float Rm = bssrdf->radius; if(r >= Rm) return 0.0f; @@ -94,7 +107,7 @@ ccl_device float bssrdf_cubic_eval(ShaderClosure *sc, float r) } else { - float Rm = sc->data0*(1.0f + sharpness); + float Rm = bssrdf->radius*(1.0f + sharpness); if(r >= Rm) return 0.0f; @@ -122,13 +135,13 @@ ccl_device float bssrdf_cubic_eval(ShaderClosure *sc, float r) } } -ccl_device float bssrdf_cubic_pdf(ShaderClosure *sc, float r) +ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r) { return bssrdf_cubic_eval(sc, r); } /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ -ccl_device float bssrdf_cubic_quintic_root_find(float xi) +ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi) { /* newton-raphson iteration, usually succeeds in 2-4 iterations, except * outside 0.02 ... 
0.98 where it can go up to 10, so overall performance @@ -155,12 +168,13 @@ ccl_device float bssrdf_cubic_quintic_root_find(float xi) return x; } -ccl_device void bssrdf_cubic_sample(ShaderClosure *sc, float xi, float *r, float *h) +ccl_device void bssrdf_cubic_sample(const ShaderClosure *sc, float xi, float *r, float *h) { - float Rm = sc->data0; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float sharpness = bssrdf->sharpness; + float Rm = bssrdf->radius; float r_ = bssrdf_cubic_quintic_root_find(xi); - const float sharpness = sc->T.x; if(sharpness != 0.0f) { r_ = powf(r_, 1.0f + sharpness); Rm *= (1.0f + sharpness); @@ -198,21 +212,22 @@ ccl_device_inline float bssrdf_burley_compatible_mfp(float r) return 0.25f * M_1_PI_F * r; } -ccl_device void bssrdf_burley_setup(ShaderClosure *sc) +ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf) { /* Mean free path length. */ - const float l = bssrdf_burley_compatible_mfp(sc->data0); + const float l = bssrdf_burley_compatible_mfp(bssrdf->radius); /* Surface albedo. */ - const float A = sc->data2; + const float A = bssrdf->albedo; const float s = bssrdf_burley_fitting(A); const float d = l / s; - sc->custom1 = d; + bssrdf->d = d; } -ccl_device float bssrdf_burley_eval(ShaderClosure *sc, float r) +ccl_device float bssrdf_burley_eval(const ShaderClosure *sc, float r) { - const float d = sc->custom1; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float d = bssrdf->d; const float Rm = BURLEY_TRUNCATE * d; if(r >= Rm) @@ -231,7 +246,7 @@ ccl_device float bssrdf_burley_eval(ShaderClosure *sc, float r) return (exp_r_d + exp_r_3_d) / (4.0f*d); } -ccl_device float bssrdf_burley_pdf(ShaderClosure *sc, float r) +ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r) { return bssrdf_burley_eval(sc, r) * (1.0f/BURLEY_TRUNCATE_CDF); } @@ -240,7 +255,7 @@ ccl_device float bssrdf_burley_pdf(ShaderClosure *sc, float r) * Returns scaled radius, meaning the result is to be scaled up by d. 
* Since there's no closed form solution we do Newton-Raphson method to find it. */ -ccl_device float bssrdf_burley_root_find(float xi) +ccl_device_inline float bssrdf_burley_root_find(float xi) { const float tolerance = 1e-6f; const int max_iteration_count = 10; @@ -276,12 +291,13 @@ ccl_device float bssrdf_burley_root_find(float xi) return r; } -ccl_device void bssrdf_burley_sample(ShaderClosure *sc, +ccl_device void bssrdf_burley_sample(const ShaderClosure *sc, float xi, float *r, float *h) { - const float d = sc->custom1; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float d = bssrdf->d; const float Rm = BURLEY_TRUNCATE * d; const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d; @@ -295,26 +311,29 @@ ccl_device void bssrdf_burley_sample(ShaderClosure *sc, * * Samples distributed over disk with no falloff, for reference. */ -ccl_device float bssrdf_none_eval(ShaderClosure *sc, float r) +ccl_device float bssrdf_none_eval(const ShaderClosure *sc, float r) { - const float Rm = sc->data0; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float Rm = bssrdf->radius; return (r < Rm)? 
1.0f: 0.0f; } -ccl_device float bssrdf_none_pdf(ShaderClosure *sc, float r) +ccl_device float bssrdf_none_pdf(const ShaderClosure *sc, float r) { /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */ - const float Rm = sc->data0; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float Rm = bssrdf->radius; const float area = (M_PI_F*Rm*Rm); return bssrdf_none_eval(sc, r) / area; } -ccl_device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float *h) +ccl_device void bssrdf_none_sample(const ShaderClosure *sc, float xi, float *r, float *h) { /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2 * r = sqrt(xi)*Rm */ - const float Rm = sc->data0; + const Bssrdf *bssrdf = (const Bssrdf*)sc; + const float Rm = bssrdf->radius; const float r_ = sqrtf(xi)*Rm; *r = r_; @@ -325,30 +344,42 @@ ccl_device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float /* Generic */ -ccl_device int bssrdf_setup(ShaderClosure *sc, ClosureType type) +ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight) +{ + Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight); + + if(!bssrdf) + return NULL; + + float sample_weight = fabsf(average(weight)); + bssrdf->sample_weight = sample_weight; + return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? 
bssrdf : NULL; +} + +ccl_device int bssrdf_setup(Bssrdf *bssrdf, ClosureType type) { - if(sc->data0 < BSSRDF_MIN_RADIUS) { + if(bssrdf->radius < BSSRDF_MIN_RADIUS) { /* revert to diffuse BSDF if radius too small */ - sc->data0 = 0.0f; - sc->data1 = 0.0f; - int flag = bsdf_diffuse_setup(sc); - sc->type = CLOSURE_BSDF_BSSRDF_ID; + DiffuseBsdf *bsdf = (DiffuseBsdf*)bssrdf; + bsdf->N = bssrdf->N; + int flag = bsdf_diffuse_setup(bsdf); + bsdf->type = CLOSURE_BSDF_BSSRDF_ID; return flag; } else { - sc->data1 = saturate(sc->data1); /* texture blur */ - sc->T.x = saturate(sc->T.x); /* sharpness */ - sc->type = type; + bssrdf->texture_blur = saturate(bssrdf->texture_blur); + bssrdf->sharpness = saturate(bssrdf->sharpness); + bssrdf->type = type; if(type == CLOSURE_BSSRDF_BURLEY_ID) { - bssrdf_burley_setup(sc); + bssrdf_burley_setup(bssrdf); } return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF; } } -ccl_device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h) +ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h) { if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) bssrdf_cubic_sample(sc, xi, r, h); @@ -358,7 +389,7 @@ ccl_device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h) bssrdf_burley_sample(sc, xi, r, h); } -ccl_device float bssrdf_pdf(ShaderClosure *sc, float r) +ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r) { if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) return bssrdf_cubic_pdf(sc, r); diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h index 4d71ba50ec3..01e67c7c2fd 100644 --- a/intern/cycles/kernel/closure/volume.h +++ b/intern/cycles/kernel/closure/volume.h @@ -19,6 +19,12 @@ CCL_NAMESPACE_BEGIN +typedef ccl_addr_space struct HenyeyGreensteinVolume { + SHADER_CLOSURE_BASE; + + float g; +} HenyeyGreensteinVolume; + /* HENYEY-GREENSTEIN CLOSURE */ /* Given cosine between rays, return probability density that a photon bounces @@ -29,19 +35,28 @@ ccl_device float 
single_peaked_henyey_greenstein(float cos_theta, float g) return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * (M_1_PI_F * 0.25f); }; -ccl_device int volume_henyey_greenstein_setup(ShaderClosure *sc) +ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume) { - sc->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID; + volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID; /* clamp anisotropy to avoid delta function */ - sc->data0 = signf(sc->data0) * min(fabsf(sc->data0), 1.0f - 1e-3f); + volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f); return SD_SCATTER; } +ccl_device bool volume_henyey_greenstein_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume*)a; + const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume*)b; + + return (volume_a->g == volume_b->g); +} + ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, const float3 I, float3 omega_in, float *pdf) { - float g = sc->data0; + const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc; + float g = volume->g; /* note that I points towards the viewer */ if(fabsf(g) < 1e-3f) { @@ -58,7 +73,8 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, c ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { - float g = sc->data0; + const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc; + float g = volume->g; float cos_phi, sin_phi, cos_theta; /* match pdf for small g */ diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index d2c7edb11ea..11548324e18 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -17,7 +17,9 @@ #include "geom_attribute.h" #include 
"geom_object.h" +#include "geom_patch.h" #include "geom_triangle.h" +#include "geom_subd_triangle.h" #include "geom_triangle_intersect.h" #include "geom_motion_triangle.h" #include "geom_motion_curve.h" diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index c7364e9edac..8604d30ad34 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -25,47 +25,76 @@ CCL_NAMESPACE_BEGIN * Lookup of attributes is different between OSL and SVM, as OSL is ustring * based while for SVM we use integer ids. */ +ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd); + +ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd) +{ +#ifdef __HAIR__ + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + return ATTR_PRIM_CURVE; + } + else +#endif + if(subd_triangle_patch(kg, sd) != ~0) { + return ATTR_PRIM_SUBD; + } + else { + return ATTR_PRIM_TRIANGLE; + } +} + +ccl_device_inline AttributeDescriptor attribute_not_found() +{ + const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; + return desc; +} + /* Find attribute based on ID */ -ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id) { - if(ccl_fetch(sd, object) == PRIM_NONE) - return (int)ATTR_STD_NOT_FOUND; + if(ccl_fetch(sd, object) == PRIM_NONE) { + return attribute_not_found(); + } /* for SVM, find attribute by unique id */ uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; -#ifdef __HAIR__ - attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? 
attr_offset + ATTR_PRIM_CURVE: attr_offset; -#endif + attr_offset += attribute_primitive_type(kg, sd); uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); while(attr_map.x != id) { if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - return ATTR_STD_NOT_FOUND; + return attribute_not_found(); } attr_offset += ATTR_PRIM_TYPES; attr_map = kernel_tex_fetch(__attributes_map, attr_offset); } - *elem = (AttributeElement)attr_map.y; + AttributeDescriptor desc; + desc.element = (AttributeElement)attr_map.y; - if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) - return ATTR_STD_NOT_FOUND; + if(ccl_fetch(sd, prim) == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH) { + return attribute_not_found(); + } /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.type = (NodeAttributeType)(attr_map.w & 0xff); + desc.flags = (AttributeFlag)(attr_map.w >> 8); + + return desc; } /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset) +ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { Transform tfm; - tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2); - tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3); + tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); + tfm.w = kernel_tex_fetch(__attributes_float3, desc.offset + 3); return tfm; } diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 
292e1bfca0e..aa9cd295452 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -24,23 +24,23 @@ CCL_NAMESPACE_BEGIN /* Reading attributes on various curve elements */ -ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(elem == ATTR_ELEMENT_CURVE) { + if(desc.element == ATTR_ELEMENT_CURVE) { #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; #endif - return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); } - else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { + else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; - float f0 = kernel_tex_fetch(__attributes_float, offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); @@ -59,9 +59,9 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, } } -ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { - if(elem == ATTR_ELEMENT_CURVE) { + if(desc.element == ATTR_ELEMENT_CURVE) { /* idea: we can't derive any useful 
differentials here, but for tiled * mipmap image caching it would be useful to avoid reading the highest * detail level always. maybe a derivative based on the hair density @@ -71,15 +71,15 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); } - else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { + else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1)); + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 2fb8e219884..dabba3fb1f0 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -387,6 +387,12 @@ ccl_device_inline void motion_triangle_intersect_subsurface( float t, u, v; if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) { + for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) { + if(ss_isect->hits[i].t == t) { + return; + } + } + ss_isect->num_hits++; int hit; diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 
c0d15a95954..883c5dc100d 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -292,6 +292,18 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu *numverts = __float_as_int(f.w); } +/* Offset to an objects patch map */ + +ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) +{ + if(object == OBJECT_NONE) + return 0; + + int offset = object*OBJECT_SIZE + 11; + float4 f = kernel_tex_fetch(__objects, offset); + return __float_as_uint(f.x); +} + /* Pass ID for shader */ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h new file mode 100644 index 00000000000..6a0ff5a4a04 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_patch.h @@ -0,0 +1,343 @@ +/* + * Based on code from OpenSubdiv released under this license: + * + * Copyright 2013 Pixar + * + * Licensed under the Apache License, Version 2.0 (the "Apache License") + * with the following modification; you may not use this file except in + * compliance with the Apache License and the following modification to it: + * Section 6. Trademarks. is deleted and replaced with: + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor + * and its affiliates, except as required to comply with Section 4(c) of + * the License and to reproduce the content of the NOTICE file. + * + * You may obtain a copy of the Apache License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Apache License with the above modification is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the Apache License for the specific + * language governing permissions and limitations under the Apache License. + * + */ + +CCL_NAMESPACE_BEGIN + +typedef struct PatchHandle { + int array_index, patch_index, vert_index; +} PatchHandle; + +ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v) +{ + int quadrant = -1; + + if(*u < median) { + if(*v < median) { + quadrant = 0; + } + else { + quadrant = 1; + *v -= median; + } + } + else { + if(*v < median) { + quadrant = 3; + } + else { + quadrant = 2; + *v -= median; + } + *u -= median; + } + + return quadrant; +} + +/* retrieve PatchHandle from patch coords */ + +ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v) +{ + PatchHandle handle; + + kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); + + int node = (object_patch_map_offset(kg, object) + patch)/2; + float median = 0.5f; + + for(int depth = 0; depth < 0xff; depth++) { + float delta = median * 0.5f; + + int quadrant = patch_map_resolve_quadrant(median, &u, &v); + kernel_assert(quadrant >= 0); + + uint child = kernel_tex_fetch(__patches, node + quadrant); + + /* is the quadrant a hole? 
*/ + if(!(child & PATCH_MAP_NODE_IS_SET)) { + handle.array_index = -1; + return handle; + } + + uint index = child & PATCH_MAP_NODE_INDEX_MASK; + + if(child & PATCH_MAP_NODE_IS_LEAF) { + handle.array_index = kernel_tex_fetch(__patches, index + 0); + handle.patch_index = kernel_tex_fetch(__patches, index + 1); + handle.vert_index = kernel_tex_fetch(__patches, index + 2); + + return handle; + } else { + node = index; + } + + median = delta; + } + + /* no leaf found */ + kernel_assert(0); + + handle.array_index = -1; + return handle; +} + +ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv) +{ + /* The four uniform cubic B-Spline basis functions evaluated at t */ + float inv_6 = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3); + point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3); + point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3)); + point[3] = inv_6 * t3; + + /* Derivatives of the above four basis functions at t */ + deriv[0] = -0.5f*t2 + t - 0.5f; + deriv[1] = 1.5f*t2 - 2.0f*t; + deriv[2] = -1.5f*t2 + t + 0.5f; + deriv[3] = 0.5f*t2; +} + +ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t) +{ + int boundary = ((bits >> 8) & 0xf); + + if(boundary & 1) { + t[2] -= t[0]; + t[1] += 2*t[0]; + t[0] = 0; + } + + if(boundary & 2) { + s[1] -= s[3]; + s[2] += 2*s[3]; + s[3] = 0; + } + + if(boundary & 4) { + t[1] -= t[3]; + t[2] += 2*t[3]; + t[3] = 0; + } + + if(boundary & 8) { + s[2] -= s[0]; + s[1] += 2*s[0]; + s[0] = 0; + } +} + +ccl_device_inline int patch_eval_depth(uint patch_bits) +{ + return (patch_bits & 0xf); +} + +ccl_device_inline float patch_eval_param_fraction(uint patch_bits) +{ + bool non_quad_root = (patch_bits >> 4) & 0x1; + int depth = patch_eval_depth(patch_bits); + + if(non_quad_root) { + return 1.0f / (float)(1 << (depth-1)); + } + else { + return 1.0f / (float)(1 << depth); + } +} + +ccl_device_inline void 
patch_eval_normalize_coords(uint patch_bits, float *u, float *v) +{ + float frac = patch_eval_param_fraction(patch_bits); + + int iu = (patch_bits >> 22) & 0x3ff; + int iv = (patch_bits >> 12) & 0x3ff; + + /* top left corner */ + float pu = (float)iu*frac; + float pv = (float)iv*frac; + + /* normalize uv coordinates */ + *u = (*u - pu) / frac; + *v = (*v - pv) / frac; +} + +/* retrieve patch control indices */ + +ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel, + int indices[PATCH_MAX_CONTROL_VERTS]) +{ + int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; + + /* XXX: regular patches only */ + for(int i = 0; i < 16; i++) { + indices[i] = kernel_tex_fetch(__patches, index_base + i); + } + + return 16; +} + +/* evaluate patch basis functions */ + +ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v, + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) +{ + uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ + float d_scale = 1 << patch_eval_depth(patch_bits); + + bool non_quad_root = (patch_bits >> 4) & 0x1; + if(non_quad_root) { + d_scale *= 0.5f; + } + + patch_eval_normalize_coords(patch_bits, &u, &v); + + /* XXX: regular patches only for now. 
*/ + + float s[4], t[4], ds[4], dt[4]; + + patch_eval_bspline_weights(u, s, ds); + patch_eval_bspline_weights(v, t, dt); + + patch_eval_adjust_boundary_weights(patch_bits, s, t); + patch_eval_adjust_boundary_weights(patch_bits, ds, dt); + + for(int k = 0; k < 4; k++) { + for(int l = 0; l < 4; l++) { + weights[4*k+l] = s[l] * t[k]; + weights_du[4*k+l] = ds[l] * t[k] * d_scale; + weights_dv[4*k+l] = s[l] * dt[k] * d_scale; + } + } +} + +/* generic function for evaluating indices and weights from patch coords */ + +ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel, + int indices[PATCH_MAX_CONTROL_VERTS], + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) +{ + PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); + kernel_assert(handle.array_index >= 0); + + int num_control = patch_eval_indices(kg, &handle, channel, indices); + patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv); + + return num_control; +} + +/* functions for evaluating attributes on patches */ + +ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset, + int patch, float u, float v, int channel, + float *du, float* dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + indices, weights, weights_du, weights_dv); + + float val = 0.0f; + if(du) *du = 0.0f; + if(dv) *dv = 0.0f; + + for(int i = 0; i < num_control; i++) { + float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); + + val += v * weights[i]; + if(du) *du += v * weights_du[i]; + if(dv) *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData 
*sd, int offset, + int patch, float u, float v, int channel, + float3 *du, float3 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if(du) *du = make_float3(0.0f, 0.0f, 0.0f); + if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); + + for(int i = 0; i < num_control; i++) { + float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); + + val += v * weights[i]; + if(du) *du += v * weights_du[i]; + if(dv) *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset, + int patch, float u, float v, int channel, + float3 *du, float3 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if(du) *du = make_float3(0.0f, 0.0f, 0.0f); + if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); + + for(int i = 0; i < num_control; i++) { + float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i])); + + val += v * weights[i]; + if(du) *du += v * weights_du[i]; + if(dv) *dv += v * weights_dv[i]; + } + + return val; +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index b1b1e919e00..4384c2093e9 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -23,19 +23,25 @@ CCL_NAMESPACE_BEGIN /* Generic primitive 
attribute reading functions */ -ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float *dx, float *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { - return triangle_attribute_float(kg, sd, elem, offset, dx, dy); + if(subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float(kg, sd, desc, dx, dy); } #ifdef __HAIR__ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, elem, offset, dx, dy); + return curve_attribute_float(kg, sd, desc, dx, dy); } #endif #ifdef __VOLUME__ - else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float(kg, sd, elem, offset, dx, dy); + else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float(kg, sd, desc, dx, dy); } #endif else { @@ -45,19 +51,25 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData * } } -ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, float3 *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { - return triangle_attribute_float3(kg, sd, elem, offset, dx, dy); + if(subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); } #ifdef __HAIR__ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, elem, offset, dx, dy); + return curve_attribute_float3(kg, sd, desc, 
dx, dy); } #endif #ifdef __VOLUME__ - else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float3(kg, sd, elem, offset, dx, dy); + else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float3(kg, sd, desc, dx, dy); } #endif else { @@ -69,15 +81,14 @@ ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData /* Default UV coordinate */ -ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) +ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { - AttributeElement elem_uv; - int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); - if(offset_uv == ATTR_STD_NOT_FOUND) + if(desc.offset == ATTR_STD_NOT_FOUND) return make_float3(0.0f, 0.0f, 0.0f); - float3 uv = primitive_attribute_float3(kg, sd, elem_uv, offset_uv, NULL, NULL); + float3 uv = primitive_attribute_float3(kg, sd, desc, NULL, NULL); uv.z = 1.0f; return uv; } @@ -87,15 +98,14 @@ ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id) { /* storing ptex data as attributes is not memory efficient but simple for tests */ - AttributeElement elem_face_id, elem_uv; - int offset_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID, &elem_face_id); - int offset_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV, &elem_uv); + const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); + const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); - if(offset_face_id == ATTR_STD_NOT_FOUND || offset_uv == ATTR_STD_NOT_FOUND) + if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) return false; - float3 uv3 = primitive_attribute_float3(kg, sd, elem_uv, offset_uv, NULL, NULL); - float face_id_f = 
primitive_attribute_float(kg, sd, elem_face_id, offset_face_id, NULL, NULL); + float3 uv3 = primitive_attribute_float3(kg, sd, desc_uv, NULL, NULL); + float face_id_f = primitive_attribute_float(kg, sd, desc_face_id, NULL, NULL); *uv = make_float2(uv3.x, uv3.y); *face_id = (int)face_id_f; @@ -117,11 +127,10 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) #endif /* try to create spherical tangent from generated coordinates */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); - if(attr_offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); + if(desc.offset != ATTR_STD_NOT_FOUND) { + float3 data = primitive_attribute_float3(kg, sd, desc, NULL, NULL); data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N)))); @@ -138,7 +147,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) /* Motion vector for motion pass */ -ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) +ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) { /* center position */ float3 center; @@ -158,19 +167,18 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) float3 motion_pre = center, motion_post = center; /* deformation motion */ - AttributeElement elem; - int offset = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); - if(offset != ATTR_STD_NOT_FOUND) { + if(desc.offset != ATTR_STD_NOT_FOUND) { /* get motion info */ int numverts, numkeys; object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys); /* lookup attributes */ - int offset_next = 
(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; + motion_pre = primitive_attribute_float3(kg, sd, desc, NULL, NULL); - motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); - motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL); + desc.offset += (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; + motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL); #ifdef __HAIR__ if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h new file mode 100644 index 00000000000..fccacf435f9 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -0,0 +1,349 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Functions for retrieving attributes on triangles produced from subdivision meshes */ + +CCL_NAMESPACE_BEGIN + +/* Patch index for triangle, -1 if not subdivision triangle */ + +ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd) +{ + return (ccl_fetch(sd, prim) != PRIM_NONE) ? 
kernel_tex_fetch(__tri_patch, ccl_fetch(sd, prim)) : ~0; +} + +/* UV coords of triangle within patch */ + +ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, const ShaderData *sd, float2 uv[3]) +{ + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + + uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); + uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); + uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); +} + +/* Vertex indices of patch */ + +ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals *kg, int patch) +{ + uint4 indices; + + indices.x = kernel_tex_fetch(__patches, patch+0); + indices.y = kernel_tex_fetch(__patches, patch+1); + indices.z = kernel_tex_fetch(__patches, patch+2); + indices.w = kernel_tex_fetch(__patches, patch+3); + + return indices; +} + +/* Originating face for patch */ + +ccl_device_inline uint subd_triangle_patch_face(KernelGlobals *kg, int patch) +{ + return kernel_tex_fetch(__patches, patch+4); +} + +/* Number of corners on originating face */ + +ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals *kg, int patch) +{ + return kernel_tex_fetch(__patches, patch+5) & 0xffff; +} + +/* Indices of the four corners that are used by the patch */ + +ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch, int corners[4]) +{ + uint4 data; + + data.x = kernel_tex_fetch(__patches, patch+4); + data.y = kernel_tex_fetch(__patches, patch+5); + data.z = kernel_tex_fetch(__patches, patch+6); + data.w = kernel_tex_fetch(__patches, patch+7); + + int num_corners = data.y & 0xffff; + + if(num_corners == 4) { + /* quad */ + corners[0] = data.z; + corners[1] = data.z+1; + corners[2] = data.z+2; + corners[3] = data.z+3; + } + else { + /* ngon */ + int c = data.y >> 16; + + corners[0] = data.z + c; + corners[1] = data.z + mod(c+1, num_corners); + corners[2] = data.w; + corners[3] = data.z + mod(c-1, num_corners); + } +} + +/* Reading attributes on various 
subdivision triangle elements */ + +ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +{ + int patch = subd_triangle_patch(kg, sd); + + if(desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2]; + + float a, dads, dadt; + a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if(dx) { + float dudx = ccl_fetch(sd, du).dx; + float dvdx = ccl_fetch(sd, dv).dx; + + float dsdx = dsdu*dudx + dsdv*dvdx; + float dtdx = dtdu*dudx + dtdv*dvdx; + + *dx = dads*dsdx + dadt*dtdx; + } + if(dy) { + float dudy = ccl_fetch(sd, du).dy; + float dvdy = ccl_fetch(sd, dv).dy; + + float dsdy = dsdu*dudy + dsdv*dvdy; + float dtdy = dtdu*dudy + dtdv*dvdy; + + *dy = dads*dsdy + dadt*dtdy; + } + } +#endif + + return a; + } + else if(desc.element == ATTR_ELEMENT_FACE) { + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); + float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + + if(subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1+f0)*0.5f; + f3 = (f3+f0)*0.5f; + } + + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, 
uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; + if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; +#endif + + return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + } + else if(desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); + float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); + float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); + float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); + + if(subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1+f0)*0.5f; + f3 = (f3+f0)*0.5f; + } + + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; + if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; +#endif + + return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + } + else { + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + + return 0.0f; + } +} + +ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +{ + int patch = subd_triangle_patch(kg, sd); + + 
if(desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2]; + + float3 a, dads, dadt; + + if(desc.element == ATTR_ELEMENT_CORNER_BYTE) { + a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + else { + a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + +#ifdef __RAY_DIFFERENTIALS__ + if(dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if(dx) { + float dudx = ccl_fetch(sd, du).dx; + float dvdx = ccl_fetch(sd, dv).dx; + + float dsdx = dsdu*dudx + dsdv*dvdx; + float dtdx = dtdu*dudx + dtdv*dvdx; + + *dx = dads*dsdx + dadt*dtdx; + } + if(dy) { + float dudy = ccl_fetch(sd, du).dy; + float dvdy = ccl_fetch(sd, dv).dy; + + float dsdy = dsdu*dudy + dsdv*dvdy; + float dtdy = dtdu*dudy + dtdv*dvdy; + + *dy = dads*dsdy + dadt*dtdy; + } + } +#endif + + return a; + } + else if(desc.element == ATTR_ELEMENT_FACE) { + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); + } + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); + float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); + + if(subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1+f0)*0.5f; + f3 = 
(f3+f0)*0.5f; + } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; + if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; +#endif + + return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + } + else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float3 f0, f1, f2, f3; + + if(desc.element == ATTR_ELEMENT_CORNER) { + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); + f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); + } + else { + f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset)); + f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset)); + f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset)); + f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)); + } + + if(subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1+f0)*0.5f; + f3 = (f3+f0)*0.5f; + } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + 
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; + if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; +#endif + + return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + } + else { + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 0c2351e1d1b..d3289d6572c 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -105,20 +105,20 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_spa /* Reading attributes on various triangle elements */ -ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(elem == ATTR_ELEMENT_FACE) { + if(desc.element == ATTR_ELEMENT_FACE) { if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); } - else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); - float f0 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.x); - float f1 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.y); - float f2 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.z); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + 
tri_vindex.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; @@ -127,8 +127,8 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } - else if(elem == ATTR_ELEMENT_CORNER) { - int tri = offset + ccl_fetch(sd, prim)*3; + else if(desc.element == ATTR_ELEMENT_CORNER) { + int tri = desc.offset + ccl_fetch(sd, prim)*3; float f0 = kernel_tex_fetch(__attributes_float, tri + 0); float f1 = kernel_tex_fetch(__attributes_float, tri + 1); float f2 = kernel_tex_fetch(__attributes_float, tri + 2); @@ -148,20 +148,20 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s } } -ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { - if(elem == ATTR_ELEMENT_FACE) { + if(desc.element == ATTR_ELEMENT_FACE) { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); } - else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - float3 f1 = 
float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; @@ -170,11 +170,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } - else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) { - int tri = offset + ccl_fetch(sd, prim)*3; + else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + int tri = desc.offset + ccl_fetch(sd, prim)*3; float3 f0, f1, f2; - if(elem == ATTR_ELEMENT_CORNER) { + if(desc.element == ATTR_ELEMENT_CORNER) { f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index caa6c9d9a5b..dd5328220ab 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -342,9 +342,16 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); float3 qvec = cross(tvec, edge1); float3 pvec = cross(D, edge2); - float rt = dot(edge2, qvec) / dot(edge1, pvec); - - P = P + D*rt; + float det = dot(edge1, pvec); + if(det != 
0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. + */ + float rt = dot(edge2, qvec) / det; + P = P + D*rt; + } if(isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ @@ -400,9 +407,16 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); float3 qvec = cross(tvec, edge1); float3 pvec = cross(D, edge2); - float rt = dot(edge2, qvec) / dot(edge1, pvec); - - P = P + D*rt; + float det = dot(edge1, pvec); + if(det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. + */ + float rt = dot(edge2, qvec) / det; + P = P + D*rt; + } #endif /* __INTERSECTION_REFINE__ */ if(isect->object != OBJECT_NONE) { diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 2044aafc877..efe540a8518 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -44,40 +44,41 @@ ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z) } #endif /* __KERNEL_GPU__ */ -ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) +ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, + const ShaderData *sd, + float3 P) { /* todo: optimize this so it's just a single matrix multiplication when * possible (not motion blur), or perhaps even just translation + scale */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, 
ATTR_STD_GENERATED_TRANSFORM); object_inverse_position_transform(kg, sd, &P); - if(attr_offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset); + if(desc.offset != ATTR_STD_NOT_FOUND) { + Transform tfm = primitive_attribute_matrix(kg, sd, desc); P = transform_point(&tfm, P); } return P; } -ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float *dx, float *dy) +ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_GPU__ # if __CUDA_ARCH__ >= 300 - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); + CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z); float4 r = make_float4(f, f, f, 1.0); # else - float4 r = volume_image_texture_3d(id, P.x, P.y, P.z); + float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z); # endif #else float4 r; if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); else - r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z); #endif if(dx) *dx = 0.0f; @@ -86,22 +87,22 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, return average(float4_to_float3(r)); } -ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float3 *dx, float3 *dy) +ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_GPU__ # if __CUDA_ARCH__ >= 300 - CUtexObject tex = 
kernel_tex_fetch(__bindless_mapping, id); + CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z); # else - float4 r = volume_image_texture_3d(id, P.x, P.y, P.z); + float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z); # endif #else float4 r; if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); else - r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z); #endif if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 9ee0b09529e..bfbf73df54f 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN #ifdef __BAKING__ -ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng, - int pass_filter, int sample) +ccl_device_inline void compute_light_pass(KernelGlobals *kg, + ShaderData *sd, + PathRadiance *L, + RNG rng, + int pass_filter, + int sample) { /* initialize master radiance accumulator */ kernel_assert(kernel_data.film.use_light_pass); diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index f6c103d59dd..88514de514c 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -211,7 +211,10 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl /* Panorama Camera */ -ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) +ccl_device_inline void camera_sample_panorama(KernelGlobals *kg, + float raster_x, float raster_y, + float lens_u, float lens_v, + ccl_addr_space Ray *ray) { Transform rastertocamera = 
kernel_data.cam.rastertocamera; float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); @@ -303,8 +306,12 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float /* Common */ -ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v, - float lens_u, float lens_v, float time, ccl_addr_space Ray *ray) +ccl_device_inline void camera_sample(KernelGlobals *kg, + int x, int y, + float filter_u, float filter_v, + float lens_u, float lens_v, + float time, + ccl_addr_space Ray *ray) { /* pixel filter */ int filter_table_offset = kernel_data.film.filter_table_offset; diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 08f6f457805..063220b542e 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -36,13 +36,18 @@ /* Qualifier wrappers for different names on different devices */ #define ccl_device __device__ __inline__ -#define ccl_device_inline __device__ __inline__ +#if (__KERNEL_CUDA_VERSION__ == 80) && (__CUDA_ARCH__ < 500) +# define ccl_device_inline __device__ __forceinline__ +#else +# define ccl_device_inline __device__ __inline__ +#endif #define ccl_device_noinline __device__ __noinline__ #define ccl_global #define ccl_constant #define ccl_may_alias #define ccl_addr_space #define ccl_restrict __restrict__ +#define ccl_align(n) __align__(n) /* No assert supported for CUDA */ diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 8505cb85576..2ae89dde7c4 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -40,6 +40,7 @@ #define ccl_local __local #define ccl_private __private #define ccl_restrict restrict +#define ccl_align(n) __attribute__((aligned(n))) #ifdef __SPLIT_KERNEL__ # define ccl_addr_space __global diff --git 
a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 93c4bd3f7d5..1e829eaa1fa 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -44,11 +44,11 @@ typedef struct LightSample { * * Note: light_p is modified when sample_coord is true. */ -ccl_device float area_light_sample(float3 P, - float3 *light_p, - float3 axisu, float3 axisv, - float randu, float randv, - bool sample_coord) +ccl_device_inline float area_light_sample(float3 P, + float3 *light_p, + float3 axisu, float3 axisv, + float randu, float randv, + bool sample_coord) { /* In our name system we're using P for the center, * which is o in the paper. @@ -268,11 +268,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals return false; } -ccl_device float background_portal_pdf(KernelGlobals *kg, - float3 P, - float3 direction, - int ignore_portal, - bool *is_possible) +ccl_device_inline float background_portal_pdf(KernelGlobals *kg, + float3 P, + float3 direction, + int ignore_portal, + bool *is_possible) { float portal_pdf = 0.0f; @@ -367,7 +367,10 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg, return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf) +ccl_device_inline float3 background_light_sample(KernelGlobals *kg, + float3 P, + float randu, float randv, + float *pdf) { /* Probability of sampling portals instead of the map. 
*/ float portal_sampling_pdf = kernel_data.integrator.portal_pdf; @@ -507,8 +510,11 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 return t*t/cos_pi; } -ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, - float randu, float randv, float3 P, LightSample *ls) +ccl_device_inline void lamp_light_sample(KernelGlobals *kg, + int lamp, + float randu, float randv, + float3 P, + LightSample *ls) { float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0); float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1); diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index d5b31037723..903be4f09a0 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -435,8 +435,12 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, } #ifdef __SUBSURFACE__ - -ccl_device bool kernel_path_subsurface_scatter( +# ifndef __KERNEL_CUDA__ +ccl_device +# else +ccl_device_inline +# endif +bool kernel_path_subsurface_scatter( KernelGlobals *kg, ShaderData *sd, ShaderData *emission_sd, diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 56516967d8f..64f1468eacf 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ -ccl_device void kernel_branched_path_ao(KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - PathRadiance *L, - PathState *state, - RNG *rng, - float3 throughput) +ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + PathState *state, + RNG *rng, + float3 throughput) { int num_samples = kernel_data.integrator.ao_samples; float num_samples_inv = 1.0f/num_samples; diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 
74b1ae0ca32..250b8e92a45 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -222,8 +222,13 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_ #endif /* path tracing: bounce off or through surface to with new direction stored in ray */ -ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space RNG *rng, - ShaderData *sd, ccl_addr_space float3 *throughput, ccl_addr_space PathState *state, PathRadiance *L, ccl_addr_space Ray *ray) +ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, + ccl_addr_space RNG *rng, + ShaderData *sd, + ccl_addr_space float3 *throughput, + ccl_addr_space PathState *state, + PathRadiance *L, + ccl_addr_space Ray *ray) { /* no BSDF? we can stop here */ if(ccl_fetch(sd, flag) & SD_BSDF) { diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index e45522a4641..5fd4f2fad4c 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -18,8 +18,14 @@ CCL_NAMESPACE_BEGIN #ifdef __VOLUME_SCATTER__ -ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L) +ccl_device_inline void kernel_path_volume_connect_light( + KernelGlobals *kg, + RNG *rng, + ShaderData *sd, + ShaderData *emission_sd, + float3 throughput, + PathState *state, + PathRadiance *L) { #ifdef __EMISSION__ if(!kernel_data.integrator.use_direct_light) diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h index 8be6742699a..3437d83ed7d 100644 --- a/intern/cycles/kernel/kernel_projection.h +++ b/intern/cycles/kernel/kernel_projection.h @@ -130,7 +130,10 @@ ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float w return make_float2(u, v); } -ccl_device float3 fisheye_equisolid_to_direction(float u, 
float v, float lens, float fov, float width, float height) +ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v, + float lens, + float fov, + float width, float height) { u = (u - 0.5f) * width; v = (v - 0.5f) * height; @@ -189,7 +192,7 @@ ccl_device float2 direction_to_mirrorball(float3 dir) return make_float2(u, v); } -ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v) +ccl_device_inline float3 panorama_to_direction(KernelGlobals *kg, float u, float v) { switch(kernel_data.cam.panorama_type) { case PANORAMA_EQUIRECTANGULAR: @@ -205,7 +208,7 @@ ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v) } } -ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir) +ccl_device_inline float2 direction_to_panorama(KernelGlobals *kg, float3 dir) { switch(kernel_data.cam.panorama_type) { case PANORAMA_EQUIRECTANGULAR: @@ -221,9 +224,9 @@ ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir) } } -ccl_device float3 spherical_stereo_position(KernelGlobals *kg, - float3 dir, - float3 pos) +ccl_device_inline float3 spherical_stereo_position(KernelGlobals *kg, + float3 dir, + float3 pos) { float interocular_offset = kernel_data.cam.interocular_offset; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 765baa2a5ba..079bea30bdd 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -24,6 +24,7 @@ * */ +#include "closure/alloc.h" #include "closure/bsdf_util.h" #include "closure/bsdf.h" #include "closure/emissive.h" @@ -148,8 +149,16 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, /* ShaderData setup from BSSRDF scatter */ #ifdef __SUBSURFACE__ -ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd, - const Intersection *isect, const Ray *ray) +# ifndef __KERNEL_CUDA__ +ccl_device +# else +ccl_device_inline +# endif +void 
shader_setup_from_subsurface( + KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { bool backfacing = sd->flag & SD_BACKFACING; @@ -225,14 +234,14 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat /* ShaderData setup from position sampled on mesh */ -ccl_device void shader_setup_from_sample(KernelGlobals *kg, - ShaderData *sd, - const float3 P, - const float3 Ng, - const float3 I, - int shader, int object, int prim, - float u, float v, float t, - float time) +ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, + ShaderData *sd, + const float3 P, + const float3 Ng, + const float3 I, + int shader, int object, int prim, + float u, float v, float t, + float time) { /* vectors */ ccl_fetch(sd, P) = P; @@ -444,7 +453,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s /* Merging */ #if defined(__BRANCHED_PATH__) || defined(__VOLUME__) -ccl_device void shader_merge_closures(ShaderData *sd) +ccl_device_inline void shader_merge_closures(ShaderData *sd) { /* merge identical closures, better when we sample a single closure at a time */ for(int i = 0; i < sd->num_closure; i++) { @@ -453,22 +462,9 @@ ccl_device void shader_merge_closures(ShaderData *sd) for(int j = i + 1; j < sd->num_closure; j++) { ShaderClosure *scj = &sd->closure[j]; -#ifdef __OSL__ - if(sci->prim || scj->prim) + if(sci->type != scj->type) continue; -#endif - - if(!(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1 && sci->data2 == scj->data2)) - continue; - - if(CLOSURE_IS_BSDF_OR_BSSRDF(sci->type)) { - if(sci->N != scj->N) - continue; - else if(CLOSURE_IS_BSDF_ANISOTROPIC(sci->type) && sci->T != scj->T) - continue; - } - - if((sd->flag & SD_BSDF_HAS_CUSTOM) && !(sci->custom1 == scj->custom1 && sci->custom2 == scj->custom2 && sci->custom3 == scj->custom3)) + if(!bsdf_merge(sci, scj)) continue; sci->weight += scj->weight; @@ -542,12 +538,18 @@ ccl_device_inline 
void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, } #endif -ccl_device void shader_bsdf_eval(KernelGlobals *kg, - ShaderData *sd, - const float3 omega_in, - BsdfEval *eval, - float light_pdf, - bool use_mis) + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif +void shader_bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const float3 omega_in, + BsdfEval *eval, + float light_pdf, + bool use_mis) { bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); @@ -566,9 +568,13 @@ ccl_device void shader_bsdf_eval(KernelGlobals *kg, } } -ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd, - float randu, float randv, BsdfEval *bsdf_eval, - float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, + ShaderData *sd, + float randu, float randv, + BsdfEval *bsdf_eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { int sampled = 0; @@ -741,8 +747,9 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; eval += sc->weight*ao_factor; - N += sc->N*average(sc->weight); + N += bsdf->N*average(sc->weight); } else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) { eval += sc->weight; @@ -759,6 +766,7 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac return eval; } +#ifdef __SUBSURFACE__ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); @@ -769,11 +777,12 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSSRDF(sc->type)) { + const Bssrdf *bssrdf = (const Bssrdf*)sc; float avg_weight = fabsf(average(sc->weight)); - N += sc->N*avg_weight; + N 
+= bssrdf->N*avg_weight; eval += sc->weight; - texture_blur += sc->data1*avg_weight; + texture_blur += bssrdf->texture_blur*avg_weight; weight_sum += avg_weight; } } @@ -786,6 +795,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b return eval; } +#endif /* Emission */ @@ -831,6 +841,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_ ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx) { ccl_fetch(sd, num_closure) = 0; + ccl_fetch(sd, num_closure_extra) = 0; ccl_fetch(sd, randb_closure) = randb; #ifdef __OSL__ @@ -861,33 +872,33 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, int path_flag, ShaderContext ctx) { ccl_fetch(sd, num_closure) = 0; + ccl_fetch(sd, num_closure_extra) = 0; ccl_fetch(sd, randb_closure) = 0.0f; +#ifdef __SVM__ #ifdef __OSL__ if(kg->osl) { - return OSLShader::eval_background(kg, sd, state, path_flag, ctx); + OSLShader::eval_background(kg, sd, state, path_flag, ctx); } else #endif - { -#ifdef __SVM__ svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); + } - float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); - if(CLOSURE_IS_BACKGROUND(sc->type)) - eval += sc->weight; - } + if(CLOSURE_IS_BACKGROUND(sc->type)) + eval += sc->weight; + } - return eval; + return eval; #else - return make_float3(0.8f, 0.8f, 0.8f); + return make_float3(0.8f, 0.8f, 0.8f); #endif - } } /* Volume */ @@ -998,12 +1009,17 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData * /* Volume Evaluation */ -ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, - PathState *state, VolumeStack 
*stack, int path_flag, ShaderContext ctx) +ccl_device_inline void shader_eval_volume(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + VolumeStack *stack, + int path_flag, + ShaderContext ctx) { /* reset closures once at the start, we will be accumulating the closures * for all volumes in the stack into a single array of closures */ sd->num_closure = 0; + sd->num_closure_extra = 0; sd->flag = 0; for(int i = 0; stack[i].shader != SHADER_NONE; i++) { @@ -1051,6 +1067,7 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx) { ccl_fetch(sd, num_closure) = 0; + ccl_fetch(sd, num_closure_extra) = 0; ccl_fetch(sd, randb_closure) = 0.0f; /* this will modify sd->P */ diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index d1576754d2e..db2fc84834a 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -75,12 +75,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, } uint num_hits; - if(max_hits == 0) { - blocked = true; - num_hits = 0; - } else { - blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits); - } + blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits); /* if no opaque surface found but we did find transparent hits, shade them */ if(!blocked && num_hits > 0) { diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index b048bd38fc9..ba45eea6388 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -85,7 +85,16 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha return NULL; } -ccl_device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all) +#ifndef __KERNEL_GPU__ +ccl_device_noinline +#else 
+ccl_device_inline +#endif +float3 subsurface_scatter_eval(ShaderData *sd, + ShaderClosure *sc, + float disk_r, + float r, + bool all) { #ifdef BSSRDF_MULTI_EVAL /* this is the veach one-sample model with balance heuristic, some pdf @@ -140,24 +149,21 @@ ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 wei { sd->flag &= ~SD_CLOSURE_FLAGS; sd->randb_closure = 0.0f; + sd->num_closure = 0; + sd->num_closure_extra = 0; if(hit) { - ShaderClosure *sc = &sd->closure[0]; - sd->num_closure = 1; - - sc->weight = weight; - sc->sample_weight = 1.0f; - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->N = N; - sd->flag |= bsdf_diffuse_setup(sc); - - /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes - * can recognize it as not being a regular diffuse closure */ - sc->type = CLOSURE_BSDF_BSSRDF_ID; + DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); + + if(bsdf) { + bsdf->N = N; + sd->flag |= bsdf_diffuse_setup(bsdf); + + /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes + * can recognize it as not being a regular diffuse closure */ + bsdf->type = CLOSURE_BSDF_BSSRDF_ID; + } } - else - sd->num_closure = 0; } /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */ @@ -217,7 +223,12 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, /* Subsurface scattering step, from a point on the surface to other * nearby points on the same object. 
*/ -ccl_device int subsurface_scatter_multi_intersect( +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif +int subsurface_scatter_multi_intersect( KernelGlobals *kg, SubsurfaceIntersection* ss_isect, ShaderData *sd, diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 5ba262c1044..7d6fec02331 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -41,11 +41,16 @@ KERNEL_TEX(float4, texture_float4, __objects_vector) KERNEL_TEX(uint, texture_uint, __tri_shader) KERNEL_TEX(float4, texture_float4, __tri_vnormal) KERNEL_TEX(uint4, texture_uint4, __tri_vindex) +KERNEL_TEX(uint, texture_uint, __tri_patch) +KERNEL_TEX(float2, texture_float2, __tri_patch_uv) /* curves */ KERNEL_TEX(float4, texture_float4, __curves) KERNEL_TEX(float4, texture_float4, __curve_keys) +/* patches */ +KERNEL_TEX(uint, texture_uint, __patches) + /* attributes */ KERNEL_TEX(uint4, texture_uint4, __attributes_map) KERNEL_TEX(float, texture_float, __attributes_float) @@ -173,9 +178,6 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_086) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_087) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_088) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_089) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_090) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_091) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_092) # else /* bindless textures */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a9be2ae717a..f3b10c21b9d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN /* constants */ -#define OBJECT_SIZE 11 +#define OBJECT_SIZE 12 #define OBJECT_VECTOR_SIZE 6 #define LIGHT_SIZE 5 #define FILTER_TABLE_SIZE 1024 @@ 
-573,8 +573,13 @@ typedef enum PrimitiveType { /* Attributes */ -#define ATTR_PRIM_TYPES 2 -#define ATTR_PRIM_CURVE 1 +typedef enum AttributePrimitive { + ATTR_PRIM_TRIANGLE = 0, + ATTR_PRIM_CURVE, + ATTR_PRIM_SUBD, + + ATTR_PRIM_TYPES +} AttributePrimitive; typedef enum AttributeElement { ATTR_ELEMENT_NONE, @@ -619,6 +624,18 @@ typedef enum AttributeStandard { ATTR_STD_NOT_FOUND = ~0 } AttributeStandard; +typedef enum AttributeFlag { + ATTR_FINAL_SIZE = (1 << 0), + ATTR_SUBDIVIDED = (1 << 1), +} AttributeFlag; + +typedef struct AttributeDescriptor { + AttributeElement element; + NodeAttributeType type; + uint flags; /* see enum AttributeFlag */ + int offset; +} AttributeDescriptor; + /* Closure data */ #ifdef __MULTI_CLOSURE__ @@ -631,33 +648,26 @@ typedef enum AttributeStandard { # define MAX_CLOSURE 1 #endif -/* This struct is to be 16 bytes aligned, we also keep some extra precautions: - * - All the float3 members are in the beginning of the struct, so compiler - * does not put own padding trying to align this members. - * - We make sure OSL pointer is also 16 bytes aligned. - */ -typedef ccl_addr_space struct ShaderClosure { - float3 weight; - float3 N; - float3 T; - - ClosureType type; - float sample_weight; - float data0; - float data1; - float data2; - - /* Following fields could be used to store pre-calculated - * values by various BSDF closures for more effective sampling - * and evaluation. - */ - float custom1; - float custom2; - float custom3; +/* This struct is the base class for all closures. The common members are + * duplicated in all derived classes since we don't have C++ in the kernel + * yet, and because it lets us lay out the members to minimize padding. The + * weight member is located at the beginning of the struct for this reason. + * + * ShaderClosure has a fixed size, and any extra space must be allocated + * with closure_alloc_extra(). 
+ * + * We pad the struct to 80 bytes and ensure it is aligned to 16 bytes, which + * we assume to be the maximum required alignment for any struct. */ -#ifdef __OSL__ - void *prim, *pad4; -#endif +#define SHADER_CLOSURE_BASE \ + float3 weight; \ + ClosureType type; \ + float sample_weight \ + +typedef ccl_addr_space struct ccl_align(16) ShaderClosure { + SHADER_CLOSURE_BASE; + + float data[14]; /* pad to 80 bytes */ } ShaderClosure; /* Shader Context @@ -692,11 +702,10 @@ enum ShaderDataFlag { SD_AO = (1 << 8), /* have ao closure? */ SD_TRANSPARENT = (1 << 9), /* have transparent closure? */ SD_BSDF_NEEDS_LCG = (1 << 10), - SD_BSDF_HAS_CUSTOM = (1 << 11), /* are the custom variables relevant? */ SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF| SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO| - SD_BSDF_NEEDS_LCG|SD_BSDF_HAS_CUSTOM), + SD_BSDF_NEEDS_LCG), /* shader flags */ SD_USE_MIS = (1 << 12), /* direct light sample */ @@ -729,94 +738,97 @@ enum ShaderDataFlag { SD_OBJECT_INTERSECTS_VOLUME) }; -struct KernelGlobals; - #ifdef __SPLIT_KERNEL__ # define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0)) # if defined(__SPLIT_KERNEL_AOS__) /* ShaderData is stored as an Array-of-Structures */ -# define ccl_fetch(s, t) (s[SD_THREAD].t) -# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].t[index]) +# define ccl_soa_member(type, name) type soa_##name +# define ccl_fetch(s, t) (s[SD_THREAD].soa_##t) +# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index]) # else /* ShaderData is stored as an Structure-of-Arrays */ # define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1)) # define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t) # define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0) -# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(t) + SD_FIELD_SIZE(t) * SD_THREAD - SD_OFFSETOF(t)))->t) +# define ccl_soa_member(type, name) type soa_##name +# define 
ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(soa_##t) + SD_FIELD_SIZE(soa_##t) * SD_THREAD - SD_OFFSETOF(soa_##t)))->soa_##t) # define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index]) # endif #else +# define ccl_soa_member(type, name) type name # define ccl_fetch(s, t) (s->t) # define ccl_fetch_array(s, t, index) (&s->t[index]) #endif typedef ccl_addr_space struct ShaderData { /* position */ - float3 P; + ccl_soa_member(float3, P); /* smooth normal for shading */ - float3 N; + ccl_soa_member(float3, N); /* true geometric normal */ - float3 Ng; + ccl_soa_member(float3, Ng); /* view/incoming direction */ - float3 I; + ccl_soa_member(float3, I); /* shader id */ - int shader; + ccl_soa_member(int, shader); /* booleans describing shader, see ShaderDataFlag */ - int flag; + ccl_soa_member(int, flag); /* primitive id if there is one, ~0 otherwise */ - int prim; + ccl_soa_member(int, prim); /* combined type and curve segment for hair */ - int type; + ccl_soa_member(int, type); /* parametric coordinates * - barycentric weights for triangles */ - float u; - float v; + ccl_soa_member(float, u); + ccl_soa_member(float, v); /* object id if there is one, ~0 otherwise */ - int object; + ccl_soa_member(int, object); /* motion blur sample time */ - float time; + ccl_soa_member(float, time); /* length of the ray being shaded */ - float ray_length; + ccl_soa_member(float, ray_length); #ifdef __RAY_DIFFERENTIALS__ /* differential of P. these are orthogonal to Ng, not N */ - differential3 dP; + ccl_soa_member(differential3, dP); /* differential of I */ - differential3 dI; + ccl_soa_member(differential3, dI); /* differential of u, v */ - differential du; - differential dv; + ccl_soa_member(differential, du); + ccl_soa_member(differential, dv); #endif #ifdef __DPDU__ /* differential of P w.r.t. parametric coordinates. note that dPdu is * not readily suitable as a tangent for shading on triangles. 
*/ - float3 dPdu; - float3 dPdv; + ccl_soa_member(float3, dPdu); + ccl_soa_member(float3, dPdv); #endif #ifdef __OBJECT_MOTION__ /* object <-> world space transformations, cached to avoid * re-interpolating them constantly for shading */ - Transform ob_tfm; - Transform ob_itfm; + ccl_soa_member(Transform, ob_tfm); + ccl_soa_member(Transform, ob_itfm); #endif /* Closure data, we store a fixed array of closures */ - struct ShaderClosure closure[MAX_CLOSURE]; - int num_closure; - float randb_closure; + ccl_soa_member(struct ShaderClosure, closure[MAX_CLOSURE]); + ccl_soa_member(int, num_closure); + ccl_soa_member(int, num_closure_extra); + ccl_soa_member(float, randb_closure); + ccl_soa_member(float3, svm_closure_weight); /* LCG state for closures that require additional random numbers. */ - uint lcg_state; + ccl_soa_member(uint, lcg_state); /* ray start position, only set for backgrounds */ - float3 ray_P; - differential3 ray_dP; + ccl_soa_member(float3, ray_P); + ccl_soa_member(differential3, ray_dP); #ifdef __OSL__ struct KernelGlobals * osl_globals; @@ -1234,6 +1246,16 @@ enum RayState { #define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag))) #define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag) +/* Patches */ + +#define PATCH_MAX_CONTROL_VERTS 16 + +/* Patch map node flags */ + +#define PATCH_MAP_NODE_IS_SET (1 << 30) +#define PATCH_MAP_NODE_IS_LEAF (1 << 31) +#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF)) + CCL_NAMESPACE_END #endif /* __KERNEL_TYPES_H__ */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index bf8301fe5fb..9dafed9afd1 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -36,7 +36,11 @@ typedef struct VolumeShaderCoefficients { } VolumeShaderCoefficients; /* evaluate shader to get extinction coefficient at P */ -ccl_device bool 
volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction) +ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + float3 P, + float3 *extinction) { sd->P = P; shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); @@ -58,7 +62,11 @@ ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *s } /* evaluate shader to get absorption, scattering and emission at P */ -ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff) +ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + float3 P, + VolumeShaderCoefficients *coeff) { sd->P = P; shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME); @@ -1029,7 +1037,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, int stack_index = 0, enclosed_index = 0; #ifdef __VOLUME_RECORD_ALL__ - Intersection hits[2*VOLUME_STACK_SIZE]; + Intersection hits[2*VOLUME_STACK_SIZE + 1]; uint num_hits = scene_intersect_volume_all(kg, &volume_ray, hits, @@ -1199,7 +1207,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, Ray volume_ray = *ray; # ifdef __VOLUME_RECORD_ALL__ - Intersection hits[2*VOLUME_STACK_SIZE]; + Intersection hits[2*VOLUME_STACK_SIZE + 1]; uint num_hits = scene_intersect_volume_all(kg, &volume_ray, hits, diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 9cf4f2d759a..98de40e5a8a 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -25,7 +25,6 @@ set(SRC ) set(HEADER_SRC - osl_bssrdf.h osl_closures.h osl_globals.h osl_services.h diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp index 85fa7b34bcc..d835f9be45c 
100644 --- a/intern/cycles/kernel/osl/background.cpp +++ b/intern/cycles/kernel/osl/background.cpp @@ -36,6 +36,9 @@ #include "osl_closures.h" +#include "kernel_compat_cpu.h" +#include "closure/alloc.h" + CCL_NAMESPACE_BEGIN using namespace OSL; @@ -48,7 +51,10 @@ using namespace OSL; /// class GenericBackgroundClosure : public CClosurePrimitive { public: - GenericBackgroundClosure() : CClosurePrimitive(Background) {} + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, weight); + } }; /// Holdout closure @@ -60,7 +66,11 @@ public: /// class HoldoutClosure : CClosurePrimitive { public: - HoldoutClosure () : CClosurePrimitive(Holdout) {} + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight); + sd->flag |= SD_HOLDOUT; + } }; /// ambient occlusion closure @@ -71,7 +81,11 @@ public: /// class AmbientOcclusionClosure : public CClosurePrimitive { public: - AmbientOcclusionClosure () : CClosurePrimitive(AmbientOcclusion) {} + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, weight); + sd->flag |= SD_AO; + } }; ClosureParam *closure_background_params() diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp index b5c0d76cf37..bc26f42b559 100644 --- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp @@ -39,6 +39,7 @@ #include "kernel_types.h" #include "kernel_montecarlo.h" +#include "closure/alloc.h" #include "closure/bsdf_diffuse_ramp.h" CCL_NAMESPACE_BEGIN @@ -47,51 +48,30 @@ using namespace OSL; class DiffuseRampClosure : public CBSDFClosure { public: + DiffuseRampBsdf params; Color3 colors[8]; - float3 fcolors[8]; - DiffuseRampClosure() : CBSDFClosure(LABEL_DIFFUSE) - {} - - void setup() + void setup(ShaderData 
*sd, int /* path_flag */, float3 weight) { - sc.prim = this; - m_shaderdata_flag = bsdf_diffuse_ramp_setup(&sc); + DiffuseRampBsdf *bsdf = (DiffuseRampBsdf*)bsdf_alloc_osl(sd, sizeof(DiffuseRampBsdf), weight, &params); - for(int i = 0; i < 8; i++) - fcolors[i] = TO_FLOAT3(colors[i]); - } + if(bsdf) { + bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8); - void blur(float roughness) - { - bsdf_diffuse_ramp_blur(&sc, roughness); - } + if(bsdf->colors) { + for(int i = 0; i < 8; i++) + bsdf->colors[i] = TO_FLOAT3(colors[i]); - float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const - { - return bsdf_diffuse_ramp_eval_reflect(&sc, fcolors, omega_out, omega_in, &pdf); - } - - float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const - { - return bsdf_diffuse_ramp_eval_transmit(&sc, fcolors, omega_out, omega_in, &pdf); - } - - int sample(const float3 &Ng, - const float3 &omega_out, const float3 &domega_out_dx, const float3 &domega_out_dy, - float randu, float randv, - float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, - float &pdf, float3 &eval) const - { - return bsdf_diffuse_ramp_sample(&sc, fcolors, Ng, omega_out, domega_out_dx, domega_out_dy, - randu, randv, &eval, &omega_in, &domega_in_dx, &domega_in_dy, &pdf); + sd->flag |= bsdf_diffuse_ramp_setup(bsdf); + } + } } }; ClosureParam *closure_bsdf_diffuse_ramp_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, sc.N), + CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N), CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8), CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"), CLOSURE_FINISH_PARAM(DiffuseRampClosure) diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp index bc73d80cd78..14c7644936e 100644 --- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp @@ -38,6 +38,7 @@ #include
"osl_closures.h" #include "kernel_types.h" +#include "closure/alloc.h" #include "closure/bsdf_phong_ramp.h" CCL_NAMESPACE_BEGIN @@ -46,52 +47,31 @@ using namespace OSL; class PhongRampClosure : public CBSDFClosure { public: + PhongRampBsdf params; Color3 colors[8]; - float3 fcolors[8]; - PhongRampClosure() : CBSDFClosure(LABEL_GLOSSY) - {} - - void setup() + void setup(ShaderData *sd, int /* path_flag */, float3 weight) { - sc.prim = this; - m_shaderdata_flag = bsdf_phong_ramp_setup(&sc); + PhongRampBsdf *bsdf = (PhongRampBsdf*)bsdf_alloc_osl(sd, sizeof(PhongRampBsdf), weight, &params); - for(int i = 0; i < 8; i++) - fcolors[i] = TO_FLOAT3(colors[i]); - } + if(bsdf) { + bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8); - void blur(float roughness) - { - bsdf_phong_ramp_blur(&sc, roughness); - } + if(bsdf->colors) { + for(int i = 0; i < 8; i++) + bsdf->colors[i] = TO_FLOAT3(colors[i]); - float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const - { - return bsdf_phong_ramp_eval_reflect(&sc, fcolors, omega_out, omega_in, &pdf); - } - - float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const - { - return bsdf_phong_ramp_eval_transmit(&sc, fcolors, omega_out, omega_in, &pdf); - } - - int sample(const float3 &Ng, - const float3 &omega_out, const float3 &domega_out_dx, const float3 &domega_out_dy, - float randu, float randv, - float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, - float &pdf, float3 &eval) const - { - return bsdf_phong_ramp_sample(&sc, fcolors, Ng, omega_out, domega_out_dx, domega_out_dy, - randu, randv, &eval, &omega_in, &domega_in_dx, &domega_in_dy, &pdf); + sd->flag |= bsdf_phong_ramp_setup(bsdf); + } + } } }; ClosureParam *closure_bsdf_phong_ramp_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(PhongRampClosure, sc.N), - CLOSURE_FLOAT_PARAM(PhongRampClosure, sc.data0), + CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N), +
CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent), CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8), CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"), CLOSURE_FINISH_PARAM(PhongRampClosure) diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp index f91fd6e015c..3f13e08b302 100644 --- a/intern/cycles/kernel/osl/emissive.cpp +++ b/intern/cycles/kernel/osl/emissive.cpp @@ -36,7 +36,9 @@ #include "osl_closures.h" +#include "kernel_compat_cpu.h" #include "kernel_types.h" +#include "closure/alloc.h" #include "closure/emissive.h" CCL_NAMESPACE_BEGIN @@ -52,25 +54,10 @@ using namespace OSL; /// class GenericEmissiveClosure : public CClosurePrimitive { public: - GenericEmissiveClosure() : CClosurePrimitive(Emissive) { } - - Color3 eval(const Vec3 &Ng, const Vec3 &omega_out) const - { - float3 result = emissive_simple_eval(TO_FLOAT3(Ng), TO_FLOAT3(omega_out)); - return TO_COLOR3(result); - } - - void sample(const Vec3 &Ng, float randu, float randv, - Vec3 &omega_out, float &pdf) const - { - float3 omega_out_; - emissive_sample(TO_FLOAT3(Ng), randu, randv, &omega_out_, &pdf); - omega_out = TO_VEC3(omega_out_); - } - - float pdf(const Vec3 &Ng, const Vec3 &omega_out) const + void setup(ShaderData *sd, int /* path_flag */, float3 weight) { - return emissive_pdf(TO_FLOAT3(Ng), TO_FLOAT3(omega_out)); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, weight); + sd->flag |= SD_EMISSION; } }; diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index da4afb138f6..3614717e28c 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -30,17 +30,15 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <OpenImageIO/fmath.h> - #include <OSL/genclosure.h> #include "kernel_compat_cpu.h" -#include "osl_bssrdf.h" #include "osl_closures.h" #include "kernel_types.h" #include "kernel_montecarlo.h" +#include "closure/alloc.h" #include "closure/bsdf_diffuse.h" #include "closure/bssrdf.h" @@ -48,27 +46,83 @@ CCL_NAMESPACE_BEGIN using namespace OSL; +class CBSSRDFClosure : public CClosurePrimitive { +public: + Bssrdf params; + float3 radius; + float3 albedo; + + void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type) + { + float sample_weight = fabsf(average(weight)); + + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { + radius = make_float3(0.0f, 0.0f, 0.0f); + } + + if(sample_weight > CLOSURE_WEIGHT_CUTOFF) { + /* sharpness */ + float sharpness = params.sharpness; + /* texture color blur */ + float texture_blur = params.texture_blur; + + /* create one closure per color channel */ + Bssrdf *bssrdf = bssrdf_alloc(sd, make_float3(weight.x, 0.0f, 0.0f)); + if(bssrdf) { + bssrdf->sample_weight = sample_weight; + bssrdf->radius = radius.x; + bssrdf->texture_blur = texture_blur; + bssrdf->albedo = albedo.x; + bssrdf->sharpness = sharpness; + bssrdf->N = params.N; + ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + } + + bssrdf = bssrdf_alloc(sd, make_float3(0.0f, weight.y, 0.0f)); + if(bssrdf) { + bssrdf->sample_weight = sample_weight; + bssrdf->radius = radius.y; + bssrdf->texture_blur = texture_blur; + bssrdf->albedo = albedo.y; + bssrdf->sharpness = sharpness; + bssrdf->N = params.N; + ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + } + + bssrdf = bssrdf_alloc(sd, make_float3(0.0f, 0.0f, weight.z)); + if(bssrdf) { + bssrdf->sample_weight = sample_weight; + bssrdf->radius = radius.z; + bssrdf->texture_blur = texture_blur; + bssrdf->albedo = albedo.z; + 
bssrdf->sharpness = sharpness; + bssrdf->N = params.N; + ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + } + } + } +}; + /* Cubic */ class CubicBSSRDFClosure : public CBSSRDFClosure { public: - CubicBSSRDFClosure() - {} - - void setup() + void setup(ShaderData *sd, int path_flag, float3 weight) { - sc.type = CLOSURE_BSSRDF_CUBIC_ID; - sc.data0 = fabsf(average(radius)); + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID); } }; ClosureParam *closure_bssrdf_cubic_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N), + CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, params.N), CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius), - CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1), - CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.T.x), + CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, params.texture_blur), + CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, params.sharpness), CLOSURE_STRING_KEYPARAM(CubicBSSRDFClosure, label, "label"), CLOSURE_FINISH_PARAM(CubicBSSRDFClosure) }; @@ -81,22 +135,18 @@ CCLOSURE_PREPARE(closure_bssrdf_cubic_prepare, CubicBSSRDFClosure) class GaussianBSSRDFClosure : public CBSSRDFClosure { public: - GaussianBSSRDFClosure() - {} - - void setup() + void setup(ShaderData *sd, int path_flag, float3 weight) { - sc.type = CLOSURE_BSSRDF_GAUSSIAN_ID; - sc.data0 = fabsf(average(radius)); + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID); } }; ClosureParam *closure_bssrdf_gaussian_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N), + CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, params.N), CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius), - CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1), + CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, params.texture_blur), CLOSURE_STRING_KEYPARAM(GaussianBSSRDFClosure, label, "label"), CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure) }; @@ -109,22 +159,18 @@ CCLOSURE_PREPARE(closure_bssrdf_gaussian_prepare, GaussianBSSRDFClosure) class 
BurleyBSSRDFClosure : public CBSSRDFClosure { public: - BurleyBSSRDFClosure() - {} - - void setup() + void setup(ShaderData *sd, int path_flag, float3 weight) { - sc.type = CLOSURE_BSSRDF_BURLEY_ID; - sc.data0 = fabsf(average(radius)); + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); } }; ClosureParam *closure_bssrdf_burley_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(BurleyBSSRDFClosure, sc.N), + CLOSURE_FLOAT3_PARAM(BurleyBSSRDFClosure, params.N), CLOSURE_FLOAT3_PARAM(BurleyBSSRDFClosure, radius), - CLOSURE_FLOAT_PARAM(BurleyBSSRDFClosure, sc.data1), + CLOSURE_FLOAT_PARAM(BurleyBSSRDFClosure, params.texture_blur), CLOSURE_FLOAT3_PARAM(BurleyBSSRDFClosure, albedo), CLOSURE_STRING_KEYPARAM(BurleyBSSRDFClosure, label, "label"), CLOSURE_FINISH_PARAM(BurleyBSSRDFClosure) diff --git a/intern/cycles/kernel/osl/osl_bssrdf.h b/intern/cycles/kernel/osl/osl_bssrdf.h deleted file mode 100644 index d81ecade543..00000000000 --- a/intern/cycles/kernel/osl/osl_bssrdf.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __OSL_BSSRDF_H__ -#define __OSL_BSSRDF_H__ - -#include <OSL/oslclosure.h> -#include <OSL/oslexec.h> -#include <OSL/genclosure.h> - -#include "osl_closures.h" - -#include "kernel_types.h" - -#include "util_types.h" - -CCL_NAMESPACE_BEGIN - -class CBSSRDFClosure : public CClosurePrimitive { -public: - ShaderClosure sc; - float3 radius; - float3 albedo; - - CBSSRDFClosure() : CClosurePrimitive(BSSRDF) { } - int scattering() const { return LABEL_DIFFUSE; } -}; - -CCL_NAMESPACE_END - -#endif /* __OSL_BSSRDF_H__ */ - diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 02b1491489c..94de782dca0 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -46,6 +46,7 @@ #include "kernel_montecarlo.h" #include "kernel_random.h" +#include "closure/alloc.h" #include "closure/bsdf_util.h" #include "closure/bsdf_ashikhmin_velvet.h" #include "closure/bsdf_diffuse.h" @@ -66,112 +67,112 @@ using namespace OSL; /* BSDF class definitions */ -BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, diffuse, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(DiffuseClosure, sc.N), +BSDF_CLOSURE_CLASS_BEGIN(Diffuse, 
diffuse, DiffuseBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N), BSDF_CLOSURE_CLASS_END(Diffuse, diffuse) -BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, translucent, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(TranslucentClosure, sc.N), +BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N), BSDF_CLOSURE_CLASS_END(Translucent, translucent) -BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, oren_nayar, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(OrenNayarClosure, sc.N), - CLOSURE_FLOAT_PARAM(OrenNayarClosure, sc.data0), +BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N), + CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness), BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar) -BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, reflection, LABEL_SINGULAR) - CLOSURE_FLOAT3_PARAM(ReflectionClosure, sc.N), +BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR) + CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N), BSDF_CLOSURE_CLASS_END(Reflection, reflection) -BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, refraction, LABEL_SINGULAR) - CLOSURE_FLOAT3_PARAM(RefractionClosure, sc.N), - CLOSURE_FLOAT_PARAM(RefractionClosure, sc.data0), +BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR) + CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N), + CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior), BSDF_CLOSURE_CLASS_END(Refraction, refraction) -BSDF_CLOSURE_CLASS_BEGIN(Transparent, transparent, transparent, LABEL_SINGULAR) +BSDF_CLOSURE_CLASS_BEGIN(Transparent, transparent, ShaderClosure, LABEL_SINGULAR) BSDF_CLOSURE_CLASS_END(Transparent, transparent) -BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, ashikhmin_velvet, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, sc.N), - CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, sc.data0), 
+BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N), + CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma), BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) -BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, ashikhmin_shirley, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.N), - CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.T), - CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, sc.data0), - CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, sc.data1), +BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N), + CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T), + CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y), BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso) -BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, diffuse_toon, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, sc.N), - CLOSURE_FLOAT_PARAM(DiffuseToonClosure, sc.data0), - CLOSURE_FLOAT_PARAM(DiffuseToonClosure, sc.data1), +BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N), + CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size), + CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth), BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon) -BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, glossy_toon, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(GlossyToonClosure, sc.N), - CLOSURE_FLOAT_PARAM(GlossyToonClosure, sc.data0), - CLOSURE_FLOAT_PARAM(GlossyToonClosure, sc.data1), +BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY) + CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N), + CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size), + 
CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth), BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, sc.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, sc.data0), +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x), BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.N), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.T), - CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data0), - CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data1), +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y), BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, sc.N), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, sc.data0), +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x), BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, microfacet_beckmann, 
LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.N), - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.T), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data0), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data1), +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y), BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY|LABEL_TRANSMIT) - CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, sc.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data0), - CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data2), +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT) + CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior), BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY|LABEL_TRANSMIT) - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, sc.N), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data0), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data2), +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT) + 
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior), BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) -BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, hair_reflection, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.N), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data0), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1), - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.T), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data2), +BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, unused), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2), + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset), BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) -BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, hair_transmission, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, sc.N), - CLOSURE_FLOAT_PARAM(HairTransmissionClosure, sc.data0), - CLOSURE_FLOAT_PARAM(HairTransmissionClosure, sc.data1), - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.T), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data2), +BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) + CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, unused), + CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1), + CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2), + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset), BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) -VOLUME_CLOSURE_CLASS_BEGIN(VolumeHenyeyGreenstein, 
henyey_greenstein, LABEL_VOLUME_SCATTER) - CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, sc.data0), +VOLUME_CLOSURE_CLASS_BEGIN(VolumeHenyeyGreenstein, henyey_greenstein, HenyeyGreensteinVolume, LABEL_VOLUME_SCATTER) + CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g), VOLUME_CLOSURE_CLASS_END(VolumeHenyeyGreenstein, henyey_greenstein) -VOLUME_CLOSURE_CLASS_BEGIN(VolumeAbsorption, absorption, LABEL_SINGULAR) +VOLUME_CLOSURE_CLASS_BEGIN(VolumeAbsorption, absorption, ShaderClosure, LABEL_SINGULAR) VOLUME_CLOSURE_CLASS_END(VolumeAbsorption, absorption) /* Registration */ @@ -258,69 +259,64 @@ void OSLShader::register_closures(OSLShadingSystem *ss_) volume_absorption_params(), volume_absorption_prepare); } -/* Multiscattering GGX closures */ - -class MicrofacetMultiClosure : public CBSDFClosure { -public: - float3 color; - - /* Technically, the MultiGGX Glass closure may also transmit. - * However, since this is set statically and only used for caustic flags, this is probably as good as it gets. 
*/ - MicrofacetMultiClosure() : CBSDFClosure(LABEL_GLOSSY|LABEL_REFLECT) - { - } +/* BSDF Closure */ - void setup() - { - sc.prim = NULL; - sc.custom1 = color.x; - sc.custom2 = color.y; - sc.custom3 = color.z; +bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering) +{ + /* caustic options */ + if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { + KernelGlobals *kg = sd->osl_globals; + + if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || + (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) + { + return true; + } } - void blur(float roughness) - { - } + return false; +} - float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const - { - pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); - } +/* Multiscattering GGX closures */ - float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const - { - pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); - } +class MicrofacetMultiClosure : public CBSDFClosure { +public: + MicrofacetBsdf params; + float3 color; - int sample(const float3 &Ng, - const float3 &omega_out, const float3 &domega_out_dx, const float3 &domega_out_dy, - float randu, float randv, - float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, - float &pdf, float3 &eval) const + MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) { - pdf = 0; - return LABEL_NONE; + /* Technically, the MultiGGX Glass closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. 
*/ + if(!skip(sd, path_flag, LABEL_GLOSSY|LABEL_REFLECT)) { + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(bsdf && extra) { + bsdf->extra = extra; + bsdf->extra->color = color; + return bsdf; + } + } + + return NULL; } }; class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure { public: - MicrofacetMultiGGXClosure() : MicrofacetMultiClosure() {} - - void setup() + void setup(ShaderData *sd, int path_flag, float3 weight) { - MicrofacetMultiClosure::setup(); - m_shaderdata_flag = bsdf_microfacet_multi_ggx_setup(&sc); + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_setup(bsdf) : 0; } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, sc.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, sc.data0), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure) @@ -331,22 +327,20 @@ CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXCl class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure { public: - MicrofacetMultiGGXAnisoClosure() : MicrofacetMultiClosure() {} - - void setup() + void setup(ShaderData *sd, int path_flag, float3 weight) { - MicrofacetMultiClosure::setup(); - m_shaderdata_flag = bsdf_microfacet_multi_ggx_aniso_setup(&sc); + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + sd->flag |= (bsdf) ? 
bsdf_microfacet_multi_ggx_aniso_setup(bsdf) : 0; } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, sc.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, sc.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, sc.data0), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, sc.data1), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y), CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure) @@ -359,19 +353,19 @@ class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure { public: MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() {} - void setup() + void setup(ShaderData *sd, int path_flag, float3 weight) { - MicrofacetMultiClosure::setup(); - m_shaderdata_flag = bsdf_microfacet_multi_ggx_glass_setup(&sc); + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + sd->flag |= (bsdf) ? 
bsdf_microfacet_multi_ggx_glass_setup(bsdf) : 0; } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, sc.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, sc.data0), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, sc.data2), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior), CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure) diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index c5a1a29b6af..cd7b33703ff 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -90,21 +90,7 @@ void name(RendererServices *, int id, void *data) \ class CClosurePrimitive { public: - enum Category { - BSDF, ///< Reflective and/or transmissive surface - BSSRDF, ///< Sub-surface light transfer - Emissive, ///< Light emission - Background, ///< Background emission - Volume, ///< Volume scattering - Holdout, ///< Holdout from alpha - AmbientOcclusion, ///< Ambient occlusion - }; - - CClosurePrimitive (Category category_) : category (category_) {} - virtual ~CClosurePrimitive() {} - virtual void setup() {} - - Category category; + virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0; OSL::ustring label; }; @@ -113,68 +99,22 @@ public: class CBSDFClosure : public CClosurePrimitive { public: - ShaderClosure sc; - - CBSDFClosure(int scattering) : CClosurePrimitive(BSDF), - m_scattering_label(scattering), m_shaderdata_flag(0) - {} - - int scattering() const { return m_scattering_label; } - int shaderdata_flag() const { return m_shaderdata_flag; } - - virtual void blur(float roughness) = 0; - virtual float3 eval_reflect(const float3 
&omega_out, const float3 &omega_in, float &pdf) const = 0; - virtual float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float &pdf) const = 0; - - virtual int sample(const float3 &Ng, - const float3 &omega_out, const float3 &domega_out_dx, const float3 &domega_out_dy, - float randu, float randv, - float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, - float &pdf, float3 &eval) const = 0; - -protected: - int m_scattering_label; - int m_shaderdata_flag; + bool skip(const ShaderData *sd, int path_flag, int scattering); }; -#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, svmlower, TYPE) \ +#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \ \ class Upper##Closure : public CBSDFClosure { \ public: \ - Upper##Closure() : CBSDFClosure(TYPE) \ - { \ - } \ + structname params; \ + float3 unused; \ \ - void setup() \ + void setup(ShaderData *sd, int path_flag, float3 weight) \ { \ - sc.prim = NULL; \ - m_shaderdata_flag = bsdf_##lower##_setup(&sc); \ - } \ -\ - void blur(float roughness) \ - { \ - } \ -\ - float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \ - { \ - pdf = 0.0f; \ - return make_float3(0.0f, 0.0f, 0.0f); \ - } \ -\ - float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \ - { \ - pdf = 0.0f; \ - return make_float3(0.0f, 0.0f, 0.0f); \ - } \ -\ - int sample(const float3 &Ng, \ - const float3 &omega_out, const float3 &domega_out_dx, const float3 &domega_out_dy, \ - float randu, float randv, \ - float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, \ - float &pdf, float3 &eval) const \ - { \ - pdf = 0; \ - return LABEL_NONE; \ + if(!skip(sd, path_flag, TYPE)) { \ + structname *bsdf = (structname*)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \ + sd->flag |= (bsdf) ? 
bsdf_##lower##_setup(bsdf) : 0; \ + } \ } \ }; \ \ @@ -193,36 +133,18 @@ static ClosureParam *bsdf_##lower##_params() \ \ CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure) - /* Volume */ -class CVolumeClosure : public CClosurePrimitive { -public: - ShaderClosure sc; - - CVolumeClosure(int scattering) : CClosurePrimitive(Volume), - m_scattering_label(scattering), m_shaderdata_flag(0) - {} - ~CVolumeClosure() { } - - int scattering() const { return m_scattering_label; } - int shaderdata_flag() const { return m_shaderdata_flag; } - -protected: - int m_scattering_label; - int m_shaderdata_flag; -}; - -#define VOLUME_CLOSURE_CLASS_BEGIN(Upper, lower, TYPE) \ +#define VOLUME_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \ \ -class Upper##Closure : public CVolumeClosure { \ +class Upper##Closure : public CBSDFClosure { \ public: \ - Upper##Closure() : CVolumeClosure(TYPE) {} \ + structname params; \ \ - void setup() \ + void setup(ShaderData *sd, int path_flag, float3 weight) \ { \ - sc.prim = NULL; \ - m_shaderdata_flag = volume_##lower##_setup(&sc); \ + structname *volume = (structname*)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \ + sd->flag |= (volume) ? 
volume_##lower##_setup(volume) : 0; \ } \ }; \ \ diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h index 916542ec628..8353c4e434b 100644 --- a/intern/cycles/kernel/osl/osl_globals.h +++ b/intern/cycles/kernel/osl/osl_globals.h @@ -59,8 +59,7 @@ struct OSLGlobals { /* attributes */ struct Attribute { TypeDesc type; - AttributeElement elem; - int offset; + AttributeDescriptor desc; ParamValue value; }; diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 2bb2be5e6b3..153ebad6cd2 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -554,13 +554,13 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { float3 fval[3]; - fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset, + fval[0] = primitive_attribute_float3(kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); return set_attribute_float3(fval, type, derivatives, val); } else if(attr.type == TypeDesc::TypeFloat) { float fval[3]; - fval[0] = primitive_attribute_float(kg, sd, attr.elem, attr.offset, + fval[0] = primitive_attribute_float(kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? 
&fval[2] : NULL); return set_attribute_float(fval, type, derivatives, val); } @@ -573,7 +573,7 @@ static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OS const TypeDesc& type, bool derivatives, void *val) { if(attr.type == TypeDesc::TypeMatrix) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr.offset); + Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); return set_attribute_matrix(tfm, type, val); } else { @@ -787,7 +787,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring TypeDesc type, ustring name, void *val) { KernelGlobals *kg = sd->osl_globals; - bool is_curve; + int prim_type = 0; int object; /* lookup of attribute on another object */ @@ -798,25 +798,24 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring return false; object = it->second; - is_curve = false; } else { object = sd->object; - is_curve = (sd->type & PRIMITIVE_ALL_CURVE) != 0; + prim_type = attribute_primitive_type(kg, sd); if(object == OBJECT_NONE) return get_background_attribute(kg, sd, name, type, derivatives, val); } /* find attribute on object */ - object = object*ATTR_PRIM_TYPES + (is_curve == true); + object = object*ATTR_PRIM_TYPES + prim_type; OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object]; OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); if(it != attribute_map.end()) { const OSLGlobals::Attribute& attr = it->second; - if(attr.elem != ATTR_ELEMENT_OBJECT) { + if(attr.desc.element != ATTR_ELEMENT_OBJECT) { /* triangle and vertex attributes */ if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val)) return true; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 6cde7419e10..20dd167708c 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -23,10 +23,6 @@ #include "geom/geom_object.h" -#include "closure/bsdf_diffuse.h" -#include 
"closure/bssrdf.h" - -#include "osl_bssrdf.h" #include "osl_closures.h" #include "osl_globals.h" #include "osl_services.h" @@ -141,8 +137,10 @@ static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd, PathS /* Surface */ -static void flatten_surface_closure_tree(ShaderData *sd, int path_flag, - const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f)) +static void flatten_surface_closure_tree(ShaderData *sd, + int path_flag, + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { /* OSL gives us a closure tree, we flatten it into arrays per * closure type, for evaluation, sampling, etc later on. */ @@ -164,164 +162,10 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag, CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); if(prim) { - ShaderClosure sc; - #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS weight = weight*TO_FLOAT3(comp->w); #endif - sc.weight = weight; - - prim->setup(); - - switch(prim->category) { - case CClosurePrimitive::BSDF: { - CBSDFClosure *bsdf = (CBSDFClosure *)prim; - int scattering = bsdf->scattering(); - int shaderdata_flag = bsdf->shaderdata_flag(); - - /* caustic options */ - if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { - KernelGlobals *kg = sd->osl_globals; - - if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || - (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) - { - return; - } - } - - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - - sc.type = bsdf->sc.type; - sc.N = bsdf->sc.N; - sc.T = bsdf->sc.T; - sc.data0 = bsdf->sc.data0; - sc.data1 = bsdf->sc.data1; - sc.data2 = bsdf->sc.data2; - sc.prim = bsdf->sc.prim; - if(shaderdata_flag & SD_BSDF_HAS_CUSTOM) { - sc.custom1 = bsdf->sc.custom1; - sc.custom2 = bsdf->sc.custom2; - sc.custom3 = bsdf->sc.custom3; - } - - /* add */ - 
if(sc.sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= shaderdata_flag; - } - break; - } - case CClosurePrimitive::Emissive: { - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = CLOSURE_EMISSION_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.data2 = 0.0f; - sc.prim = NULL; - - /* flag */ - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_EMISSION; - } - break; - } - case CClosurePrimitive::AmbientOcclusion: { - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = CLOSURE_AMBIENT_OCCLUSION_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.data2 = 0.0f; - sc.prim = NULL; - - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_AO; - } - break; - } - case CClosurePrimitive::Holdout: { - sc.sample_weight = 0.0f; - sc.type = CLOSURE_HOLDOUT_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.data2 = 0.0f; - sc.prim = NULL; - - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_HOLDOUT; - } - break; - } - case CClosurePrimitive::BSSRDF: { - CBSSRDFClosure *bssrdf = (CBSSRDFClosure *)prim; - float sample_weight = fabsf(average(weight)); - - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure+2 < MAX_CLOSURE) { - sc.sample_weight = sample_weight; - - sc.type = bssrdf->sc.type; - sc.N = bssrdf->sc.N; - sc.data1 = bssrdf->sc.data1; - sc.T.x = bssrdf->sc.T.x; - sc.prim = NULL; - - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) - bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f); - - float3 albedo = - (bssrdf->sc.type == CLOSURE_BSSRDF_BURLEY_ID) - ? 
bssrdf->albedo - : make_float3(0.0f, 0.0f, 0.0f); - - /* create one closure for each color channel */ - if(fabsf(weight.x) > 0.0f) { - sc.weight = make_float3(weight.x, 0.0f, 0.0f); - sc.data0 = bssrdf->radius.x; - sc.data1 = 0.0f; - sc.data2 = albedo.x; - sd->flag |= bssrdf_setup(&sc, sc.type); - sd->closure[sd->num_closure++] = sc; - } - - if(fabsf(weight.y) > 0.0f) { - sc.weight = make_float3(0.0f, weight.y, 0.0f); - sc.data0 = bssrdf->radius.y; - sc.data1 = 0.0f; - sc.data2 = albedo.y; - sd->flag |= bssrdf_setup(&sc, sc.type); - sd->closure[sd->num_closure++] = sc; - } - - if(fabsf(weight.z) > 0.0f) { - sc.weight = make_float3(0.0f, 0.0f, weight.z); - sc.data0 = bssrdf->radius.z; - sc.data1 = 0.0f; - sc.data2 = albedo.z; - sd->flag |= bssrdf_setup(&sc, sc.type); - sd->closure[sd->num_closure++] = sc; - } - } - break; - } - case CClosurePrimitive::Background: - case CClosurePrimitive::Volume: - break; /* not relevant */ - } + prim->setup(sd, path_flag, weight); } break; } @@ -351,7 +195,9 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state /* Background */ -static float3 flatten_background_closure_tree(const OSL::ClosureColor *closure) +static void flatten_background_closure_tree(ShaderData *sd, + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { /* OSL gives us a closure tree, if we are shading for background there * is only one supported closure type at the moment, which has no evaluation @@ -360,32 +206,32 @@ static float3 flatten_background_closure_tree(const OSL::ClosureColor *closure) switch(closure->id) { case OSL::ClosureColor::MUL: { OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - - return TO_FLOAT3(mul->weight) * flatten_background_closure_tree(mul->closure); + flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight)); + break; } case OSL::ClosureColor::ADD: { OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - return 
flatten_background_closure_tree(add->closureA) + - flatten_background_closure_tree(add->closureB); + flatten_background_closure_tree(sd, add->closureA, weight); + flatten_background_closure_tree(sd, add->closureB, weight); + break; } default: { OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - if(prim && prim->category == CClosurePrimitive::Background) + if(prim) { #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - return TO_FLOAT3(comp->w); -#else - return make_float3(1.0f, 1.0f, 1.0f); + weight = weight*TO_FLOAT3(comp->w); #endif + prim->setup(sd, 0, weight); + } + break; } } - - return make_float3(0.0f, 0.0f, 0.0f); } -float3 OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, ShaderContext ctx) +void OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, ShaderContext ctx) { /* setup shader globals from shader data */ OSLThreadData *tdata = kg->osl_tdata; @@ -402,15 +248,14 @@ float3 OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, PathState * /* return background color immediately */ if(globals->Ci) - return flatten_background_closure_tree(globals->Ci); - - return make_float3(0.0f, 0.0f, 0.0f); + flatten_background_closure_tree(sd, globals->Ci); } /* Volume */ static void flatten_volume_closure_tree(ShaderData *sd, - const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f)) + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { /* OSL gives us a closure tree, we flatten it into arrays per * closure type, for evaluation, sampling, etc later on. 
*/ @@ -432,60 +277,10 @@ static void flatten_volume_closure_tree(ShaderData *sd, CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); if(prim) { - ShaderClosure sc; - #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS weight = weight*TO_FLOAT3(comp->w); #endif - sc.weight = weight; - - prim->setup(); - - switch(prim->category) { - case CClosurePrimitive::Volume: { - CVolumeClosure *volume = (CVolumeClosure *)prim; - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = volume->sc.type; - sc.data0 = volume->sc.data0; - sc.data1 = volume->sc.data1; - - /* add */ - if((sc.sample_weight > CLOSURE_WEIGHT_CUTOFF) && - (sd->num_closure < MAX_CLOSURE)) - { - sd->closure[sd->num_closure++] = sc; - sd->flag |= volume->shaderdata_flag(); - } - break; - } - case CClosurePrimitive::Emissive: { - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = CLOSURE_EMISSION_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.prim = NULL; - - /* flag */ - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_EMISSION; - } - break; - } - case CClosurePrimitive::Holdout: - break; /* not implemented */ - case CClosurePrimitive::Background: - case CClosurePrimitive::BSDF: - case CClosurePrimitive::BSSRDF: - case CClosurePrimitive::AmbientOcclusion: - break; /* not relevant */ - } + prim->setup(sd, 0, weight); } } } @@ -537,43 +332,9 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderConte sd->P = TO_FLOAT3(globals->P); } -/* BSDF Closure */ - -int OSLShader::bsdf_sample(const ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3& eval, float3& omega_in, differential3& domega_in, float& pdf) -{ - CBSDFClosure *sample_bsdf = (CBSDFClosure *)sc->prim; - - pdf = 0.0f; - - return sample_bsdf->sample(sd->Ng, - sd->I, sd->dI.dx, sd->dI.dy, - randu, randv, - omega_in, domega_in.dx, 
domega_in.dy, - pdf, eval); -} - -float3 OSLShader::bsdf_eval(const ShaderData *sd, const ShaderClosure *sc, const float3& omega_in, float& pdf) -{ - CBSDFClosure *bsdf = (CBSDFClosure *)sc->prim; - float3 bsdf_eval; - - if(dot(sd->Ng, omega_in) >= 0.0f) - bsdf_eval = bsdf->eval_reflect(sd->I, omega_in, pdf); - else - bsdf_eval = bsdf->eval_transmit(sd->I, omega_in, pdf); - - return bsdf_eval; -} - -void OSLShader::bsdf_blur(ShaderClosure *sc, float roughness) -{ - CBSDFClosure *bsdf = (CBSDFClosure *)sc->prim; - bsdf->blur(roughness); -} - /* Attributes */ -int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) +int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc) { /* for OSL, a hash map is used to lookup the attribute by name. */ int object = sd->object*ATTR_PRIM_TYPES; @@ -587,16 +348,23 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, if(it != attr_map.end()) { const OSLGlobals::Attribute &osl_attr = it->second; - *elem = osl_attr.elem; + *desc = osl_attr.desc; - if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH) + if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { + desc->offset = ATTR_STD_NOT_FOUND; return ATTR_STD_NOT_FOUND; + } /* return result */ - return (osl_attr.elem == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : osl_attr.offset; + if(osl_attr.desc.element == ATTR_ELEMENT_NONE) { + desc->offset = ATTR_STD_NOT_FOUND; + } + return desc->offset; } - else + else { + desc->offset = ATTR_STD_NOT_FOUND; return (int)ATTR_STD_NOT_FOUND; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h index 7d26cd40da5..ad06dd6929d 100644 --- a/intern/cycles/kernel/osl/osl_shader.h +++ b/intern/cycles/kernel/osl/osl_shader.h @@ -54,20 +54,12 @@ public: /* eval */ static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, ShaderContext ctx); - static float3 eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, ShaderContext ctx); + static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, ShaderContext ctx); static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, ShaderContext ctx); static void eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx); - /* sample & eval */ - static int bsdf_sample(const ShaderData *sd, const ShaderClosure *sc, - float randu, float randv, - float3& eval, float3& omega_in, differential3& domega_in, float& pdf); - static float3 bsdf_eval(const ShaderData *sd, const ShaderClosure *sc, - const float3& omega_in, float& pdf); - static void bsdf_blur(ShaderClosure *sc, float roughness); - /* attributes */ - static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem); + static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc); }; CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl index c8cb88f0c0b..ef8f2fae894 100644 --- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl @@ -24,34 +24,35 @@ shader node_hair_bsdf( float 
Offset = 0.0, float RoughnessU = 0.1, float RoughnessV = 1.0, - normal Normal = Ng, + normal Tangent = normal(0, 0, 0), output closure color BSDF = 0) { - float IsStrand; float roughnessh = clamp(RoughnessU, 0.001, 1.0); float roughnessv = clamp(RoughnessV, 0.001, 1.0); - getattribute("geom:is_curve", IsStrand); + float offset = -Offset; - if (!IsStrand) { - if (backfacing()) { - BSDF = transparent(); - } - else { - if (component == "reflection") - BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0); - else - BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0); - } + normal T; + float IsCurve = 0; + getattribute("geom:is_curve", IsCurve); + + if (isconnected(Tangent)) { + T = Tangent; + } + else if(!IsCurve) { + T = normalize(dPdv); + offset = 0.0; + } + else { + T = normalize(dPdu); + } + + if (backfacing() && IsCurve) { + BSDF = transparent(); } else { - if (backfacing()) { - BSDF = transparent(); - } - else { - if (component == "reflection") - BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, dPdu, -Offset); - else - BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, dPdu, -Offset); - } + if (component == "reflection") + BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset); + else + BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset); } } diff --git a/intern/cycles/kernel/shaders/node_ramp_util.h b/intern/cycles/kernel/shaders/node_ramp_util.h new file mode 100644 index 00000000000..917fb65c6df --- /dev/null +++ b/intern/cycles/kernel/shaders/node_ramp_util.h @@ -0,0 +1,89 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ + +color rgb_ramp_lookup(color ramp[], float at, int interpolate, int extrapolate) +{ + float f = at; + int table_size = arraylength(ramp); + + if ((f < 0.0 || f > 1.0) && extrapolate) { + color t0, dy; + if (f < 0.0) { + t0 = ramp[0]; + dy = t0 - ramp[1]; + f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(at, 0.0, 1.0) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = (int)f; + if (i < 0) i = 0; + if (i >= table_size) i = table_size - 1; + float t = f - (float)i; + + color result = ramp[i]; + + if (interpolate && t > 0.0) + result = (1.0 - t) * result + t * ramp[i + 1]; + + return result; +} + +float rgb_ramp_lookup(float ramp[], float at, int interpolate, int extrapolate) +{ + float f = at; + int table_size = arraylength(ramp); + + if ((f < 0.0 || f > 1.0) && extrapolate) { + float t0, dy; + if (f < 0.0) { + t0 = ramp[0]; + dy = t0 - ramp[1]; + f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(at, 0.0, 1.0) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = (int)f; + if (i < 0) i = 0; + if (i >= table_size) i = table_size - 1; + float t = f - (float)i; + + float result = ramp[i]; + + if (interpolate && t > 0.0) + result = (1.0 - t) * result + t * ramp[i + 1]; + + return result; +} diff --git 
a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl index 8e208e8a8f7..c8e7e4f175b 100644 --- a/intern/cycles/kernel/shaders/node_rgb_curves.osl +++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl @@ -14,43 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" -#include "oslutil.h" - -float ramp_lookup(color ramp[], float at, int component) -{ - int table_size = arraylength(ramp); - - if (at < 0.0 || at > 1.0) { - float t0, dy; - if (at < 0.0) { - t0 = ramp[0][component]; - dy = t0 - ramp[1][component]; - at = -at; - } - else { - t0 = ramp[table_size - 1][component]; - dy = t0 - ramp[table_size - 2][component]; - at = at - 1.0; - } - return t0 + dy * at * (table_size - 1); - } - - float f = clamp(at, 0.0, 1.0) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = (int)f; - if (i < 0) i = 0; - if (i >= table_size) i = table_size - 1; - float t = f - (float)i; - - float result = ramp[i][component]; - - if (t > 0.0) - result = (1.0 - t) * result + t * ramp[i + 1][component]; - - return result; -} +#include "node_ramp_util.h" shader node_rgb_curves( color ramp[] = {0.0}, @@ -63,9 +27,13 @@ shader node_rgb_curves( { color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x); - ColorOut[0] = ramp_lookup(ramp, c[0], 0); - ColorOut[1] = ramp_lookup(ramp, c[1], 1); - ColorOut[2] = ramp_lookup(ramp, c[2], 2); + color r = rgb_ramp_lookup(ramp, c[0], 1, 1); + color g = rgb_ramp_lookup(ramp, c[1], 1, 1); + color b = rgb_ramp_lookup(ramp, c[2], 1, 1); + + ColorOut[0] = r[0]; + ColorOut[1] = g[1]; + ColorOut[2] = b[2]; ColorOut = mix(ColorIn, ColorOut, Fac); } diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl index c0ae74d6b33..24b8728b999 100644 --- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl +++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl @@ -14,8 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" -#include "oslutil.h" +#include "node_ramp_util.h" shader node_rgb_ramp( color ramp_color[] = {0.0}, @@ -26,21 +25,7 @@ shader node_rgb_ramp( output color Color = 0.0, output float Alpha = 1.0) { - int table_size = arraylength(ramp_color); - float f = clamp(Fac, 0.0, 1.0) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = (int)f; - if (i < 0) i = 0; - if (i >= table_size) i = table_size - 1; - float t = f - (float)i; - - Color = ramp_color[i]; - Alpha = ramp_alpha[i]; - - if (interpolate && t > 0.0) { - Color = (1.0 - t) * Color + t * ramp_color[i + 1]; - Alpha = (1.0 - t) * Alpha + t * ramp_alpha[i + 1]; - } + Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0); + Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0); } diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl index cff4efe1d98..d92fa11d439 100644 --- a/intern/cycles/kernel/shaders/node_vector_curves.osl +++ b/intern/cycles/kernel/shaders/node_vector_curves.osl @@ -14,43 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" -#include "oslutil.h" - -float ramp_lookup(color ramp[], float at, int component) -{ - int table_size = arraylength(ramp); - - if (at < 0.0 || at > 1.0) { - float t0, dy; - if (at < 0.0) { - t0 = ramp[0][component]; - dy = t0 - ramp[1][component]; - at = -at; - } - else { - t0 = ramp[table_size - 1][component]; - dy = t0 - ramp[table_size - 2][component]; - at = at - 1.0; - } - return t0 + dy * at * (table_size - 1); - } - - float f = clamp(at, 0.0, 1.0) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = (int)f; - if (i < 0) i = 0; - if (i >= table_size) i = table_size - 1; - float t = f - (float)i; - - float result = ramp[i][component]; - - if (t > 0.0) - result = (1.0 - t) * result + t * ramp[i + 1][component]; - - return result; -} +#include "node_ramp_util.h" shader node_vector_curves( color ramp[] = {0.0}, @@ -63,9 +27,13 @@ shader node_vector_curves( { vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x); - VectorOut[0] = ramp_lookup(ramp, c[0], 0); - VectorOut[1] = ramp_lookup(ramp, c[1], 1); - VectorOut[2] = ramp_lookup(ramp, c[2], 2); + color r = rgb_ramp_lookup(ramp, c[0], 1, 1); + color g = rgb_ramp_lookup(ramp, c[1], 1, 1); + color b = rgb_ramp_lookup(ramp, c[2], 1, 1); + + VectorOut[0] = r[0]; + VectorOut[1] = g[1]; + VectorOut[2] = b[2]; VectorOut = mix(VectorIn, VectorOut, Fac); } diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index de7e03e5a19..502994e71f1 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -405,10 +405,8 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a #if NODES_GROUP(NODE_GROUP_LEVEL_3) case NODE_RGB_CURVES: - svm_node_rgb_curves(kg, sd, stack, node, &offset); - break; case NODE_VECTOR_CURVES: - svm_node_vector_curves(kg, sd, stack, node, &offset); + svm_node_curves(kg, sd, stack, node, &offset); break; case NODE_TANGENT: svm_node_tangent(kg, sd, stack, node); diff --git 
a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 63bbb27d873..de978a423b4 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -18,144 +18,136 @@ CCL_NAMESPACE_BEGIN /* Attribute Node */ -ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, +ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, - NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset) + uint *out_offset) { *out_offset = node.z; *type = (NodeAttributeType)node.w; + + AttributeDescriptor desc; + if(ccl_fetch(sd, object) != OBJECT_NONE) { - /* find attribute by unique id */ - uint id = node.y; - uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; -#ifdef __HAIR__ - attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; -#endif - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - *elem = ATTR_ELEMENT_NONE; - *offset = 0; - *mesh_type = (NodeAttributeType)node.w; - return; - } - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + desc = find_attribute(kg, sd, node.y); + if(desc.offset == ATTR_STD_NOT_FOUND) { + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; } - - /* return result */ - *elem = (AttributeElement)attr_map.y; - *offset = as_int(attr_map.z); - *mesh_type = (NodeAttributeType)attr_map.w; } else { /* background */ - *elem = ATTR_ELEMENT_NONE; - *offset = 0; - *mesh_type = (NodeAttributeType)node.w; + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; } + + return desc; } ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type, 
mesh_type; - AttributeElement elem; + NodeAttributeType type; uint out_offset; - int offset; - - svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ if(type == NODE_ATTR_FLOAT) { - if(mesh_type == NODE_ATTR_FLOAT) { - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, NULL); + if(desc.type == NODE_ATTR_FLOAT) { + float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); stack_store_float(stack, out_offset, f); } else { - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); stack_store_float(stack, out_offset, average(f)); } } else { - if(mesh_type == NODE_ATTR_FLOAT3) { - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); + if(desc.type == NODE_ATTR_FLOAT3) { + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); stack_store_float3(stack, out_offset, f); } else { - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, NULL); + float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); stack_store_float3(stack, out_offset, make_float3(f, f, f)); } } } -ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_noinline +#endif +void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type, mesh_type; - AttributeElement elem; + NodeAttributeType type; uint out_offset; - int offset; - - svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ if(type == NODE_ATTR_FLOAT) { - if(mesh_type == NODE_ATTR_FLOAT) { + if(desc.type == NODE_ATTR_FLOAT) { float dx; - float f = 
primitive_attribute_float(kg, sd, elem, offset, &dx, NULL); + float f = primitive_attribute_float(kg, sd, desc, &dx, NULL); stack_store_float(stack, out_offset, f+dx); } else { float3 dx; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, &dx, NULL); + float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL); stack_store_float(stack, out_offset, average(f+dx)); } } else { - if(mesh_type == NODE_ATTR_FLOAT3) { + if(desc.type == NODE_ATTR_FLOAT3) { float3 dx; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, &dx, NULL); + float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL); stack_store_float3(stack, out_offset, f+dx); } else { float dx; - float f = primitive_attribute_float(kg, sd, elem, offset, &dx, NULL); + float f = primitive_attribute_float(kg, sd, desc, &dx, NULL); stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx)); } } } -ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_noinline +#endif +void svm_node_attr_bump_dy(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { - NodeAttributeType type, mesh_type; - AttributeElement elem; + NodeAttributeType type; uint out_offset; - int offset; - - svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ if(type == NODE_ATTR_FLOAT) { - if(mesh_type == NODE_ATTR_FLOAT) { + if(desc.type == NODE_ATTR_FLOAT) { float dy; - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, &dy); + float f = primitive_attribute_float(kg, sd, desc, NULL, &dy); stack_store_float(stack, out_offset, f+dy); } else { float3 dy; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, &dy); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy); stack_store_float(stack, out_offset, average(f+dy)); } } 
else { - if(mesh_type == NODE_ATTR_FLOAT3) { + if(desc.type == NODE_ATTR_FLOAT3) { float3 dy; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, &dy); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy); stack_store_float3(stack, out_offset, f+dy); } else { float dy; - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, &dy); + float f = primitive_attribute_float(kg, sd, desc, NULL, &dy); stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy)); } } diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index fae89aade60..017d697f9f8 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -18,104 +18,44 @@ CCL_NAMESPACE_BEGIN /* Closure Nodes */ -ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type, float eta, float roughness, bool refract) +ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract) { if(type == CLOSURE_BSDF_SHARP_GLASS_ID) { if(refract) { - sc->data0 = eta; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= bsdf_refraction_setup(sc); + bsdf->alpha_y = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->ior = eta; + ccl_fetch(sd, flag) |= bsdf_refraction_setup(bsdf); } else { - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= bsdf_reflection_setup(sc); + bsdf->alpha_y = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->ior = 0.0f; + ccl_fetch(sd, flag) |= bsdf_reflection_setup(bsdf); } } else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { - sc->data0 = roughness; - sc->data1 = roughness; - sc->data2 = eta; + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; if(refract) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(bsdf); else - ccl_fetch(sd, flag) |= 
bsdf_microfacet_beckmann_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(bsdf); } else { - sc->data0 = roughness; - sc->data1 = roughness; - sc->data2 = eta; + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; if(refract) - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(bsdf); } } -ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, ClosureType type, float mix_weight) -{ - ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); - - if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) { - sc->weight *= mix_weight; - sc->type = type; - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->data2 = 0.0f; -#ifdef __OSL__ - sc->prim = NULL; -#endif - ccl_fetch(sd, num_closure)++; - return sc; - } - - return NULL; -} - -ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float mix_weight) -{ - ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); - - float3 weight = sc->weight * mix_weight; - float sample_weight = fabsf(average(weight)); - - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure) < MAX_CLOSURE) { - sc->weight = weight; - sc->sample_weight = sample_weight; - ccl_fetch(sd, num_closure)++; -#ifdef __OSL__ - sc->prim = NULL; -#endif - return sc; - } - - return NULL; -} - -ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd, float mix_weight) -{ - ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); - - float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - sc->weight) * mix_weight; - float sample_weight = fabsf(average(weight)); - - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure) < MAX_CLOSURE) { - sc->weight = weight; - sc->sample_weight = sample_weight; - 
ccl_fetch(sd, num_closure)++; -#ifdef __OSL__ - sc->prim = NULL; -#endif - return sc; - } - - return NULL; -} - ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset) { uint type, param1_offset, param2_offset; @@ -137,49 +77,40 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * switch(type) { case CLOSURE_BSDF_DIFFUSE_ID: { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + OrenNayarBsdf *bsdf = (OrenNayarBsdf*)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight); - if(sc) { - sc->N = N; + if(bsdf) { + bsdf->N = N; float roughness = param1; if(roughness == 0.0f) { - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= bsdf_diffuse_setup(sc); + ccl_fetch(sd, flag) |= bsdf_diffuse_setup((DiffuseBsdf*)bsdf); } else { - sc->data0 = roughness; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= bsdf_oren_nayar_setup(sc); + bsdf->roughness = roughness; + ccl_fetch(sd, flag) |= bsdf_oren_nayar_setup(bsdf); } } break; } case CLOSURE_BSDF_TRANSLUCENT_ID: { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); - if(sc) { - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - sc->N = N; - ccl_fetch(sd, flag) |= bsdf_translucent_setup(sc); + if(bsdf) { + bsdf->N = N; + ccl_fetch(sd, flag) |= bsdf_translucent_setup(bsdf); } break; } case CLOSURE_BSDF_TRANSPARENT_ID: { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + ShaderClosure *bsdf = bsdf_alloc(sd, sizeof(ShaderClosure), weight); - if(sc) { - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - sc->N = N; - ccl_fetch(sd, flag) |= 
bsdf_transparent_setup(sc); + if(bsdf) { + ccl_fetch(sd, flag) |= bsdf_transparent_setup(bsdf); } break; } @@ -192,31 +123,33 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - if(sc) { - sc->N = N; - sc->data0 = param1; - sc->data1 = param1; - sc->data2 = 0.0f; + if(bsdf) { + bsdf->N = N; + bsdf->alpha_x = param1; + bsdf->alpha_y = param1; + bsdf->ior = 0.0f; + bsdf->extra = NULL; /* setup bsdf */ if(type == CLOSURE_BSDF_REFLECTION_ID) - ccl_fetch(sd, flag) |= bsdf_reflection_setup(sc); + ccl_fetch(sd, flag) |= bsdf_reflection_setup(bsdf); else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(bsdf); else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(bsdf); else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { kernel_assert(stack_valid(data_node.z)); - float3 color = stack_load_float3(stack, data_node.z); - sc->custom1 = color.x; - sc->custom2 = color.y; - sc->custom3 = color.z; - ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_setup(sc); + bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(bsdf->extra) { + bsdf->extra->color = stack_load_float3(stack, data_node.z); + ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_setup(bsdf); + } } else - ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_setup(sc); + ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_setup(bsdf); } break; @@ -228,31 +161,33 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float 
* if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - if(sc) { - sc->N = N; + if(bsdf) { + bsdf->N = N; + bsdf->extra = NULL; float eta = fmaxf(param2, 1e-5f); eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; /* setup bsdf */ if(type == CLOSURE_BSDF_REFRACTION_ID) { - sc->data0 = eta; - sc->data1 = 0.0f; - sc->data2 = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->alpha_y = 0.0f; + bsdf->ior = eta; - ccl_fetch(sd, flag) |= bsdf_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_refraction_setup(bsdf); } else { - sc->data0 = param1; - sc->data1 = param1; - sc->data2 = eta; + bsdf->alpha_x = param1; + bsdf->alpha_y = param1; + bsdf->ior = eta; if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(bsdf); } } @@ -268,7 +203,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * break; } #endif - int num_closure = ccl_fetch(sd, num_closure); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; /* index of refraction */ float eta = fmaxf(param2, 1e-5f); @@ -280,37 +215,30 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float roughness = param1; /* reflection */ - ShaderClosure *sc = ccl_fetch_array(sd, closure, num_closure); - float3 weight = sc->weight; - float sample_weight = sc->sample_weight; - - sc = svm_node_closure_get_bsdf(sd, mix_weight*fresnel); #ifdef __CAUSTICS_TRICKS__ if(kernel_data.integrator.caustics_reflective || (path_flag & 
PATH_RAY_DIFFUSE) == 0) #endif { - if(sc) { - sc->N = N; - svm_node_glass_setup(sd, sc, type, eta, roughness, false); + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*fresnel); + + if(bsdf) { + bsdf->N = N; + bsdf->extra = NULL; + svm_node_glass_setup(sd, bsdf, type, eta, roughness, false); } } + /* refraction */ #ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) - break; + if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) #endif + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*(1.0f - fresnel)); - /* refraction */ - if(num_closure + 1 < MAX_CLOSURE) { - sc = ccl_fetch_array(sd, closure, num_closure + 1); - sc->weight = weight; - sc->sample_weight = sample_weight; - - sc = svm_node_closure_get_bsdf(sd, mix_weight*(1.0f - fresnel)); - - if(sc) { - sc->N = N; - svm_node_glass_setup(sd, sc, type, eta, roughness, true); + if(bsdf) { + bsdf->N = N; + bsdf->extra = NULL; + svm_node_glass_setup(sd, bsdf, type, eta, roughness, true); } } @@ -321,24 +249,25 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_reflective && !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(sc) { - sc->N = N; + if(bsdf && extra) { + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - sc->data0 = param1; - sc->data1 = param1; + bsdf->alpha_x = param1; + bsdf->alpha_y = param1; float eta = fmaxf(param2, 1e-5f); - sc->data2 = (ccl_fetch(sd, flag) & SD_BACKFACING)? 
1.0f/eta: eta; + bsdf->ior = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; kernel_assert(stack_valid(data_node.z)); - float3 color = stack_load_float3(stack, data_node.z); - sc->custom1 = color.x; - sc->custom2 = color.y; - sc->custom3 = color.z; + bsdf->extra->color = stack_load_float3(stack, data_node.z); /* setup bsdf */ - ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_glass_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); } break; @@ -351,62 +280,63 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); - - if(sc) { - sc->N = N; + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - sc->T = stack_load_float3(stack, data_node.y); + if(bsdf) { + bsdf->N = N; + bsdf->extra = NULL; + bsdf->T = stack_load_float3(stack, data_node.y); /* rotate tangent */ float rotation = stack_load_float(stack, data_node.z); if(rotation != 0.0f) - sc->T = rotate_around_axis(sc->T, sc->N, rotation * M_2PI_F); + bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F); /* compute roughness */ float roughness = param1; float anisotropy = clamp(param2, -0.99f, 0.99f); if(anisotropy < 0.0f) { - sc->data0 = roughness/(1.0f + anisotropy); - sc->data1 = roughness*(1.0f + anisotropy); + bsdf->alpha_x = roughness/(1.0f + anisotropy); + bsdf->alpha_y = roughness*(1.0f + anisotropy); } else { - sc->data0 = roughness*(1.0f - anisotropy); - sc->data1 = roughness/(1.0f - anisotropy); + bsdf->alpha_x = roughness*(1.0f - anisotropy); + bsdf->alpha_y = roughness/(1.0f - anisotropy); } - sc->data2 = 0.0f; + bsdf->ior = 0.0f; - if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_aniso_setup(sc); - else if(type == 
CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_aniso_setup(sc); + if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) { + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_aniso_setup(bsdf); + } + else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) { + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_aniso_setup(bsdf); + } else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) { kernel_assert(stack_valid(data_node.w)); - float3 color = stack_load_float3(stack, data_node.w); - sc->custom1 = color.x; - sc->custom2 = color.y; - sc->custom3 = color.z; - ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_aniso_setup(sc); + bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(bsdf->extra) { + bsdf->extra->color = stack_load_float3(stack, data_node.w); + ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); + } } else - ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_aniso_setup(sc); + ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_aniso_setup(bsdf); } break; } case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + VelvetBsdf *bsdf = (VelvetBsdf*)bsdf_alloc(sd, sizeof(VelvetBsdf), weight); - if(sc) { - sc->N = N; + if(bsdf) { + bsdf->N = N; - /* sigma */ - sc->data0 = saturate(param1); - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= bsdf_ashikhmin_velvet_setup(sc); + bsdf->sigma = saturate(param1); + ccl_fetch(sd, flag) |= bsdf_ashikhmin_velvet_setup(bsdf); } break; } @@ -416,68 +346,62 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * break; #endif case CLOSURE_BSDF_DIFFUSE_TOON_ID: { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + ToonBsdf *bsdf = (ToonBsdf*)bsdf_alloc(sd, sizeof(ToonBsdf), weight); - if(sc) { - /* Normal, Size and Smooth */ 
- sc->N = N; - sc->data0 = param1; - sc->data1 = param2; - sc->data2 = 0.0f; + if(bsdf) { + bsdf->N = N; + bsdf->size = param1; + bsdf->smooth = param2; if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID) - ccl_fetch(sd, flag) |= bsdf_diffuse_toon_setup(sc); + ccl_fetch(sd, flag) |= bsdf_diffuse_toon_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_glossy_toon_setup(sc); + ccl_fetch(sd, flag) |= bsdf_glossy_toon_setup(bsdf); } break; } #ifdef __HAIR__ case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; if(ccl_fetch(sd, flag) & SD_BACKFACING && ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + ShaderClosure *bsdf = bsdf_alloc(sd, sizeof(ShaderClosure), weight); - if(sc) { + if(bsdf) { /* todo: giving a fixed weight here will cause issues when * mixing multiple BSDFS. energy will not be conserved and * the throughput can blow up after multiple bounces. 
we * better figure out a way to skip backfaces from rays * spawned by transmission from the front */ - sc->weight = make_float3(1.0f, 1.0f, 1.0f); - sc->N = N; - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= bsdf_transparent_setup(sc); + bsdf->weight = make_float3(1.0f, 1.0f, 1.0f); + ccl_fetch(sd, flag) |= bsdf_transparent_setup(bsdf); } } else { - ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); - sc = svm_node_closure_get_bsdf(sd, mix_weight); + HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight); - if(sc) { - sc->N = N; - sc->data0 = param1; - sc->data1 = param2; - sc->data2 = -stack_load_float(stack, data_node.z); + if(bsdf) { + bsdf->roughness1 = param1; + bsdf->roughness2 = param2; + bsdf->offset = -stack_load_float(stack, data_node.z); if(stack_valid(data_node.y)) { - sc->T = normalize(stack_load_float3(stack, data_node.y)); + bsdf->T = normalize(stack_load_float3(stack, data_node.y)); } else if(!(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)) { - sc->T = normalize(ccl_fetch(sd, dPdv)); - sc->data2 = 0.0f; + bsdf->T = normalize(ccl_fetch(sd, dPdv)); + bsdf->offset = 0.0f; } else - sc->T = normalize(ccl_fetch(sd, dPdu)); + bsdf->T = normalize(ccl_fetch(sd, dPdu)); if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { - ccl_fetch(sd, flag) |= bsdf_hair_reflection_setup(sc); + ccl_fetch(sd, flag) |= bsdf_hair_reflection_setup(bsdf); } else { - ccl_fetch(sd, flag) |= bsdf_hair_transmission_setup(sc); + ccl_fetch(sd, flag) |= bsdf_hair_transmission_setup(bsdf); } } } @@ -487,17 +411,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif #ifdef __SUBSURFACE__ -# ifndef __SPLIT_KERNEL__ -# define sc_next(sc) sc++ -# else -# define sc_next(sc) sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)) -# endif case CLOSURE_BSSRDF_CUBIC_ID: case CLOSURE_BSSRDF_GAUSSIAN_ID: case CLOSURE_BSSRDF_BURLEY_ID: { - ShaderClosure *sc = ccl_fetch_array(sd, 
closure, ccl_fetch(sd, num_closure)); - float3 albedo = sc->weight; - float3 weight = sc->weight * mix_weight; + float3 albedo = ccl_fetch(sd, svm_closure_weight); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; float sample_weight = fabsf(average(weight)); /* disable in case of diffuse ancestor, can't see it well then and @@ -506,7 +424,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) param1 = 0.0f; - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure)+2 < MAX_CLOSURE) { + if(sample_weight > CLOSURE_WEIGHT_CUTOFF) { /* radius * scale */ float3 radius = stack_load_float3(stack, data_node.z)*param1; /* sharpness */ @@ -515,61 +433,42 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float texture_blur = param2; /* create one closure per color channel */ - if(fabsf(weight.x) > 0.0f) { - sc->weight = make_float3(weight.x, 0.0f, 0.0f); - sc->sample_weight = sample_weight; - sc->data0 = radius.x; - sc->data1 = texture_blur; - sc->data2 = albedo.x; - sc->T.x = sharpness; -# ifdef __OSL__ - sc->prim = NULL; -# endif - sc->N = N; - ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type); - - ccl_fetch(sd, num_closure)++; - sc_next(sc); + Bssrdf *bssrdf = bssrdf_alloc(sd, make_float3(weight.x, 0.0f, 0.0f)); + if(bssrdf) { + bssrdf->sample_weight = sample_weight; + bssrdf->radius = radius.x; + bssrdf->texture_blur = texture_blur; + bssrdf->albedo = albedo.x; + bssrdf->sharpness = sharpness; + bssrdf->N = N; + ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); } - if(fabsf(weight.y) > 0.0f) { - sc->weight = make_float3(0.0f, weight.y, 0.0f); - sc->sample_weight = sample_weight; - sc->data0 = radius.y; - sc->data1 = texture_blur; - sc->data2 = albedo.y; - sc->T.x = sharpness; -# ifdef __OSL__ - sc->prim = NULL; -# endif - sc->N = N; - ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type); - - ccl_fetch(sd, 
num_closure)++; - sc_next(sc); + bssrdf = bssrdf_alloc(sd, make_float3(0.0f, weight.y, 0.0f)); + if(bssrdf) { + bssrdf->sample_weight = sample_weight; + bssrdf->radius = radius.y; + bssrdf->texture_blur = texture_blur; + bssrdf->albedo = albedo.y; + bssrdf->sharpness = sharpness; + bssrdf->N = N; + ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); } - if(fabsf(weight.z) > 0.0f) { - sc->weight = make_float3(0.0f, 0.0f, weight.z); - sc->sample_weight = sample_weight; - sc->data0 = radius.z; - sc->data1 = texture_blur; - sc->data2 = albedo.z; - sc->T.x = sharpness; -# ifdef __OSL__ - sc->prim = NULL; -# endif - sc->N = N; - ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type); - - ccl_fetch(sd, num_closure)++; - sc_next(sc); + bssrdf = bssrdf_alloc(sd, make_float3(0.0f, 0.0f, weight.z)); + if(bssrdf) { + bssrdf->sample_weight = sample_weight; + bssrdf->radius = radius.z; + bssrdf->texture_blur = texture_blur; + bssrdf->albedo = albedo.z; + bssrdf->sharpness = sharpness; + bssrdf->N = N; + ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); } } break; } -# undef sc_next #endif default: break; @@ -594,7 +493,8 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float switch(type) { case CLOSURE_VOLUME_ABSORPTION_ID: { - ShaderClosure *sc = svm_node_closure_get_absorption(sd, mix_weight * density); + float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - ccl_fetch(sd, svm_closure_weight)) * mix_weight * density; + ShaderClosure *sc = closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_NONE_ID, weight); if(sc) { ccl_fetch(sd, flag) |= volume_absorption_setup(sc); @@ -602,13 +502,12 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float break; } case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: { - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight * density); + float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight * density; + HenyeyGreensteinVolume *volume = 
(HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), weight); - if(sc) { - sc->data0 = param2; /* g */ - sc->data1 = 0.0f; - sc->data2 = 0.0f; - ccl_fetch(sd, flag) |= volume_henyey_greenstein_setup(sc); + if(volume) { + volume->g = param2; /* g */ + ccl_fetch(sd, flag) |= volume_henyey_greenstein_setup(volume); } break; } @@ -628,10 +527,10 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no if(mix_weight == 0.0f) return; - svm_node_closure_get_non_bsdf(sd, CLOSURE_EMISSION_ID, mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); } else - svm_node_closure_get_non_bsdf(sd, CLOSURE_EMISSION_ID, 1.0f); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, ccl_fetch(sd, svm_closure_weight)); ccl_fetch(sd, flag) |= SD_EMISSION; } @@ -646,10 +545,10 @@ ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 if(mix_weight == 0.0f) return; - svm_node_closure_get_non_bsdf(sd, CLOSURE_BACKGROUND_ID, mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); } else - svm_node_closure_get_non_bsdf(sd, CLOSURE_BACKGROUND_ID, 1.0f); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, ccl_fetch(sd, svm_closure_weight)); } ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node) @@ -662,10 +561,10 @@ ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 nod if(mix_weight == 0.0f) return; - svm_node_closure_get_non_bsdf(sd, CLOSURE_HOLDOUT_ID, mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); } else - svm_node_closure_get_non_bsdf(sd, CLOSURE_HOLDOUT_ID, 1.0f); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, ccl_fetch(sd, svm_closure_weight)); ccl_fetch(sd, flag) |= SD_HOLDOUT; } @@ -680,10 +579,10 @@ 
ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, if(mix_weight == 0.0f) return; - svm_node_closure_get_non_bsdf(sd, CLOSURE_AMBIENT_OCCLUSION_ID, mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); } else - svm_node_closure_get_non_bsdf(sd, CLOSURE_AMBIENT_OCCLUSION_ID, 1.0f); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, ccl_fetch(sd, svm_closure_weight)); ccl_fetch(sd, flag) |= SD_AO; } @@ -692,10 +591,7 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight) { - if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); - sc->weight = weight; - } + ccl_fetch(sd, svm_closure_weight) = weight; } ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index bb06254c3a9..7d512f7ff4d 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN /* Geometry Node */ -ccl_device void svm_node_geometry(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_inline void svm_node_geometry(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint out_offset) { float3 data; @@ -94,7 +98,11 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s /* Particle Info */ -ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_particle_info(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint out_offset) { switch(type) { case NODE_INFO_PAR_INDEX: { @@ -146,7 +154,11 @@ ccl_device void 
svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float /* Hair Info */ -ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_hair_info(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint out_offset) { float data; float3 data3; diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 44732734c31..b6b90dfff81 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -271,9 +271,6 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 87: r = kernel_tex_image_interp(__tex_image_byte4_087, x, y); break; case 88: r = kernel_tex_image_interp(__tex_image_byte4_088, x, y); break; case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break; - case 90: r = kernel_tex_image_interp(__tex_image_byte4_090, x, y); break; - case 91: r = kernel_tex_image_interp(__tex_image_byte4_091, x, y); break; - case 92: r = kernel_tex_image_interp(__tex_image_byte4_092, x, y); break; default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index 3f7d18a02fe..6d13a0d8e02 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -32,21 +32,17 @@ ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, *Fac = average_fac(*Vector); } else if(type == NODE_VECTOR_MATH_AVERAGE) { - *Fac = len(Vector1 + Vector2); - *Vector = normalize(Vector1 + Vector2); + *Vector = safe_normalize_len(Vector1 + Vector2, Fac); } else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) { *Fac = dot(Vector1, Vector2); *Vector = make_float3(0.0f, 0.0f, 0.0f); } else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) { - float3 c = cross(Vector1, Vector2); - *Fac = len(c); - *Vector = normalize(c); + *Vector = 
safe_normalize_len(cross(Vector1, Vector2), Fac); } else if(type == NODE_VECTOR_MATH_NORMALIZE) { - *Fac = len(Vector1); - *Vector = normalize(Vector1); + *Vector = safe_normalize_len(Vector1, Fac); } else { *Fac = 0.0f; diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index 24275d05c4a..368740f64c7 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -19,12 +19,14 @@ CCL_NAMESPACE_BEGIN -ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg, - int offset, - float f, - bool interpolate, - bool extrapolate, - int table_size) +/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ + +ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg, + int offset, + float f, + bool interpolate, + bool extrapolate, + int table_size) { if((f < 0.0f || f > 1.0f) && extrapolate) { float4 t0, dy; @@ -75,36 +77,7 @@ ccl_device void svm_node_rgb_ramp(KernelGlobals *kg, ShaderData *sd, float *stac *offset += table_size; } -ccl_device void svm_node_rgb_curves(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) -{ - uint fac_offset, color_offset, out_offset; - decode_node_uchar4(node.y, - &fac_offset, - &color_offset, - &out_offset, - NULL); - - uint table_size = read_node(kg, offset).x; - - float fac = stack_load_float(stack, fac_offset); - float3 color = stack_load_float3(stack, color_offset); - - const float min_x = __int_as_float(node.z), - max_x = __int_as_float(node.w); - const float range_x = max_x - min_x; - const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x; - - float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x; - float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y; - float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z; - - color = (1.0f - fac)*color + fac*make_float3(r, g, b); - stack_store_float3(stack, out_offset, color); - - *offset += table_size; -} - 
-ccl_device void svm_node_vector_curves(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_curves(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { uint fac_offset, color_offset, out_offset; decode_node_uchar4(node.y, diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h new file mode 100644 index 00000000000..9f2ce1276f9 --- /dev/null +++ b/intern/cycles/kernel/svm/svm_ramp_util.h @@ -0,0 +1,97 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SVM_RAMP_UTIL_H__ +#define __SVM_RAMP_UTIL_H__ + +CCL_NAMESPACE_BEGIN + +/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ + +ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp, + float f, + bool interpolate, + bool extrapolate, + int table_size) +{ + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float3 t0, dy; + if (f < 0.0f) { + t0 = ramp[0]; + dy = t0 - ramp[1], + f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(f, 0.0f, 1.0f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size-1); + float t = f - (float)i; + + float3 result = ramp[i]; + + if (interpolate && t > 0.0f) + result = (1.0f - t) * result + t * ramp[i + 1]; + + return result; +} + +ccl_device float float_ramp_lookup(const float *ramp, + float f, + bool interpolate, + bool extrapolate, + int table_size) +{ + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float t0, dy; + if (f < 0.0f) { + t0 = ramp[0]; + dy = t0 - ramp[1], + f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(f, 0.0f, 1.0f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size-1); + float t = f - (float)i; + + float result = ramp[i]; + + if (interpolate && t > 0.0f) + result = (1.0f - t) * result + t * ramp[i + 1]; + + return result; +} + +CCL_NAMESPACE_END + +#endif /* __SVM_RAMP_UTIL_H__ */ + diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 276b6f26f5e..01dede3fff5 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, stack_store_float3(stack, out_offset, data); } 
-ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -184,12 +184,12 @@ ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg, #endif } -ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -287,23 +287,22 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st } /* first try to get tangent attribute */ - AttributeElement attr_elem, attr_sign_elem, attr_normal_elem; - int attr_offset = find_attribute(kg, sd, node.z, &attr_elem); - int attr_sign_offset = find_attribute(kg, sd, node.w, &attr_sign_elem); - int attr_normal_offset = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL, &attr_normal_elem); + const AttributeDescriptor attr = find_attribute(kg, sd, node.z); + const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); + const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL); - if(attr_offset == ATTR_STD_NOT_FOUND || attr_sign_offset == ATTR_STD_NOT_FOUND || attr_normal_offset == ATTR_STD_NOT_FOUND) { + if(attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || attr_normal.offset == ATTR_STD_NOT_FOUND) { stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); return; } /* get _unnormalized_ interpolated normal and tangent */ - float3 tangent = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); - float sign = primitive_attribute_float(kg, sd, attr_sign_elem, 
attr_sign_offset, NULL, NULL); + float3 tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL); + float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL); float3 normal; if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { - normal = primitive_attribute_float3(kg, sd, attr_normal_elem, attr_normal_offset, NULL, NULL); + normal = primitive_attribute_float3(kg, sd, attr_normal, NULL, NULL); } else { normal = ccl_fetch(sd, Ng); @@ -356,24 +355,22 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack if(direction_type == NODE_TANGENT_UVMAP) { /* UV map */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, node.z, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); - if(attr_offset == ATTR_STD_NOT_FOUND) + if(desc.offset == ATTR_STD_NOT_FOUND) tangent = make_float3(0.0f, 0.0f, 0.0f); else - tangent = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); + tangent = primitive_attribute_float3(kg, sd, desc, NULL, NULL); } else { /* radial */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, node.z, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); float3 generated; - if(attr_offset == ATTR_STD_NOT_FOUND) + if(desc.offset == ATTR_STD_NOT_FOUND) generated = ccl_fetch(sd, P); else - generated = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); + generated = primitive_attribute_float3(kg, sd, desc, NULL, NULL); if(axis == NODE_TANGENT_AXIS_X) tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index e1a8ced6a34..51083c31708 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -449,6 +449,9 @@ typedef enum ClosureType { #define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= 
CLOSURE_BSDF_HAIR_TRANSMISSION_ID) #define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID) #define CLOSURE_IS_BSDF_ANISOTROPIC(type) (type >= CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) +#define CLOSURE_IS_BSDF_MULTISCATTER(type) (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID ||\ + type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \ + type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) #define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_BURLEY_ID) #define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID) #define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 30ccd523add..6eed9bc1a99 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ -ccl_device float wireframe(KernelGlobals *kg, - ShaderData *sd, - float size, - int pixel_size, - float3 *P) +ccl_device_inline float wireframe(KernelGlobals *kg, + ShaderData *sd, + float size, + int pixel_size, + float3 *P) { #ifdef __HAIR__ if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) |