diff options
Diffstat (limited to 'intern/cycles/kernel')
41 files changed, 563 insertions, 391 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 2ff6b53b0a5..8857f86890c 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -149,48 +149,61 @@ if(WITH_CYCLES_CUDA_BINARIES) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") # warn for other versions - if(CUDA_VERSION MATCHES "60") + if(CUDA_VERSION MATCHES "65") else() message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " - "build may succeed but only CUDA 6.0 is officially supported") + "build may succeed but only CUDA 6.5 is officially supported") endif() # build for each arch set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) - foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - set(cuda_cubin kernel_${arch}.cubin) + macro(CYCLES_CUDA_KERNEL_ADD arch experimental) + if(${experimental}) + set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__") + set(cuda_cubin kernel_experimental_${arch}.cubin) + else() + set(cuda_extra_flags "") + set(cuda_cubin kernel_${arch}.cubin) + endif() set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_math_flags "--use_fast_math") - if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50") - message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping") - else() - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} - -arch=${arch} - -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} - --ptxas-options="-v" - ${cuda_arch_flags} - ${cuda_version_flags} - ${cuda_math_flags} - -I${CMAKE_CURRENT_SOURCE_DIR}/../util - -I${CMAKE_CURRENT_SOURCE_DIR}/svm - -DCCL_NAMESPACE_BEGIN= - -DCCL_NAMESPACE_END= - -DNVCC - - DEPENDS ${cuda_sources}) - - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins 
${cuda_cubin}) - endif() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + -m${CUDA_BITS} + --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} + --ptxas-options="-v" + ${cuda_arch_flags} + ${cuda_version_flags} + ${cuda_math_flags} + ${cuda_extra_flags} + -I${CMAKE_CURRENT_SOURCE_DIR}/../util + -I${CMAKE_CURRENT_SOURCE_DIR}/svm + -DCCL_NAMESPACE_BEGIN= + -DCCL_NAMESPACE_END= + -DNVCC + + DEPENDS ${cuda_sources}) + + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_cubin}) + + unset(cuda_extra_flags) + endmacro() + + foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) + # Compile regular kernel + CYCLES_CUDA_KERNEL_ADD(${arch} FALSE) + + # Compile experimental kernel + CYCLES_CUDA_KERNEL_ADD(${arch} TRUE) endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index cfe12e8533d..5a9e57c5342 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -69,8 +69,8 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0] cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1]) - if cuda_version != 60: - print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10)) + if cuda_version != 65: + print("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported." 
% (cuda_version/10, cuda_version%10)) # nvcc flags nvcc_flags = "-m%s" % (bits) @@ -83,30 +83,35 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None + configs = (("kernel_%s.cubin", ''), + ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__')) + # add command for each cuda architecture for arch in cuda_archs: - if cuda_version < 60 and arch == "sm_50": - print("Can't build kernel for CUDA sm_50 architecture, skipping") - continue - - cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) - - if env['BF_CYCLES_CUDA_ENV']: - MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" - command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file) - else: - command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file) - - kernel.Command(cubin_file, 'kernel.cu', command) - kernel.Depends(cubin_file, dependencies) - - kernel_binaries.append(cubin_file) - - if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']: - # trick to compile one kernel at a time to reduce memory usage - if last_cubin_file: - kernel.Depends(cubin_file, last_cubin_file) - last_cubin_file = cubin_file + for config in configs: + # TODO(sergey): Use dict instead ocouple in order to increase readability? 
+ name = config[0] + extra_flags = config[1] + + cubin_file = os.path.join(build_dir, name % arch) + current_flags = nvcc_flags + extra_flags + + if env['BF_CYCLES_CUDA_ENV']: + MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" + command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file) + else: + command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file) + + kernel.Command(cubin_file, 'kernel.cu', command) + kernel.Depends(cubin_file, dependencies) + + kernel_binaries.append(cubin_file) + + if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']: + # trick to compile one kernel at a time to reduce memory usage + if last_cubin_file: + kernel.Depends(cubin_file, last_cubin_file) + last_cubin_file = cubin_file Return('kernel_binaries') diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 81c239ea0c9..9961071c2ac 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -23,9 +23,7 @@ #include "../closure/bsdf_reflection.h" #include "../closure/bsdf_refraction.h" #include "../closure/bsdf_transparent.h" -#ifdef __ANISOTROPIC__ #include "../closure/bsdf_ashikhmin_shirley.h" -#endif #include "../closure/bsdf_westin.h" #include "../closure/bsdf_toon.h" #include "../closure/bsdf_hair.h" @@ -94,13 +92,11 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: label = 
bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); @@ -190,12 +186,10 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf); break; @@ -260,12 +254,10 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf); break; @@ -348,12 +340,10 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: bsdf_microfacet_beckmann_blur(sc, roughness); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: bsdf_ashikhmin_shirley_blur(sc, roughness); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: bsdf_ashikhmin_velvet_blur(sc, roughness); break; diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index 6a5d0410e01..ad7864cb8ea 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -77,7 +77,7 @@ 
ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c NdotI = fmaxf(NdotI, 1e-6f); NdotO = fmaxf(NdotO, 1e-6f); float3 H = normalize(omega_in + I); - float HdotI = fmaxf(dot(H, I), 1e-6f); + float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f); float HdotN = fmaxf(dot(H, N), 1e-6f); float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */ diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index df0644becee..a0c59e6cbc0 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -362,7 +362,7 @@ ccl_device_inline float3 microfacet_sample_stretched( * E. Heitz, Research Report 2014 * * Anisotropy is only supported for reflection currently, but adding it for - * tranmission is just a matter of copying code from reflection if needed. */ + * transmission is just a matter of copying code from reflection if needed. */ ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc) { @@ -513,6 +513,10 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con float cosHO = dot(Ht, I); float cosHI = dot(Ht, omega_in); + /* those situations makes chi+ terms in eq. 33, 34 be zero */ + if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); + float D, G1o, G1i; /* eq. 33: first we calculate D(m) with m=Ht: */ @@ -862,7 +866,11 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc float cosHO = dot(Ht, I); float cosHI = dot(Ht, omega_in); - /* eq. 33: first we calculate D(m) with m=Ht: */ + /* those situations makes chi+ terms in eq. 25, 27 be zero */ + if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); + + /* eq. 
25: first we calculate D(m) with m=Ht: */ float alpha2 = alpha_x * alpha_y; float cosThetaM = min(dot(N, Ht), 1.0f); float cosThetaM2 = cosThetaM * cosThetaM; diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index b3dcb9dcc38..05816bac2c1 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ b/intern/cycles/kernel/closure/bsdf_util.h @@ -111,16 +111,20 @@ ccl_device float fresnel_dielectric_cos(float cosi, float eta) return 1.0f; // TIR(no refracted component) } -ccl_device float fresnel_conductor(float cosi, float eta, float k) +#if 0 +ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k) { - float tmp_f = eta * eta + k * k; - float tmp = tmp_f * cosi * cosi; - float Rparl2 = (tmp - (2.0f * eta * cosi) + 1)/ - (tmp + (2.0f * eta * cosi) + 1); - float Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi * cosi)/ - (tmp_f + (2.0f * eta * cosi) + cosi * cosi); + float3 cosi2 = make_float3(cosi*cosi); + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 tmp_f = eta * eta + k * k; + float3 tmp = tmp_f * cosi2; + float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / + (tmp + (2.0f * eta * cosi) + one); + float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / + (tmp_f + (2.0f * eta * cosi) + cosi2); return(Rparl2 + Rperp2) * 0.5f; } +#endif ccl_device float smooth_step(float edge0, float edge1, float x) { diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 863836ffcea..c4e9e2ababe 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -936,9 +936,10 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con sd->u = isect->u; sd->v = 0.0f; #endif - + + tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); + if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); sd->Ng = normalize(-(D - tg * (dot(tg, D)))); } 
else { @@ -950,7 +951,6 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float gd = isect->v; if(gd != 0.0f) { - tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); sd->Ng = sd->Ng - gd * tg; sd->Ng = normalize(sd->Ng); } diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 5ab0b731bdd..7409aa0d014 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -272,7 +272,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD #endif /* compute face normal */ - float3 Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + float3 Ng; + if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); + else + Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); sd->Ng = Ng; sd->N = Ng; diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index f2f35c2efd0..41e9d183a96 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -18,7 +18,7 @@ /* Triangle Primitive * * Basic triangle with 3 vertices is used to represent mesh surfaces. 
For BVH - * ray intersection we use a precomputed triangle storage to accelarate + * ray intersection we use a precomputed triangle storage to accelerate * intersection at the cost of more memory usage */ CCL_NAMESPACE_BEGIN @@ -117,21 +117,24 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat } /* normal on triangle */ -ccl_device_inline float3 triangle_normal(KernelGlobals *kg, int prim) +ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); /* return normal */ - return normalize(cross(v1 - v0, v2 - v0)); + if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + return normalize(cross(v2 - v0, v1 - v0)); + else + return normalize(cross(v1 - v0, v2 - v0)); } /* point and normal on triangle */ -ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) +ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) { /* load triangle vertices */ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); @@ -144,8 +147,14 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float float t = 1.0f - u - v; *P = (u*v0 + v*v1 + t*v2); + /* get object flags, instance-aware */ + int object_flag = kernel_tex_fetch(__object_flag, object >= 0 ? 
object : ~object); + /* compute normal */ - *Ng = normalize(cross(v1 - v0, v2 - v0)); + if(object_flag & SD_NEGATIVE_SCALE_APPLIED) + *Ng = normalize(cross(v2 - v0, v1 - v0)); + else + *Ng = normalize(cross(v1 - v0, v2 - v0)); /* shader`*/ *shader = __float_as_int(kernel_tex_fetch(__tri_shader, prim)); diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 2e0a49435a8..4f20ef9ca15 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake( int x = sx + get_global_id(0); if(x < sx + sw) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); } diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 42eb9a62518..fa2113fbb46 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -120,10 +120,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu /* Shader Evaluation */ -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, 
output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 9208acc232e..d5b5293664c 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -146,7 +146,7 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample) +kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; @@ -156,12 +156,12 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int s extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) - kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample); } #endif diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 264e5e3e4d0..19e06b88797 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int 
*rng_state, @@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 @@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 @@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX @@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 @@ -95,7 +95,7 @@ void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float 
*buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index b4f6dcdace9..b0efcdc66a7 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -32,10 +32,11 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v eval->transmission = make_float3(0.0f, 0.0f, 0.0f); eval->transparent = make_float3(0.0f, 0.0f, 0.0f); eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); + eval->scatter = make_float3(0.0f, 0.0f, 0.0f); if(type == CLOSURE_BSDF_TRANSPARENT_ID) eval->transparent = value; - else if(CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_PHASE(type)) + else if(CLOSURE_IS_BSDF_DIFFUSE(type)) eval->diffuse = value; else if(CLOSURE_IS_BSDF_GLOSSY(type)) eval->glossy = value; @@ -43,6 +44,8 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v eval->transmission = value; else if(CLOSURE_IS_BSDF_BSSRDF(type)) eval->subsurface = value; + else if(CLOSURE_IS_PHASE(type)) + eval->scatter = value; } else eval->diffuse = value; @@ -51,11 +54,17 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v #endif } -ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value) +/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. 
*/ +#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 500 +ccl_device_inline +#else +ccl_device_noinline +#endif +void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value) { #ifdef __PASSES__ if(eval->use_light_pass) { - if(CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_PHASE(type)) + if(CLOSURE_IS_BSDF_DIFFUSE(type)) eval->diffuse += value; else if(CLOSURE_IS_BSDF_GLOSSY(type)) eval->glossy += value; @@ -63,6 +72,8 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 eval->transmission += value; else if(CLOSURE_IS_BSDF_BSSRDF(type)) eval->subsurface += value; + else if(CLOSURE_IS_PHASE(type)) + eval->scatter += value; /* skipping transparent, this function is used by for eval(), will be zero then */ } @@ -81,7 +92,8 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval) && is_zero(eval->glossy) && is_zero(eval->transmission) && is_zero(eval->transparent) - && is_zero(eval->subsurface); + && is_zero(eval->subsurface) + && is_zero(eval->scatter); } else return is_zero(eval->diffuse); @@ -98,6 +110,7 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value) eval->glossy *= value; eval->transmission *= value; eval->subsurface *= value; + eval->scatter *= value; /* skipping transparent, this function is used by for eval(), will be zero then */ } @@ -111,7 +124,7 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value) /* Path Radiance * * We accumulate different render passes separately. After summing at the end - * to get the combined result, it should be identical. We definte directly + * to get the combined result, it should be identical. We definite directly * visible as the first non-transparent hit, while indirectly visible are the * bounces after that. 
*/ @@ -130,21 +143,25 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) L->color_glossy = make_float3(0.0f, 0.0f, 0.0f); L->color_transmission = make_float3(0.0f, 0.0f, 0.0f); L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->color_scatter = make_float3(0.0f, 0.0f, 0.0f); L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); L->path_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->path_glossy = make_float3(0.0f, 0.0f, 0.0f); L->path_transmission = make_float3(0.0f, 0.0f, 0.0f); L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->path_scatter = make_float3(0.0f, 0.0f, 0.0f); L->emission = make_float3(0.0f, 0.0f, 0.0f); L->background = make_float3(0.0f, 0.0f, 0.0f); @@ -174,14 +191,16 @@ ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, float3 *throug L->path_glossy = bsdf_eval->glossy*value; L->path_transmission = bsdf_eval->transmission*value; L->path_subsurface = bsdf_eval->subsurface*value; + L->path_scatter = bsdf_eval->scatter*value; - *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface; + *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface + L->path_scatter; L->direct_throughput = *throughput; } else { /* transparent bounce before first hit, or indirectly visible through BSDF */ - float3 sum = (bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent + bsdf_eval->subsurface)*inverse_pdf; + float3 sum = (bsdf_eval->diffuse 
+ bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent + + bsdf_eval->subsurface + bsdf_eval->scatter) * inverse_pdf; *throughput *= sum; } } @@ -241,6 +260,7 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through L->direct_glossy += throughput*bsdf_eval->glossy*shadow; L->direct_transmission += throughput*bsdf_eval->transmission*shadow; L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow; + L->direct_scatter += throughput*bsdf_eval->scatter*shadow; if(is_lamp) { L->shadow.x += shadow.x*shadow_fac; @@ -250,7 +270,7 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through } else { /* indirectly visible lighting after BSDF bounce */ - float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface; + float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface + bsdf_eval->scatter; L->indirect += throughput*sum*shadow; } } @@ -291,12 +311,14 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L) L->direct_glossy += L->path_glossy*L->direct_emission; L->direct_transmission += L->path_transmission*L->direct_emission; L->direct_subsurface += L->path_subsurface*L->direct_emission; + L->direct_scatter += L->path_scatter*L->direct_emission; L->indirect = safe_divide_color(L->indirect, L->direct_throughput); L->indirect_diffuse += L->path_diffuse*L->indirect; L->indirect_glossy += L->path_glossy*L->indirect; L->indirect_transmission += L->path_transmission*L->indirect; L->indirect_subsurface += L->path_subsurface*L->indirect; + L->indirect_scatter += L->path_scatter*L->indirect; } #endif } @@ -309,6 +331,7 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L) L->path_glossy = make_float3(0.0f, 0.0f, 0.0f); L->path_transmission = make_float3(0.0f, 0.0f, 0.0f); L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->path_scatter = make_float3(0.0f, 0.0f, 0.0f); L->direct_emission = 
make_float3(0.0f, 0.0f, 0.0f); L->indirect = make_float3(0.0f, 0.0f, 0.0f); @@ -327,8 +350,8 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi if(L->use_light_pass) { path_radiance_sum_indirect(L); - L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->emission; - L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface; + L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission; + L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter; if(!kernel_data.background.transparent) L_direct += L->background; @@ -344,11 +367,13 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); L->emission = make_float3(0.0f, 0.0f, 0.0f); } @@ -368,6 +393,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi L->direct_glossy *= scale; L->direct_transmission *= scale; L->direct_subsurface *= scale; + L->direct_scatter *= scale; L->emission *= scale; L->background *= scale; } @@ -382,6 +408,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi L->indirect_glossy *= scale; L->indirect_transmission *= scale; L->indirect_subsurface *= scale; + L->indirect_scatter *= scale; } /* Sum again, after clamping */ @@ -416,11 +443,13 @@ ccl_device_inline 
void path_radiance_accum_sample(PathRadiance *L, PathRadiance L->direct_glossy += L_sample->direct_glossy*fac; L->direct_transmission += L_sample->direct_transmission*fac; L->direct_subsurface += L_sample->direct_subsurface*fac; + L->direct_scatter += L_sample->direct_scatter*fac; L->indirect_diffuse += L_sample->indirect_diffuse*fac; L->indirect_glossy += L_sample->indirect_glossy*fac; L->indirect_transmission += L_sample->indirect_transmission*fac; L->indirect_subsurface += L_sample->indirect_subsurface*fac; + L->indirect_scatter += L_sample->indirect_scatter*fac; L->emission += L_sample->emission*fac; L->background += L_sample->background*fac; diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp index d612a82b785..e7ff21a6f09 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernel_avx.cpp @@ -68,10 +68,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float /* Shader Evaluate */ -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp index 339421a002b..cb1662bbfbe 100644 --- a/intern/cycles/kernel/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernel_avx2.cpp @@ -69,10 +69,10 @@ void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 
*output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index c2d14b7f835..dfbb49db7e6 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -172,7 +172,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u) } #endif -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample) +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, + ShaderEvalType type, int i, int offset, int sample) { ShaderData sd; uint4 in = input[i * 2]; @@ -197,7 +198,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int num_samples = kernel_data.integrator.aa_samples; /* random number generator */ - RNG rng = cmj_hash(i, 0); + RNG rng = cmj_hash(offset + i, 0); #if 0 uint rng_state = cmj_hash(i, 0); @@ -215,7 +216,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int shader; float3 P, Ng; - triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); /* dummy initilizations copied from SHADER_EVAL_DISPLACE */ float3 I = Ng; diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 6b03abe9708..5c83358a56d 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -21,16 +21,22 @@ CCL_NAMESPACE_BEGIN ccl_device float2 camera_sample_aperture(KernelGlobals *kg, float u, float v) { float blades = kernel_data.cam.blades; + float2 bokeh; if(blades == 0.0f) { /* 
sample disk */ - return concentric_sample_disk(u, v); + bokeh = concentric_sample_disk(u, v); } else { /* sample polygon */ float rotation = kernel_data.cam.bladesrotation; - return regular_polygon_sample(blades, rotation, u, v); + bokeh = regular_polygon_sample(blades, rotation, u, v); } + + /* anamorphic lens bokeh */ + bokeh.x *= kernel_data.cam.inv_aperture_ratio; + + return bokeh; } ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray) @@ -183,7 +189,8 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float /* calculate orthonormal coordinates perpendicular to D */ float3 U, V; - make_orthonormals(D, &U, &V); + U = normalize(make_float3(1.0f, 0.0f, 0.0f) - D.x * D); + V = normalize(cross(D, U)); /* update ray for effect of lens */ ray->P = U * lensuv.x + V * lensuv.y; diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index bda98b84da8..4b2bb723ab6 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -108,6 +108,8 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, eval->glossy = make_float3(0.0f, 0.0f, 0.0f); if(ls->shader & SHADER_EXCLUDE_TRANSMIT) eval->transmission = make_float3(0.0f, 0.0f, 0.0f); + if(ls->shader & SHADER_EXCLUDE_SCATTER) + eval->scatter = make_float3(0.0f, 0.0f, 0.0f); } #endif @@ -187,7 +189,8 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st if(ls.shader & SHADER_EXCLUDE_ANY) { if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) || - ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT))) + ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || + ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) continue; } 
#endif @@ -231,7 +234,8 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || ((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) || ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || - ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA))) + ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) || + ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) return make_float3(0.0f, 0.0f, 0.0f); } diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index 7a850844bf2..2a5b7689e57 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -14,6 +14,8 @@ * limitations under the License */ +/* TODO(sergey): Consider moving portable ctz/clz stuff to util. */ + CCL_NAMESPACE_BEGIN /* "Correlated Multi-Jittered Sampling" @@ -35,8 +37,16 @@ ccl_device_inline int cmj_fast_mod_pow2(int a, int b) /* a must be > 0 and b must be > 1 */ ccl_device_inline int cmj_fast_div_pow2(int a, int b) { -#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) + kernel_assert(a > 0); + kernel_assert(b > 1); +#if defined(__KERNEL_SSE2__) +# ifdef _MSC_VER + unsigned long ctz; + _BitScanForward(&ctz, b); + return a >> ctz; +# else return a >> __builtin_ctz(b); +# endif #else return a/b; #endif @@ -44,8 +54,15 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b) ccl_device_inline uint cmj_w_mask(uint w) { -#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) + kernel_assert(w > 1); +#if defined(__KERNEL_SSE2__) +# ifdef _MSC_VER + unsigned long leading_zero; + _BitScanReverse(&leading_zero, w); + return ((1 << (1 + leading_zero)) - 1); +# else return ((1 << (32 - __builtin_clz(w))) - 1); +# endif #else w |= w >> 1; w |= w >> 2; @@ -165,7 +182,8 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy) 
smodm = cmj_fast_mod_pow2(s, m); } else { - sdivm = float_to_int(s * invm); + /* Doing s*inmv gives precision issues here. */ + sdivm = s / m; smodm = s - sdivm*m; } diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 0adf9ed4666..e7f62f230f8 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -27,7 +27,7 @@ typedef struct LightSample { float pdf; /* light sampling probability density function */ float eval_fac; /* intensity multiplier */ int object; /* object id for triangle/curve lights */ - int prim; /* primitive id for triangle/curve ligths */ + int prim; /* primitive id for triangle/curve lights */ int shader; /* shader id */ int lamp; /* lamp id */ LightType type; /* type of light */ @@ -208,8 +208,8 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 return t*t/cos_pi; } -ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp, - float randu, float randv, float3 P, LightSample *ls, bool for_volume) +ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, + float randu, float randv, float3 P, LightSample *ls) { float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0); float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1); @@ -224,11 +224,6 @@ ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp, ls->v = randv; if(type == LIGHT_DISTANT) { -#ifdef __VOLUME__ - if(for_volume) - return false; -#endif - /* distant light */ float3 lightD = make_float3(data0.y, data0.z, data0.w); float3 D = lightD; @@ -249,11 +244,6 @@ ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp, } #ifdef __BACKGROUND_MIS__ else if(type == LIGHT_BACKGROUND) { -#ifdef __VOLUME__ - if(for_volume) - return false; -#endif - /* infinite area light (e.g. 
light dome or env light) */ float3 D = background_light_sample(kg, randu, randv, &ls->pdf); @@ -309,8 +299,6 @@ ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp, ls->eval_fac *= kernel_data.integrator.inv_pdf_lights; ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } - - return true; } ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls) @@ -469,7 +457,7 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object, v = randv*randu; /* triangle, so get position, normal, shader */ - triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader); + triangle_point_normal(kg, object, prim, u, v, &ls->P, &ls->Ng, &ls->shader); ls->object = object; ls->prim = prim; ls->lamp = LAMP_NONE; @@ -526,7 +514,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt) /* Generic Light */ -ccl_device bool light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls, bool for_volume) +ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls) { /* sample index */ int index = light_distribution_sample(kg, randt); @@ -545,12 +533,10 @@ ccl_device bool light_sample(KernelGlobals *kg, float randt, float randu, float ls->D = normalize_len(ls->P - P, &ls->t); ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t); ls->shader |= shader_flag; - - return true; } else { int lamp = -prim-1; - return lamp_light_sample(kg, lamp, randu, randv, P, ls, for_volume); + lamp_light_sample(kg, lamp, randu, randv, P, ls); } } @@ -560,11 +546,6 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index) return __float_as_int(data3.x); } -ccl_device bool light_select(KernelGlobals *kg, int index, float randu, float randv, float3 P, LightSample *ls, bool for_volume) -{ - return lamp_light_sample(kg, index, randu, randv, P, ls, for_volume); -} - ccl_device int 
lamp_light_eval_sample(KernelGlobals *kg, float randt) { /* sample index */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 0c033f6234c..515854b6e9c 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -89,6 +89,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, volume_ray.t = (hit)? isect.t: FLT_MAX; bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + +#ifdef __VOLUME_DECOUPLED__ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack); bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method); @@ -135,28 +137,32 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, kernel_volume_decoupled_free(kg, &volume_segment); if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray)) continue; else break; } } - else { + else +#endif + { /* integrate along volume segment with distance sampling */ ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate( - kg, &state, &volume_sd, &volume_ray, L, &throughput, rng); + kg, &state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous); +#ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L, 1.0f); + kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L); /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray)) continue; else break; } +#endif } } #endif @@ -470,6 +476,8 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, volume_ray.t = (hit)? 
isect.t: FLT_MAX; bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + +#ifdef __VOLUME_DECOUPLED__ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack); bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method); @@ -516,28 +524,32 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, kernel_volume_decoupled_free(kg, &volume_segment); if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) continue; else break; } } - else { + else +#endif + { /* integrate along volume segment with distance sampling */ ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate( - kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng); + kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous); +#ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, 1.0f); + kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L); /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) continue; else break; } +#endif } } #endif @@ -803,10 +815,11 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; volume_ray.t = (hit)? 
isect.t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); -#ifdef __KERNEL_CPU__ +#ifdef __VOLUME_DECOUPLED__ /* decoupled ray marching only supported on CPU */ - bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); /* cache steps along volume for repeated sampling */ VolumeSegment volume_segment; @@ -850,16 +863,17 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { - kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { + kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); } } } @@ -883,21 +897,22 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in PathState ps = state; Ray pray = ray; ShaderData volume_sd; - float3 tp = throughput; + float3 tp = throughput * num_samples_inv; /* branch RNG state */ path_state_branch(&ps, j, num_samples); VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng); + kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous); +#ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* todo: support equiangular, MIS and all light sampling. 
* alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, num_samples_inv); + kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L); - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { - kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { + kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L); /* for render passes, sum and reset indirect light pass variables * for the next samples */ @@ -905,6 +920,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_radiance_reset_indirect(&L); } } +#endif } /* todo: avoid this calculation using decoupled ray marching */ @@ -941,7 +957,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* holdout */ #ifdef __HOLDOUT__ - if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) { + if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) { if(kernel_data.background.transparent) { float3 holdout_weight; @@ -1031,6 +1047,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in ray.P = ray_offset(sd.P, -sd.Ng); ray.t -= sd.ray_length; /* clipping works through transparent */ + +#ifdef __RAY_DIFFERENTIALS__ + ray.dP = sd.dP; + ray.dD.dx = -sd.dI.dx; + ray.dD.dy = -sd.dI.dy; +#endif + #ifdef __VOLUME__ /* enter/exit volume */ kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 7700abfbdae..9553c2da0df 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -50,7 +50,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); 
LightSample ls; - light_select(kg, i, light_u, light_v, sd->P, &ls, false); + lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ @@ -82,7 +82,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN light_t = 0.5f*light_t; LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ @@ -103,7 +103,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); /* sample random light */ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { @@ -200,7 +200,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG #endif LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index 8453b79de45..da2d5e6eca8 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -16,11 +16,10 @@ CCL_NAMESPACE_BEGIN -#ifdef __VOLUME__ +#ifdef __VOLUME_SCATTER__ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 throughput, PathState *state, 
PathRadiance *L, - float num_samples_adjust) + ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L) { #ifdef __EMISSION__ if(!kernel_data.integrator.use_direct_light) @@ -41,9 +40,8 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, light_ray.time = sd->time; #endif - if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true)) - return; - else if(ls.pdf == 0.0f) + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + if(ls.pdf == 0.0f) return; if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { @@ -52,15 +50,14 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, if(!shadow_blocked(kg, state, &light_ray, &shadow)) { /* accumulate */ - path_radiance_accum_light(L, throughput * num_samples_adjust, &L_light, shadow, 1.0f, state->bounce, is_lamp); + path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp); } } #endif } ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray, - float num_samples_adjust) + ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) { /* sample phase function */ float phase_pdf; @@ -135,26 +132,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - if(!light_select(kg, i, light_u, light_v, ray->P, &ls, true)) - continue; + lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls); float3 tp = throughput; /* sample position on volume segment */ - if(segment) { - float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); - float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); + float rphase = 
path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); + float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - if(result != VOLUME_PATH_SCATTERED) - continue; - - /* todo: split up light_sample so we don't have to call it again with new position */ - if(!light_select(kg, i, light_u, light_v, sd->P, &ls, true)) - continue; - } + /* todo: split up light_sample so we don't have to call it again with new position */ + lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls); if(ls.pdf == 0.0f) continue; @@ -190,26 +183,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG light_t = 0.5f*light_t; LightSample ls; - if(!light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls, true)) - continue; + light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls); float3 tp = throughput; /* sample position on volume segment */ - if(segment) { - float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); - float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); + float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); + float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? 
&ls.P: NULL, false); + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - if(result != VOLUME_PATH_SCATTERED) - continue; - - /* todo: split up light_sample so we don't have to call it again with new position */ - if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true)) - continue; - } + /* todo: split up light_sample so we don't have to call it again with new position */ + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); if(ls.pdf == 0.0f) continue; @@ -233,26 +222,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - if(!light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls, true)) - return; + light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls); float3 tp = throughput; /* sample position on volume segment */ - if(segment) { - float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE); - float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE); + float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE); + float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE); - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? 
&ls.P: NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - if(result != VOLUME_PATH_SCATTERED) - return; - - /* todo: split up light_sample so we don't have to call it again with new position */ - if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true)) - return; - } + /* todo: split up light_sample so we don't have to call it again with new position */ + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); if(ls.pdf == 0.0f) return; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 842b9f68840..fc61f1a9c2c 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -86,7 +86,7 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, #endif if(sd->type & PRIMITIVE_TRIANGLE) { /* static triangle */ - float3 Ng = triangle_normal(kg, sd->prim); + float3 Ng = triangle_normal(kg, sd); sd->shader = __float_as_int(kernel_tex_fetch(__tri_shader, sd->prim)); /* vectors */ @@ -165,7 +165,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat /* fetch triangle data */ if(sd->type == PRIMITIVE_TRIANGLE) { - float3 Ng = triangle_normal(kg, sd->prim); + float3 Ng = triangle_normal(kg, sd); sd->shader = __float_as_int(kernel_tex_fetch(__tri_shader, sd->prim)); /* static triangle */ @@ -340,7 +340,7 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); int shader; - triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); /* force smooth shading for displacement */ shader |= SHADER_SMOOTH_NORMAL; diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 67bd0943b1b..740998e8c92 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -64,10 +64,10 @@ void 
kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 40d621b66f6..da73a3a1c97 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -66,10 +66,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index 4b48d10b020..5704f60e138 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -67,10 +67,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo /* Shader Evaluate */ -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int 
sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d81909a623a..933202ea498 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -66,6 +66,8 @@ CCL_NAMESPACE_BEGIN #define __SUBSURFACE__ #define __CMJ__ #define __VOLUME__ +#define __VOLUME_DECOUPLED__ +#define __VOLUME_SCATTER__ #define __SHADOW_RECORD_ALL__ #endif @@ -73,10 +75,15 @@ CCL_NAMESPACE_BEGIN #define __KERNEL_SHADING__ #define __KERNEL_ADV_SHADING__ #define __BRANCHED_PATH__ +#define __VOLUME__ +#define __VOLUME_SCATTER__ /* Experimental on GPU */ -//#define __VOLUME__ -//#define __SUBSURFACE__ +#ifdef __KERNEL_CUDA_EXPERIMENTAL__ +#define __SUBSURFACE__ +#define __CMJ__ +#endif + #endif #ifdef __KERNEL_OPENCL__ @@ -103,7 +110,6 @@ CCL_NAMESPACE_BEGIN #define __BACKGROUND_MIS__ #define __LAMP_MIS__ #define __AO__ -#define __ANISOTROPIC__ //#define __CAMERA_MOTION__ //#define __OBJECT_MOTION__ //#define __HAIR__ @@ -134,11 +140,9 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SHADING__ #define __SVM__ #define __EMISSION__ -#define __PROCEDURAL_TEXTURES__ -#define __IMAGE_TEXTURES__ +#define __TEXTURES__ #define __EXTRA_NODES__ #define __HOLDOUT__ -#define __NORMAL_MAP__ #endif #ifdef __KERNEL_ADV_SHADING__ @@ -148,7 +152,6 @@ CCL_NAMESPACE_BEGIN #define __BACKGROUND_MIS__ #define __LAMP_MIS__ #define __AO__ -#define __ANISOTROPIC__ #define __CAMERA_MOTION__ #define __OBJECT_MOTION__ #define __HAIR__ @@ -223,10 +226,9 @@ enum PathTraceDimension { PRNG_PHASE_V = 9, PRNG_PHASE = 10, PRNG_SCATTER_DISTANCE = 11, - PRNG_BOUNCE_NUM = 12, -#else - PRNG_BOUNCE_NUM = 8, #endif + + PRNG_BOUNCE_NUM = 12, }; enum SamplingPattern { @@ -252,17 +254,17 @@ enum 
PathRayFlag { PATH_RAY_SHADOW_TRANSPARENT = 256, PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT), - PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/ + PATH_RAY_CURVE = 512, /* visibility flag to define curve segments */ + PATH_RAY_VOLUME_SCATTER = 1024, /* volume scattering */ /* note that these can use maximum 12 bits, the other are for layers */ - PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512), + PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024), - PATH_RAY_MIS_SKIP = 1024, - PATH_RAY_DIFFUSE_ANCESTOR = 2048, - PATH_RAY_GLOSSY_ANCESTOR = 4096, - PATH_RAY_BSSRDF_ANCESTOR = 8192, - PATH_RAY_SINGLE_PASS_DONE = 16384, - PATH_RAY_VOLUME_SCATTER = 32768, + PATH_RAY_MIS_SKIP = 2048, + PATH_RAY_DIFFUSE_ANCESTOR = 4096, + PATH_RAY_GLOSSY_ANCESTOR = 8192, + PATH_RAY_BSSRDF_ANCESTOR = 16384, + PATH_RAY_SINGLE_PASS_DONE = 32768, /* we need layer member flags to be the 20 upper bits */ PATH_RAY_LAYER_SHIFT = (32-20) @@ -332,21 +334,25 @@ typedef struct PathRadiance { float3 color_glossy; float3 color_transmission; float3 color_subsurface; + float3 color_scatter; float3 direct_diffuse; float3 direct_glossy; float3 direct_transmission; float3 direct_subsurface; + float3 direct_scatter; float3 indirect_diffuse; float3 indirect_glossy; float3 indirect_transmission; float3 indirect_subsurface; + float3 indirect_scatter; float3 path_diffuse; float3 path_glossy; float3 path_transmission; float3 path_subsurface; + float3 path_scatter; float4 shadow; float mist; @@ -360,6 +366,7 @@ typedef struct BsdfEval { float3 transmission; float3 transparent; float3 subsurface; + float3 scatter; } BsdfEval; #else @@ -380,7 +387,8 @@ typedef enum ShaderFlag { SHADER_EXCLUDE_GLOSSY = (1 << 26), SHADER_EXCLUDE_TRANSMIT = (1 << 25), SHADER_EXCLUDE_CAMERA = (1 << 24), - SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA), + SHADER_EXCLUDE_SCATTER = (1 << 23), + 
SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER), SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY) } ShaderFlag; @@ -392,10 +400,8 @@ typedef enum LightType { LIGHT_DISTANT, LIGHT_BACKGROUND, LIGHT_AREA, - LIGHT_AO, LIGHT_SPOT, - LIGHT_TRIANGLE, - LIGHT_STRAND + LIGHT_TRIANGLE } LightType; /* Camera Type */ @@ -526,17 +532,15 @@ typedef struct ShaderClosure { ClosureType type; float3 weight; - float sample_weight; - float data0; float data1; + float data2; float3 N; -#if defined(__ANISOTROPIC__) || defined(__SUBSURFACE__) || defined(__HAIR__) float3 T; -#endif + + float sample_weight; - float data2; #ifdef __OSL__ void *prim; #endif @@ -596,7 +600,8 @@ enum ShaderDataFlag { /* object flags */ SD_HOLDOUT_MASK = 524288, /* holdout for camera rays */ SD_OBJECT_MOTION = 1048576, /* has object motion blur */ - SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */ + SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */ + SD_NEGATIVE_SCALE_APPLIED = 4194304, /* vertices have negative scale applied */ SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED) }; @@ -762,9 +767,12 @@ typedef struct KernelCamera { /* render size */ float width, height; int resolution; + + /* anamorphic lens bokeh */ + float inv_aperture_ratio; + int pad1; int pad2; - int pad3; /* more matrices */ Transform screentoworld; @@ -866,7 +874,8 @@ typedef struct KernelIntegrator { int transparent_shadows; /* caustics */ - int no_caustics; + int caustics_reflective; + int caustics_refractive; float filter_glossy; /* seed */ @@ -927,7 +936,6 @@ typedef enum CurveFlag { } CurveFlag; typedef struct KernelCurves { - /* strand intersect and normal parameters - many can be changed to flags */ int curveflags; int subdivisions; diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h 
index cc4b2e3edf1..ea02ede10cd 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -176,6 +176,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute extinction at the start */ float t = 0.0f; + float3 sum = make_float3(0.0f, 0.0f, 0.0f); + for(int i = 0; i < max_steps; i++) { /* advance to new position */ float new_t = min(ray->t, (i+1) * step); @@ -190,20 +192,26 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute attenuation over segment */ if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { - /* todo: we could avoid computing expf() for each step by summing, - * because exp(a)*exp(b) = exp(a+b), but we still want a quick - * tp_eps check too */ - tp *= volume_color_transmittance(sigma_t, new_t - t); - - /* stop if nearly all light blocked */ - if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) - break; + /* Compute expf() only for every Nth step, to save some calculations + * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */ + + sum += (-sigma_t * (new_t - t)); + if((i & 0x07) == 0) { /* ToDo: Other interval? 
*/ + tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z)); + + /* stop if nearly all light is blocked */ + if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) + break; + } } /* stop if at the end of the volume */ t = new_t; - if(t == ray->t) + if(t == ray->t) { + /* Update throughput in case we haven't done it above */ + tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z)); break; + } } *throughput = tp; @@ -326,6 +334,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba float t = ray->t; float3 new_tp; +#ifdef __VOLUME_SCATTER__ /* randomly scatter, and if we do t is shortened */ if(closure_flag & SD_SCATTER) { /* extinction coefficient */ @@ -379,7 +388,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba new_tp = *throughput * transmittance / pdf; } } - else if(closure_flag & SD_ABSORPTION) { + else +#endif + if(closure_flag & SD_ABSORPTION) { /* absorption only, no sampling needed */ float3 transmittance = volume_color_transmittance(coeff.sigma_a, t); new_tp = *throughput * transmittance; @@ -456,6 +467,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( bool scatter = false; /* distance sampling */ +#ifdef __VOLUME_SCATTER__ if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) { has_scatter = true; @@ -491,7 +503,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( xi = 1.0f - (1.0f - xi)/sample_transmittance; } } - else if(closure_flag & SD_ABSORPTION) { + else +#endif + if(closure_flag & SD_ABSORPTION) { /* absorption only, no sampling needed */ float3 sigma_a = coeff.sigma_a; @@ -546,13 +560,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( * between the endpoints. distance sampling is used to decide if we will * scatter or not. 
*/ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, - PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng) + PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous) { /* workaround to fix correlation bug in T38710, can find better solution * in random number generator later, for now this is done here to not impact * performance of rendering without volumes */ RNG tmp_rng = cmj_hash(*rng, state->rng_offset); - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce); @@ -724,16 +737,15 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s * scattering, they always scatter if there is any non-zero scattering * coefficient. * - * these also do not do emission or modify throughput. */ + * these also do not do emission or modify throughput. 
+ * + * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput, float rphase, float rscatter, const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter) { - int closure_flag = segment->closure_flag; - - if(!(closure_flag & SD_SCATTER)) - return VOLUME_PATH_MISSED; + kernel_assert(segment->closure_flag & SD_SCATTER); /* pick random color channel, we use the Veach one-sample * model with balance heuristic for the channels */ @@ -845,15 +857,33 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f); if(segment->numsteps > 1) { - /* todo: optimize using binary search */ float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < segment->numsteps-1; i++, step++) { - if(sample_t < step->t) + int numsteps = segment->numsteps; + int high = numsteps - 1; + int low = 0; + int mid; + + while(low < high) { + mid = (low + high) >> 1; + + if(sample_t < step[mid].t) + high = mid; + else if(sample_t >= step[mid + 1].t) + low = mid + 1; + else { + /* found our interval in step[mid] .. 
step[mid+1] */ + prev_t = step[mid].t; + prev_cdf_distance = step[mid].cdf_distance; + step += mid+1; break; + } + } - prev_t = step->t; - prev_cdf_distance = step->cdf_distance; + if(low >= numsteps - 1) { + prev_t = step[numsteps - 1].t; + prev_cdf_distance = step[numsteps-1].cdf_distance; + step += numsteps - 1; } /* pdf for picking step with distance sampling */ diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 94337290d20..84ef85e089d 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -66,18 +66,6 @@ ClosureParam *closure_bssrdf_cubic_params() static ClosureParam params[] = { CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N), CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius), - //CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1), - CLOSURE_STRING_KEYPARAM("label"), - CLOSURE_FINISH_PARAM(CubicBSSRDFClosure) - }; - return params; -} - -ClosureParam *closure_bssrdf_cubic_extended_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N), - CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius), CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1), CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.T.x), CLOSURE_STRING_KEYPARAM("label"), @@ -107,18 +95,6 @@ ClosureParam *closure_bssrdf_gaussian_params() static ClosureParam params[] = { CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N), CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius), - //CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1), - CLOSURE_STRING_KEYPARAM("label"), - CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure) - }; - return params; -} - -ClosureParam *closure_bssrdf_gaussian_extended_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N), - CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius), CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1), CLOSURE_STRING_KEYPARAM("label"), CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure) diff --git 
a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 2d716103b85..d7789edcfff 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -105,7 +105,7 @@ BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, ashikhmin_velvet, LA CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, sc.data0), BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) -BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, ashikhmin_shirley, LABEL_GLOSSY) +BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, ashikhmin_shirley, LABEL_GLOSSY|LABEL_REFLECT) CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.N), CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.T), CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, sc.data0), @@ -124,37 +124,37 @@ BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, glossy_toon, LABEL_GLOSSY) CLOSURE_FLOAT_PARAM(GlossyToonClosure, sc.data1), BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY) +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT) CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, sc.N), CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, sc.data0), BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, microfacet_ggx, LABEL_GLOSSY) +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT) CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.N), CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.T), CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data0), CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data1), BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY) 
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT) CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, sc.N), CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, sc.data0), BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, microfacet_beckmann, LABEL_GLOSSY) +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT) CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.N), CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.T), CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data0), CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data1), BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY) +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY|LABEL_TRANSMIT) CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, sc.N), CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data0), CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data2), BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY) +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY|LABEL_TRANSMIT) CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, sc.N), CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data0), CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data2), @@ -244,8 +244,6 @@ void OSLShader::register_closures(OSLShadingSystem *ss_) bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); register_closure(ss, "glossy_toon", id++, 
bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); - register_closure(ss, "specular_toon", id++, - bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); register_closure(ss, "westin_backscatter", id++, bsdf_westin_backscatter_params(), bsdf_westin_backscatter_prepare); register_closure(ss, "westin_sheen", id++, @@ -267,10 +265,6 @@ void OSLShader::register_closures(OSLShadingSystem *ss_) closure_bssrdf_cubic_params(), closure_bssrdf_cubic_prepare); register_closure(ss, "bssrdf_gaussian", id++, closure_bssrdf_gaussian_params(), closure_bssrdf_gaussian_prepare); - register_closure(ss, "bssrdf_cubic", id++, - closure_bssrdf_cubic_extended_params(), closure_bssrdf_cubic_prepare); - register_closure(ss, "bssrdf_gaussian", id++, - closure_bssrdf_gaussian_extended_params(), closure_bssrdf_gaussian_prepare); register_closure(ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare); diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index a543907e884..58d215295dc 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -52,8 +52,6 @@ OSL::ClosureParam *closure_westin_backscatter_params(); OSL::ClosureParam *closure_westin_sheen_params(); OSL::ClosureParam *closure_bssrdf_cubic_params(); OSL::ClosureParam *closure_bssrdf_gaussian_params(); -OSL::ClosureParam *closure_bssrdf_cubic_extended_params(); -OSL::ClosureParam *closure_bssrdf_gaussian_extended_params(); OSL::ClosureParam *closure_henyey_greenstein_volume_params(); void closure_emission_prepare(OSL::RendererServices *, int id, void *data); diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 6a59a381f48..38cb5061346 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -126,7 +126,7 @@ void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::Texture osl_ts = osl_ts_; } -bool 
OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ @@ -156,7 +156,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ @@ -186,7 +186,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform return false; } -bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float time) +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time) { KernelGlobals *kg = kernel_globals; @@ -218,7 +218,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float ti return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time) { KernelGlobals *kg = kernel_globals; @@ -250,7 +250,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, fl return false; } -bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) { /* this is only used for shader and object space, we don't really have * a concept of shader space, so 
we just use object space for both. */ @@ -275,7 +275,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ @@ -300,7 +300,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform return false; } -bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from) +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) { KernelGlobals *kg = kernel_globals; @@ -328,7 +328,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from) return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to) { KernelGlobals *kg = kernel_globals; @@ -356,7 +356,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to) return false; } -bool OSLRenderServices::get_array_attribute(void *renderstate, bool derivatives, +bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object, TypeDesc type, ustring name, int index, void *val) { @@ -479,7 +479,7 @@ static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val) static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val) { - if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { + if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { ustring *sval = (ustring *)val; sval[0] = str; @@ -751,13 +751,19 @@ bool 
OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData * return false; } -bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustring object_name, +bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) { - if (renderstate == NULL) + if (sg->renderstate == NULL) return false; - ShaderData *sd = (ShaderData *)renderstate; + ShaderData *sd = (ShaderData *)(sg->renderstate); + return get_attribute(sd, derivatives, object_name, type, name, val); +} + +bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name, + TypeDesc type, ustring name, void *val) +{ KernelGlobals *kg = sd->osl_globals; bool is_curve; int object; @@ -815,12 +821,12 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri } bool OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type, - void *renderstate, void *val) + OSL::ShaderGlobals *sg, void *val) { return false; /* disabled by lockgeom */ } -bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, void *renderstate) +bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg) { return false; /* never called by OSL */ } @@ -969,7 +975,7 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options, return status; } -bool OSLRenderServices::get_texture_info(ustring filename, int subimage, +bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage, ustring dataname, TypeDesc datatype, void *data) { diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 069722d81b6..6f928a0d103 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -49,27 +49,29 @@ public: void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts); - bool get_matrix(OSL::Matrix44 
&result, OSL::TransformationPtr xform, float time); - bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time); + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time); + bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time); - bool get_matrix(OSL::Matrix44 &result, ustring from, float time); - bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time); + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time); + bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time); - bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform); - bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform); + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform); + bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform); - bool get_matrix(OSL::Matrix44 &result, ustring from); - bool get_inverse_matrix(OSL::Matrix44 &result, ustring from); + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from); + bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from); - bool get_array_attribute(void *renderstate, bool derivatives, + bool get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object, TypeDesc type, ustring name, int index, void *val); - bool get_attribute(void *renderstate, bool derivatives, ustring object, + bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object, + TypeDesc type, ustring name, void *val); + bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val); bool get_userdata(bool derivatives, ustring name, TypeDesc type, - void *renderstate, void *val); - bool has_userdata(ustring name, TypeDesc 
type, void *renderstate); + OSL::ShaderGlobals *sg, void *val); + bool has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg); int pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center, float radius, int max_points, bool sort, size_t *out_indices, @@ -106,7 +108,7 @@ public: OSL::ShaderGlobals *sg, const OSL::Vec3 &R, const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result); - bool get_texture_info(ustring filename, int subimage, + bool get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage, ustring dataname, TypeDesc datatype, void *data); static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, @@ -157,6 +159,70 @@ public: static ustring u_v; static ustring u_empty; +#if OSL_LIBRARY_VERSION_CODE < 10500 + bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { + return get_matrix(NULL, result, xform, time); + } + + bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { + return get_inverse_matrix(NULL, result, xform, time); + } + + bool get_matrix(OSL::Matrix44 &result, ustring from, float time) { + return get_matrix(NULL, result, from, time); + } + + bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time) { + return get_inverse_matrix(NULL, result, to, time); + } + + bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) { + return get_matrix(NULL, result, xform); + } + + bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) { + return get_inverse_matrix(NULL, result, xform); + } + + bool get_matrix(OSL::Matrix44 &result, ustring from) { + return get_matrix(NULL, result, from); + } + + bool get_inverse_matrix(OSL::Matrix44 &result, ustring to) { + return get_inverse_matrix(NULL, result, to); + } + + bool get_array_attribute(void *renderstate, bool derivatives, + ustring object, TypeDesc type, ustring name, + int index, void *val) { + OSL::ShaderGlobals sg; + 
sg.renderstate = renderstate; + return get_array_attribute(&sg, derivatives, + object, type, name, + index, val); + } + + bool get_attribute(void *renderstate, bool derivatives, ustring object_name, + TypeDesc type, ustring name, void *val) { + OSL::ShaderGlobals sg; + sg.renderstate = renderstate; + return get_attribute(&sg, derivatives, object_name, type, name, val); + } + + bool has_userdata(ustring name, TypeDesc type, void *renderstate) { + return has_userdata(name, type, (OSL::ShaderGlobals *) renderstate); + } + + bool get_userdata(bool derivatives, ustring name, TypeDesc type, + void *renderstate, void *val) { + return get_userdata(derivatives, name, type, (OSL::ShaderGlobals *) renderstate, val); + } + + bool get_texture_info(ustring filename, int subimage, + ustring dataname, TypeDesc datatype, void *data) { + return get_texture_info(NULL, filename, subimage, dataname, datatype, data); + } +#endif private: KernelGlobals *kernel_globals; OSL::TextureSystem *osl_ts; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 28135784db9..48498116874 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -164,11 +164,14 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag, CBSDFClosure *bsdf = (CBSDFClosure *)prim; int scattering = bsdf->scattering(); - /* no caustics option */ - if(scattering == LABEL_GLOSSY && (path_flag & PATH_RAY_DIFFUSE)) { + /* caustic options */ + if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { KernelGlobals *kg = sd->osl_globals; - if(kernel_data.integrator.no_caustics) + + if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || + (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { return; + } } /* sample weight */ diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h index 447a84255ef..9f10ba8023e 
100644 --- a/intern/cycles/kernel/shaders/node_fresnel.h +++ b/intern/cycles/kernel/shaders/node_fresnel.h @@ -34,3 +34,16 @@ float fresnel_dielectric_cos(float cosi, float eta) return result; } +color fresnel_conductor(float cosi, color eta, color k) +{ + color cosi2 = color(cosi*cosi); + color one = color(1, 1, 1); + color tmp_f = eta * eta + k * k; + color tmp = tmp_f * cosi2; + color Rparl2 = (tmp - (2.0 * eta * cosi) + one) / + (tmp + (2.0 * eta * cosi) + one); + color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) / + (tmp_f + (2.0 * eta * cosi) + cosi2); + return (Rparl2 + Rperp2) * 0.5; +} + diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index e39db8097f2..f8e5fd510ee 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -507,12 +507,8 @@ closure color hair_transmission(normal N, float roughnessu, float roughnessv, ve closure color henyey_greenstein(float g) BUILTIN; closure color absorption() BUILTIN; -// Backwards compatibility -closure color bssrdf_cubic(normal N, vector radius) BUILTIN; -closure color bssrdf_gaussian(normal N, vector radius) BUILTIN; -closure color specular_toon(normal N, float size, float smooth) BUILTIN; - // Renderer state +int backfacing () BUILTIN; int raytype (string typename) BUILTIN; // the individual 'isFOOray' functions are deprecated int iscameraray () { return raytype("camera"); } diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index d6663aae9db..c13eae813d6 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -236,7 +236,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade if(stack_load_float(stack, node.z) == 1.0f) offset += node.y; break; -#ifdef __IMAGE_TEXTURES__ +#ifdef __TEXTURES__ case NODE_TEX_IMAGE: svm_node_tex_image(kg, sd, stack, node); break; @@ -246,8 +246,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData 
*sd, Shade case NODE_TEX_ENVIRONMENT: svm_node_tex_environment(kg, sd, stack, node); break; -#endif -#ifdef __PROCEDURAL_TEXTURES__ case NODE_TEX_SKY: svm_node_tex_sky(kg, sd, stack, node, &offset); break; @@ -420,17 +418,13 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_LIGHT_FALLOFF: svm_node_light_falloff(sd, stack, node); break; -#endif -#ifdef __ANISOTROPIC__ +#endif case NODE_TANGENT: svm_node_tangent(kg, sd, stack, node); break; -#endif -#ifdef __NORMAL_MAP__ case NODE_NORMAL_MAP: svm_node_normal_map(kg, sd, stack, node); - break; -#endif + break; case NODE_END: default: return; diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 5fcc44e478b..30110db3ef9 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -179,7 +179,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: { #ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE)) + if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); @@ -207,7 +207,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: { #ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE)) + if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); @@ -244,8 +244,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: { #ifdef __CAUSTICS_TRICKS__ - 
if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE)) + if(!kernel_data.integrator.caustics_reflective && + !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) { break; + } #endif /* index of refraction */ float eta = fmaxf(param2, 1e-5f); @@ -262,12 +264,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float sample_weight = sc->sample_weight; sc = svm_node_closure_get_bsdf(sd, mix_weight*fresnel); - - if(sc) { - sc->N = N; - svm_node_glass_setup(sd, sc, type, eta, roughness, false); +#ifdef __CAUSTICS_TRICKS__ + if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) +#endif + { + if(sc) { + sc->N = N; + svm_node_glass_setup(sd, sc, type, eta, roughness, false); + } } +#ifdef __CAUSTICS_TRICKS__ + if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) + break; +#endif + /* refraction */ sc = &sd->closure[sd->num_closure]; sc->weight = weight; @@ -286,7 +297,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: { #ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE)) + if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); @@ -294,7 +305,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(sc) { sc->N = N; -#ifdef __ANISOTROPIC__ sc->T = stack_load_float3(stack, data_node.y); /* rotate tangent */ @@ -324,9 +334,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sd->flag |= bsdf_microfacet_ggx_aniso_setup(sc); else sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(sc); -#else - sd->flag |= bsdf_diffuse_setup(sc); -#endif } break; } @@ -371,7 +378,7 @@ ccl_device void 
svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(sc) { /* todo: giving a fixed weight here will cause issues when - * mixing multiple BSDFS. energey will not be conserved and + * mixing multiple BSDFS. energy will not be conserved and * the throughput can blow up after multiple bounces. we * better figure out a way to skip backfaces from rays * spawned by transmission from the front */ diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h index 2503912c5c6..b221e0728ec 100644 --- a/intern/cycles/kernel/svm/svm_convert.h +++ b/intern/cycles/kernel/svm/svm_convert.h @@ -45,13 +45,13 @@ ccl_device void svm_node_convert(ShaderData *sd, float *stack, uint type, uint f } case NODE_CONVERT_VF: { float3 f = stack_load_float3(stack, from); - float g = (f.x + f.y + f.z)*(1.0f/3.0f); + float g = average(f); stack_store_float(stack, to, g); break; } case NODE_CONVERT_VI: { float3 f = stack_load_float3(stack, from); - int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f)); + int i = (int)average(f); stack_store_int(stack, to, i); break; } |