Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt71
-rw-r--r--intern/cycles/kernel/SConscript53
-rw-r--r--intern/cycles/kernel/closure/bsdf.h10
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h12
-rw-r--r--intern/cycles/kernel/closure/bsdf_util.h18
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h6
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h6
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h21
-rw-r--r--intern/cycles/kernel/kernel.cl6
-rw-r--r--intern/cycles/kernel/kernel.cpp4
-rw-r--r--intern/cycles/kernel/kernel.cu6
-rw-r--r--intern/cycles/kernel/kernel.h12
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h49
-rw-r--r--intern/cycles/kernel/kernel_avx.cpp4
-rw-r--r--intern/cycles/kernel/kernel_avx2.cpp4
-rw-r--r--intern/cycles/kernel/kernel_bake.h7
-rw-r--r--intern/cycles/kernel/kernel_camera.h13
-rw-r--r--intern/cycles/kernel/kernel_emission.h8
-rw-r--r--intern/cycles/kernel/kernel_jitter.h24
-rw-r--r--intern/cycles/kernel/kernel_light.h31
-rw-r--r--intern/cycles/kernel/kernel_path.h75
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h8
-rw-r--r--intern/cycles/kernel/kernel_path_volume.h87
-rw-r--r--intern/cycles/kernel/kernel_shader.h6
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp4
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp4
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp4
-rw-r--r--intern/cycles/kernel/kernel_types.h70
-rw-r--r--intern/cycles/kernel/kernel_volume.h76
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp24
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp20
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h2
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp38
-rw-r--r--intern/cycles/kernel/osl/osl_services.h92
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp9
-rw-r--r--intern/cycles/kernel/shaders/node_fresnel.h13
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h6
-rw-r--r--intern/cycles/kernel/svm/svm.h12
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h33
-rw-r--r--intern/cycles/kernel/svm/svm_convert.h4
41 files changed, 563 insertions, 391 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 2ff6b53b0a5..8857f86890c 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -149,48 +149,61 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
- if(CUDA_VERSION MATCHES "60")
+ if(CUDA_VERSION MATCHES "65")
else()
message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
- "build may succeed but only CUDA 6.0 is officially supported")
+ "build may succeed but only CUDA 6.5 is officially supported")
endif()
# build for each arch
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
- foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
- set(cuda_cubin kernel_${arch}.cubin)
+ macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
+ if(${experimental})
+ set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__")
+ set(cuda_cubin kernel_experimental_${arch}.cubin)
+ else()
+ set(cuda_extra_flags "")
+ set(cuda_cubin kernel_${arch}.cubin)
+ endif()
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
set(cuda_math_flags "--use_fast_math")
- if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50")
- message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping")
- else()
- add_custom_command(
- OUTPUT ${cuda_cubin}
- COMMAND ${CUDA_NVCC_EXECUTABLE}
- -arch=${arch}
- -m${CUDA_BITS}
- --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
- -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
- --ptxas-options="-v"
- ${cuda_arch_flags}
- ${cuda_version_flags}
- ${cuda_math_flags}
- -I${CMAKE_CURRENT_SOURCE_DIR}/../util
- -I${CMAKE_CURRENT_SOURCE_DIR}/svm
- -DCCL_NAMESPACE_BEGIN=
- -DCCL_NAMESPACE_END=
- -DNVCC
-
- DEPENDS ${cuda_sources})
-
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
- list(APPEND cuda_cubins ${cuda_cubin})
- endif()
+ add_custom_command(
+ OUTPUT ${cuda_cubin}
+ COMMAND ${CUDA_NVCC_EXECUTABLE}
+ -arch=${arch}
+ -m${CUDA_BITS}
+ --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
+ --ptxas-options="-v"
+ ${cuda_arch_flags}
+ ${cuda_version_flags}
+ ${cuda_math_flags}
+ ${cuda_extra_flags}
+ -I${CMAKE_CURRENT_SOURCE_DIR}/../util
+ -I${CMAKE_CURRENT_SOURCE_DIR}/svm
+ -DCCL_NAMESPACE_BEGIN=
+ -DCCL_NAMESPACE_END=
+ -DNVCC
+
+ DEPENDS ${cuda_sources})
+
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
+ list(APPEND cuda_cubins ${cuda_cubin})
+
+ unset(cuda_extra_flags)
+ endmacro()
+
+ foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
+ # Compile regular kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
+
+ # Compile experimental kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} TRUE)
endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index cfe12e8533d..5a9e57c5342 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -69,8 +69,8 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0]
cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1])
- if cuda_version != 60:
- print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10))
+ if cuda_version != 65:
+ print("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported." % (cuda_version/10, cuda_version%10))
# nvcc flags
nvcc_flags = "-m%s" % (bits)
@@ -83,30 +83,35 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
last_cubin_file = None
+ configs = (("kernel_%s.cubin", ''),
+ ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__'))
+
# add command for each cuda architecture
for arch in cuda_archs:
- if cuda_version < 60 and arch == "sm_50":
- print("Can't build kernel for CUDA sm_50 architecture, skipping")
- continue
-
- cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
-
- if env['BF_CYCLES_CUDA_ENV']:
- MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
- command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file)
- else:
- command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file)
-
- kernel.Command(cubin_file, 'kernel.cu', command)
- kernel.Depends(cubin_file, dependencies)
-
- kernel_binaries.append(cubin_file)
-
- if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
- # trick to compile one kernel at a time to reduce memory usage
- if last_cubin_file:
- kernel.Depends(cubin_file, last_cubin_file)
- last_cubin_file = cubin_file
+ for config in configs:
+ # TODO(sergey): Use dict instead ocouple in order to increase readability?
+ name = config[0]
+ extra_flags = config[1]
+
+ cubin_file = os.path.join(build_dir, name % arch)
+ current_flags = nvcc_flags + extra_flags
+
+ if env['BF_CYCLES_CUDA_ENV']:
+ MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
+ command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file)
+ else:
+ command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file)
+
+ kernel.Command(cubin_file, 'kernel.cu', command)
+ kernel.Depends(cubin_file, dependencies)
+
+ kernel_binaries.append(cubin_file)
+
+ if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
+ # trick to compile one kernel at a time to reduce memory usage
+ if last_cubin_file:
+ kernel.Depends(cubin_file, last_cubin_file)
+ last_cubin_file = cubin_file
Return('kernel_binaries')
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 81c239ea0c9..9961071c2ac 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -23,9 +23,7 @@
#include "../closure/bsdf_reflection.h"
#include "../closure/bsdf_refraction.h"
#include "../closure/bsdf_transparent.h"
-#ifdef __ANISOTROPIC__
#include "../closure/bsdf_ashikhmin_shirley.h"
-#endif
#include "../closure/bsdf_westin.h"
#include "../closure/bsdf_toon.h"
#include "../closure/bsdf_hair.h"
@@ -94,13 +92,11 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
-#ifdef __ANISOTROPIC__
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
@@ -190,12 +186,10 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
break;
-#ifdef __ANISOTROPIC__
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
break;
@@ -260,12 +254,10 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
break;
-#ifdef __ANISOTROPIC__
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
break;
@@ -348,12 +340,10 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
bsdf_microfacet_beckmann_blur(sc, roughness);
break;
-#ifdef __ANISOTROPIC__
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
bsdf_ashikhmin_shirley_blur(sc, roughness);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
bsdf_ashikhmin_velvet_blur(sc, roughness);
break;
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 6a5d0410e01..ad7864cb8ea 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -77,7 +77,7 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c
NdotI = fmaxf(NdotI, 1e-6f);
NdotO = fmaxf(NdotO, 1e-6f);
float3 H = normalize(omega_in + I);
- float HdotI = fmaxf(dot(H, I), 1e-6f);
+ float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
float HdotN = fmaxf(dot(H, N), 1e-6f);
float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index df0644becee..a0c59e6cbc0 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -362,7 +362,7 @@ ccl_device_inline float3 microfacet_sample_stretched(
* E. Heitz, Research Report 2014
*
* Anisotropy is only supported for reflection currently, but adding it for
- * tranmission is just a matter of copying code from reflection if needed. */
+ * transmission is just a matter of copying code from reflection if needed. */
ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
{
@@ -513,6 +513,10 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con
float cosHO = dot(Ht, I);
float cosHI = dot(Ht, omega_in);
+ /* those situations makes chi+ terms in eq. 33, 34 be zero */
+ if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
float D, G1o, G1i;
/* eq. 33: first we calculate D(m) with m=Ht: */
@@ -862,7 +866,11 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
float cosHO = dot(Ht, I);
float cosHI = dot(Ht, omega_in);
- /* eq. 33: first we calculate D(m) with m=Ht: */
+ /* those situations makes chi+ terms in eq. 25, 27 be zero */
+ if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ /* eq. 25: first we calculate D(m) with m=Ht: */
float alpha2 = alpha_x * alpha_y;
float cosThetaM = min(dot(N, Ht), 1.0f);
float cosThetaM2 = cosThetaM * cosThetaM;
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index b3dcb9dcc38..05816bac2c1 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -111,16 +111,20 @@ ccl_device float fresnel_dielectric_cos(float cosi, float eta)
return 1.0f; // TIR(no refracted component)
}
-ccl_device float fresnel_conductor(float cosi, float eta, float k)
+#if 0
+ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
{
- float tmp_f = eta * eta + k * k;
- float tmp = tmp_f * cosi * cosi;
- float Rparl2 = (tmp - (2.0f * eta * cosi) + 1)/
- (tmp + (2.0f * eta * cosi) + 1);
- float Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi * cosi)/
- (tmp_f + (2.0f * eta * cosi) + cosi * cosi);
+ float3 cosi2 = make_float3(cosi*cosi);
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 tmp_f = eta * eta + k * k;
+ float3 tmp = tmp_f * cosi2;
+ float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) /
+ (tmp + (2.0f * eta * cosi) + one);
+ float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) /
+ (tmp_f + (2.0f * eta * cosi) + cosi2);
return(Rparl2 + Rperp2) * 0.5f;
}
+#endif
ccl_device float smooth_step(float edge0, float edge1, float x)
{
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 863836ffcea..c4e9e2ababe 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -936,9 +936,10 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
sd->u = isect->u;
sd->v = 0.0f;
#endif
-
+
+ tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+
if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
sd->Ng = normalize(-(D - tg * (dot(tg, D))));
}
else {
@@ -950,7 +951,6 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float gd = isect->v;
if(gd != 0.0f) {
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
sd->Ng = sd->Ng - gd * tg;
sd->Ng = normalize(sd->Ng);
}
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 5ab0b731bdd..7409aa0d014 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -272,7 +272,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
#endif
/* compute face normal */
- float3 Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+ float3 Ng;
+ if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
+ else
+ Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
sd->Ng = Ng;
sd->N = Ng;
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index f2f35c2efd0..41e9d183a96 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -18,7 +18,7 @@
/* Triangle Primitive
*
* Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
- * ray intersection we use a precomputed triangle storage to accelarate
+ * ray intersection we use a precomputed triangle storage to accelerate
* intersection at the cost of more memory usage */
CCL_NAMESPACE_BEGIN
@@ -117,21 +117,24 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
}
/* normal on triangle */
-ccl_device_inline float3 triangle_normal(KernelGlobals *kg, int prim)
+ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
{
/* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* return normal */
- return normalize(cross(v1 - v0, v2 - v0));
+ if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ return normalize(cross(v2 - v0, v1 - v0));
+ else
+ return normalize(cross(v1 - v0, v2 - v0));
}
/* point and normal on triangle */
-ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
+ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
{
/* load triangle vertices */
float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
@@ -144,8 +147,14 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float
float t = 1.0f - u - v;
*P = (u*v0 + v*v1 + t*v2);
+ /* get object flags, instance-aware */
+ int object_flag = kernel_tex_fetch(__object_flag, object >= 0 ? object : ~object);
+
/* compute normal */
- *Ng = normalize(cross(v1 - v0, v2 - v0));
+ if(object_flag & SD_NEGATIVE_SCALE_APPLIED)
+ *Ng = normalize(cross(v2 - v0, v1 - v0));
+ else
+ *Ng = normalize(cross(v1 - v0, v2 - v0));
/* shader`*/
*shader = __float_as_int(kernel_tex_fetch(__tri_shader, prim));
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index 2e0a49435a8..4f20ef9ca15 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw, int sample)
+ int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw, int sample)
+ int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake(
int x = sx + get_global_id(0);
if(x < sx + sw)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample);
}
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index 42eb9a62518..fa2113fbb46 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -120,10 +120,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
/* Shader Evaluation */
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
+void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 9208acc232e..d5b5293664c 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -146,7 +146,7 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
@@ -156,12 +156,12 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int s
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw)
- kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
+ kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample);
}
#endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 264e5e3e4d0..19e06b88797 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int sample);
+ int type, int i, int offset, int sample);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
@@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int sample);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
@@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int sample);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int sample);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
@@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int sample);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@@ -95,7 +95,7 @@ void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int sample);
+ int type, int i, int offset, int sample);
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index b4f6dcdace9..b0efcdc66a7 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -32,10 +32,11 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
if(type == CLOSURE_BSDF_TRANSPARENT_ID)
eval->transparent = value;
- else if(CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_PHASE(type))
+ else if(CLOSURE_IS_BSDF_DIFFUSE(type))
eval->diffuse = value;
else if(CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy = value;
@@ -43,6 +44,8 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
eval->transmission = value;
else if(CLOSURE_IS_BSDF_BSSRDF(type))
eval->subsurface = value;
+ else if(CLOSURE_IS_PHASE(type))
+ eval->scatter = value;
}
else
eval->diffuse = value;
@@ -51,11 +54,17 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
#endif
}
-ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
+/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. */
+#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 500
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
{
#ifdef __PASSES__
if(eval->use_light_pass) {
- if(CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_PHASE(type))
+ if(CLOSURE_IS_BSDF_DIFFUSE(type))
eval->diffuse += value;
else if(CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy += value;
@@ -63,6 +72,8 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3
eval->transmission += value;
else if(CLOSURE_IS_BSDF_BSSRDF(type))
eval->subsurface += value;
+ else if(CLOSURE_IS_PHASE(type))
+ eval->scatter += value;
/* skipping transparent, this function is used by for eval(), will be zero then */
}
@@ -81,7 +92,8 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
&& is_zero(eval->glossy)
&& is_zero(eval->transmission)
&& is_zero(eval->transparent)
- && is_zero(eval->subsurface);
+ && is_zero(eval->subsurface)
+ && is_zero(eval->scatter);
}
else
return is_zero(eval->diffuse);
@@ -98,6 +110,7 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
eval->glossy *= value;
eval->transmission *= value;
eval->subsurface *= value;
+ eval->scatter *= value;
/* skipping transparent, this function is used by for eval(), will be zero then */
}
@@ -111,7 +124,7 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
/* Path Radiance
*
* We accumulate different render passes separately. After summing at the end
- * to get the combined result, it should be identical. We definte directly
+ * to get the combined result, it should be identical. We definite directly
* visible as the first non-transparent hit, while indirectly visible are the
* bounces after that. */
@@ -130,21 +143,25 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->color_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->path_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->path_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->path_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->emission = make_float3(0.0f, 0.0f, 0.0f);
L->background = make_float3(0.0f, 0.0f, 0.0f);
@@ -174,14 +191,16 @@ ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, float3 *throug
L->path_glossy = bsdf_eval->glossy*value;
L->path_transmission = bsdf_eval->transmission*value;
L->path_subsurface = bsdf_eval->subsurface*value;
+ L->path_scatter = bsdf_eval->scatter*value;
- *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface;
+ *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface + L->path_scatter;
L->direct_throughput = *throughput;
}
else {
/* transparent bounce before first hit, or indirectly visible through BSDF */
- float3 sum = (bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent + bsdf_eval->subsurface)*inverse_pdf;
+ float3 sum = (bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent +
+ bsdf_eval->subsurface + bsdf_eval->scatter) * inverse_pdf;
*throughput *= sum;
}
}
@@ -241,6 +260,7 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through
L->direct_glossy += throughput*bsdf_eval->glossy*shadow;
L->direct_transmission += throughput*bsdf_eval->transmission*shadow;
L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow;
+ L->direct_scatter += throughput*bsdf_eval->scatter*shadow;
if(is_lamp) {
L->shadow.x += shadow.x*shadow_fac;
@@ -250,7 +270,7 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through
}
else {
/* indirectly visible lighting after BSDF bounce */
- float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface;
+ float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface + bsdf_eval->scatter;
L->indirect += throughput*sum*shadow;
}
}
@@ -291,12 +311,14 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
L->direct_glossy += L->path_glossy*L->direct_emission;
L->direct_transmission += L->path_transmission*L->direct_emission;
L->direct_subsurface += L->path_subsurface*L->direct_emission;
+ L->direct_scatter += L->path_scatter*L->direct_emission;
L->indirect = safe_divide_color(L->indirect, L->direct_throughput);
L->indirect_diffuse += L->path_diffuse*L->indirect;
L->indirect_glossy += L->path_glossy*L->indirect;
L->indirect_transmission += L->path_transmission*L->indirect;
L->indirect_subsurface += L->path_subsurface*L->indirect;
+ L->indirect_scatter += L->path_scatter*L->indirect;
}
#endif
}
@@ -309,6 +331,7 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
L->path_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->path_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect = make_float3(0.0f, 0.0f, 0.0f);
@@ -327,8 +350,8 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
if(L->use_light_pass) {
path_radiance_sum_indirect(L);
- L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->emission;
- L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface;
+ L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission;
+ L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter;
if(!kernel_data.background.transparent)
L_direct += L->background;
@@ -344,11 +367,13 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->emission = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -368,6 +393,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
L->direct_glossy *= scale;
L->direct_transmission *= scale;
L->direct_subsurface *= scale;
+ L->direct_scatter *= scale;
L->emission *= scale;
L->background *= scale;
}
@@ -382,6 +408,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
L->indirect_glossy *= scale;
L->indirect_transmission *= scale;
L->indirect_subsurface *= scale;
+ L->indirect_scatter *= scale;
}
/* Sum again, after clamping */
@@ -416,11 +443,13 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
L->direct_glossy += L_sample->direct_glossy*fac;
L->direct_transmission += L_sample->direct_transmission*fac;
L->direct_subsurface += L_sample->direct_subsurface*fac;
+ L->direct_scatter += L_sample->direct_scatter*fac;
L->indirect_diffuse += L_sample->indirect_diffuse*fac;
L->indirect_glossy += L_sample->indirect_glossy*fac;
L->indirect_transmission += L_sample->indirect_transmission*fac;
L->indirect_subsurface += L_sample->indirect_subsurface*fac;
+ L->indirect_scatter += L_sample->indirect_scatter*fac;
L->emission += L_sample->emission*fac;
L->background += L_sample->background*fac;
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index d612a82b785..e7ff21a6f09 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -68,10 +68,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
/* Shader Evaluate */
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
+void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp
index 339421a002b..cb1662bbfbe 100644
--- a/intern/cycles/kernel/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernel_avx2.cpp
@@ -69,10 +69,10 @@ void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
+void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index c2d14b7f835..dfbb49db7e6 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -172,7 +172,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u)
}
#endif
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output,
+ ShaderEvalType type, int i, int offset, int sample)
{
ShaderData sd;
uint4 in = input[i * 2];
@@ -197,7 +198,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
int num_samples = kernel_data.integrator.aa_samples;
/* random number generator */
- RNG rng = cmj_hash(i, 0);
+ RNG rng = cmj_hash(offset + i, 0);
#if 0
uint rng_state = cmj_hash(i, 0);
@@ -215,7 +216,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
int shader;
float3 P, Ng;
- triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader);
+ triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
/* dummy initilizations copied from SHADER_EVAL_DISPLACE */
float3 I = Ng;
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 6b03abe9708..5c83358a56d 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -21,16 +21,22 @@ CCL_NAMESPACE_BEGIN
ccl_device float2 camera_sample_aperture(KernelGlobals *kg, float u, float v)
{
float blades = kernel_data.cam.blades;
+ float2 bokeh;
if(blades == 0.0f) {
/* sample disk */
- return concentric_sample_disk(u, v);
+ bokeh = concentric_sample_disk(u, v);
}
else {
/* sample polygon */
float rotation = kernel_data.cam.bladesrotation;
- return regular_polygon_sample(blades, rotation, u, v);
+ bokeh = regular_polygon_sample(blades, rotation, u, v);
}
+
+ /* anamorphic lens bokeh */
+ bokeh.x *= kernel_data.cam.inv_aperture_ratio;
+
+ return bokeh;
}
ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray)
@@ -183,7 +189,8 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
/* calculate orthonormal coordinates perpendicular to D */
float3 U, V;
- make_orthonormals(D, &U, &V);
+ U = normalize(make_float3(1.0f, 0.0f, 0.0f) - D.x * D);
+ V = normalize(cross(D, U));
/* update ray for effect of lens */
ray->P = U * lensuv.x + V * lensuv.y;
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index bda98b84da8..4b2bb723ab6 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -108,6 +108,8 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
if(ls->shader & SHADER_EXCLUDE_TRANSMIT)
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
+ if(ls->shader & SHADER_EXCLUDE_SCATTER)
+ eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
}
#endif
@@ -187,7 +189,8 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st
if(ls.shader & SHADER_EXCLUDE_ANY) {
if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) ||
- ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)))
+ ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+ ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
continue;
}
#endif
@@ -231,7 +234,8 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta
if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) ||
((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
- ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)))
+ ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
+ ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
return make_float3(0.0f, 0.0f, 0.0f);
}
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 7a850844bf2..2a5b7689e57 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -14,6 +14,8 @@
* limitations under the License
*/
+/* TODO(sergey): Consider moving portable ctz/clz stuff to util. */
+
CCL_NAMESPACE_BEGIN
/* "Correlated Multi-Jittered Sampling"
@@ -35,8 +37,16 @@ ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
/* a must be > 0 and b must be > 1 */
ccl_device_inline int cmj_fast_div_pow2(int a, int b)
{
-#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER)
+ kernel_assert(a > 0);
+ kernel_assert(b > 1);
+#if defined(__KERNEL_SSE2__)
+# ifdef _MSC_VER
+ unsigned long ctz;
+ _BitScanForward(&ctz, b);
+ return a >> ctz;
+# else
return a >> __builtin_ctz(b);
+# endif
#else
return a/b;
#endif
@@ -44,8 +54,15 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b)
ccl_device_inline uint cmj_w_mask(uint w)
{
-#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER)
+ kernel_assert(w > 1);
+#if defined(__KERNEL_SSE2__)
+# ifdef _MSC_VER
+ unsigned long leading_zero;
+ _BitScanReverse(&leading_zero, w);
+ return ((1 << (1 + leading_zero)) - 1);
+# else
return ((1 << (32 - __builtin_clz(w))) - 1);
+# endif
#else
w |= w >> 1;
w |= w >> 2;
@@ -165,7 +182,8 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
smodm = cmj_fast_mod_pow2(s, m);
}
else {
- sdivm = float_to_int(s * invm);
+ /* Doing s*inmv gives precision issues here. */
+ sdivm = s / m;
smodm = s - sdivm*m;
}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 0adf9ed4666..e7f62f230f8 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -27,7 +27,7 @@ typedef struct LightSample {
float pdf; /* light sampling probability density function */
float eval_fac; /* intensity multiplier */
int object; /* object id for triangle/curve lights */
- int prim; /* primitive id for triangle/curve ligths */
+ int prim; /* primitive id for triangle/curve lights */
int shader; /* shader id */
int lamp; /* lamp id */
LightType type; /* type of light */
@@ -208,8 +208,8 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
return t*t/cos_pi;
}
-ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp,
- float randu, float randv, float3 P, LightSample *ls, bool for_volume)
+ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
+ float randu, float randv, float3 P, LightSample *ls)
{
float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
@@ -224,11 +224,6 @@ ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp,
ls->v = randv;
if(type == LIGHT_DISTANT) {
-#ifdef __VOLUME__
- if(for_volume)
- return false;
-#endif
-
/* distant light */
float3 lightD = make_float3(data0.y, data0.z, data0.w);
float3 D = lightD;
@@ -249,11 +244,6 @@ ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp,
}
#ifdef __BACKGROUND_MIS__
else if(type == LIGHT_BACKGROUND) {
-#ifdef __VOLUME__
- if(for_volume)
- return false;
-#endif
-
/* infinite area light (e.g. light dome or env light) */
float3 D = background_light_sample(kg, randu, randv, &ls->pdf);
@@ -309,8 +299,6 @@ ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp,
ls->eval_fac *= kernel_data.integrator.inv_pdf_lights;
ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
}
-
- return true;
}
ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
@@ -469,7 +457,7 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
v = randv*randu;
/* triangle, so get position, normal, shader */
- triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader);
+ triangle_point_normal(kg, object, prim, u, v, &ls->P, &ls->Ng, &ls->shader);
ls->object = object;
ls->prim = prim;
ls->lamp = LAMP_NONE;
@@ -526,7 +514,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt)
/* Generic Light */
-ccl_device bool light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls, bool for_volume)
+ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls)
{
/* sample index */
int index = light_distribution_sample(kg, randt);
@@ -545,12 +533,10 @@ ccl_device bool light_sample(KernelGlobals *kg, float randt, float randu, float
ls->D = normalize_len(ls->P - P, &ls->t);
ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t);
ls->shader |= shader_flag;
-
- return true;
}
else {
int lamp = -prim-1;
- return lamp_light_sample(kg, lamp, randu, randv, P, ls, for_volume);
+ lamp_light_sample(kg, lamp, randu, randv, P, ls);
}
}
@@ -560,11 +546,6 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
return __float_as_int(data3.x);
}
-ccl_device bool light_select(KernelGlobals *kg, int index, float randu, float randv, float3 P, LightSample *ls, bool for_volume)
-{
- return lamp_light_sample(kg, index, randu, randv, P, ls, for_volume);
-}
-
ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt)
{
/* sample index */
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 0c033f6234c..515854b6e9c 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -89,6 +89,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
volume_ray.t = (hit)? isect.t: FLT_MAX;
bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+
+#ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
@@ -135,28 +137,32 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f))
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
continue;
else
break;
}
}
- else {
+ else
+#endif
+ {
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &state, &volume_sd, &volume_ray, L, &throughput, rng);
+ kg, &state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
+#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L, 1.0f);
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L);
/* indirect light bounce */
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f))
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
continue;
else
break;
}
+#endif
}
}
#endif
@@ -470,6 +476,8 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
volume_ray.t = (hit)? isect.t: FLT_MAX;
bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+
+#ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
@@ -516,28 +524,32 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f))
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
continue;
else
break;
}
}
- else {
+ else
+#endif
+ {
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng);
+ kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
+#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, 1.0f);
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
/* indirect light bounce */
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f))
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
continue;
else
break;
}
+#endif
}
}
#endif
@@ -803,10 +815,11 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
-#ifdef __KERNEL_CPU__
+#ifdef __VOLUME_DECOUPLED__
/* decoupled ray marching only supported on CPU */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
@@ -850,16 +863,17 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
- if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
- kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
+ kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&L);
- path_radiance_reset_indirect(&L);
- }
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&L);
+ path_radiance_reset_indirect(&L);
}
}
}
@@ -883,21 +897,22 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
PathState ps = state;
Ray pray = ray;
ShaderData volume_sd;
- float3 tp = throughput;
+ float3 tp = throughput * num_samples_inv;
/* branch RNG state */
path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng);
+ kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* todo: support equiangular, MIS and all light sampling.
* alternatively get decoupled ray marching working on the GPU */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, num_samples_inv);
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
- kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
+ kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
@@ -905,6 +920,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_reset_indirect(&L);
}
}
+#endif
}
/* todo: avoid this calculation using decoupled ray marching */
@@ -941,7 +957,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* holdout */
#ifdef __HOLDOUT__
- if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) {
+ if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) {
if(kernel_data.background.transparent) {
float3 holdout_weight;
@@ -1031,6 +1047,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
ray.P = ray_offset(sd.P, -sd.Ng);
ray.t -= sd.ray_length; /* clipping works through transparent */
+
+#ifdef __RAY_DIFFERENTIALS__
+ ray.dP = sd.dP;
+ ray.dD.dx = -sd.dI.dx;
+ ray.dD.dy = -sd.dI.dy;
+#endif
+
#ifdef __VOLUME__
/* enter/exit volume */
kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 7700abfbdae..9553c2da0df 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -50,7 +50,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- light_select(kg, i, light_u, light_v, sd->P, &ls, false);
+ lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls);
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
@@ -82,7 +82,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
light_t = 0.5f*light_t;
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false);
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
@@ -103,7 +103,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false);
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
/* sample random light */
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
@@ -200,7 +200,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG
#endif
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false);
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index 8453b79de45..da2d5e6eca8 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -16,11 +16,10 @@
CCL_NAMESPACE_BEGIN
-#ifdef __VOLUME__
+#ifdef __VOLUME_SCATTER__
ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
- float num_samples_adjust)
+ ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
{
#ifdef __EMISSION__
if(!kernel_data.integrator.use_direct_light)
@@ -41,9 +40,8 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
light_ray.time = sd->time;
#endif
- if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true))
- return;
- else if(ls.pdf == 0.0f)
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+ if(ls.pdf == 0.0f)
return;
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
@@ -52,15 +50,14 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
/* accumulate */
- path_radiance_accum_light(L, throughput * num_samples_adjust, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}
}
#endif
}
ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray,
- float num_samples_adjust)
+ ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
{
/* sample phase function */
float phase_pdf;
@@ -135,26 +132,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- if(!light_select(kg, i, light_u, light_v, ray->P, &ls, true))
- continue;
+ lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
float3 tp = throughput;
/* sample position on volume segment */
- if(segment) {
- float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
- float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+ float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
+ float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
- if(result != VOLUME_PATH_SCATTERED)
- continue;
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if(!light_select(kg, i, light_u, light_v, sd->P, &ls, true))
- continue;
- }
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls);
if(ls.pdf == 0.0f)
continue;
@@ -190,26 +183,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
light_t = 0.5f*light_t;
LightSample ls;
- if(!light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls, true))
- continue;
+ light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls);
float3 tp = throughput;
/* sample position on volume segment */
- if(segment) {
- float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
- float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+ float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
+ float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
- if(result != VOLUME_PATH_SCATTERED)
- continue;
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true))
- continue;
- }
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
if(ls.pdf == 0.0f)
continue;
@@ -233,26 +222,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- if(!light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls, true))
- return;
+ light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls);
float3 tp = throughput;
/* sample position on volume segment */
- if(segment) {
- float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
- float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+ float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
- if(result != VOLUME_PATH_SCATTERED)
- return;
-
- /* todo: split up light_sample so we don't have to call it again with new position */
- if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true))
- return;
- }
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
if(ls.pdf == 0.0f)
return;
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 842b9f68840..fc61f1a9c2c 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -86,7 +86,7 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
#endif
if(sd->type & PRIMITIVE_TRIANGLE) {
/* static triangle */
- float3 Ng = triangle_normal(kg, sd->prim);
+ float3 Ng = triangle_normal(kg, sd);
sd->shader = __float_as_int(kernel_tex_fetch(__tri_shader, sd->prim));
/* vectors */
@@ -165,7 +165,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
/* fetch triangle data */
if(sd->type == PRIMITIVE_TRIANGLE) {
- float3 Ng = triangle_normal(kg, sd->prim);
+ float3 Ng = triangle_normal(kg, sd);
sd->shader = __float_as_int(kernel_tex_fetch(__tri_shader, sd->prim));
/* static triangle */
@@ -340,7 +340,7 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
int shader;
- triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader);
+ triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
/* force smooth shading for displacement */
shader |= SHADER_SMOOTH_NORMAL;
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 67bd0943b1b..740998e8c92 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -64,10 +64,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 40d621b66f6..da73a3a1c97 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -66,10 +66,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index 4b48d10b020..5704f60e138 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -67,10 +67,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
/* Shader Evaluate */
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
+void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index d81909a623a..933202ea498 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -66,6 +66,8 @@ CCL_NAMESPACE_BEGIN
#define __SUBSURFACE__
#define __CMJ__
#define __VOLUME__
+#define __VOLUME_DECOUPLED__
+#define __VOLUME_SCATTER__
#define __SHADOW_RECORD_ALL__
#endif
@@ -73,10 +75,15 @@ CCL_NAMESPACE_BEGIN
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __BRANCHED_PATH__
+#define __VOLUME__
+#define __VOLUME_SCATTER__
/* Experimental on GPU */
-//#define __VOLUME__
-//#define __SUBSURFACE__
+#ifdef __KERNEL_CUDA_EXPERIMENTAL__
+#define __SUBSURFACE__
+#define __CMJ__
+#endif
+
#endif
#ifdef __KERNEL_OPENCL__
@@ -103,7 +110,6 @@ CCL_NAMESPACE_BEGIN
#define __BACKGROUND_MIS__
#define __LAMP_MIS__
#define __AO__
-#define __ANISOTROPIC__
//#define __CAMERA_MOTION__
//#define __OBJECT_MOTION__
//#define __HAIR__
@@ -134,11 +140,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SHADING__
#define __SVM__
#define __EMISSION__
-#define __PROCEDURAL_TEXTURES__
-#define __IMAGE_TEXTURES__
+#define __TEXTURES__
#define __EXTRA_NODES__
#define __HOLDOUT__
-#define __NORMAL_MAP__
#endif
#ifdef __KERNEL_ADV_SHADING__
@@ -148,7 +152,6 @@ CCL_NAMESPACE_BEGIN
#define __BACKGROUND_MIS__
#define __LAMP_MIS__
#define __AO__
-#define __ANISOTROPIC__
#define __CAMERA_MOTION__
#define __OBJECT_MOTION__
#define __HAIR__
@@ -223,10 +226,9 @@ enum PathTraceDimension {
PRNG_PHASE_V = 9,
PRNG_PHASE = 10,
PRNG_SCATTER_DISTANCE = 11,
- PRNG_BOUNCE_NUM = 12,
-#else
- PRNG_BOUNCE_NUM = 8,
#endif
+
+ PRNG_BOUNCE_NUM = 12,
};
enum SamplingPattern {
@@ -252,17 +254,17 @@ enum PathRayFlag {
PATH_RAY_SHADOW_TRANSPARENT = 256,
PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
- PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/
+ PATH_RAY_CURVE = 512, /* visibility flag to define curve segments */
+ PATH_RAY_VOLUME_SCATTER = 1024, /* volume scattering */
/* note that these can use maximum 12 bits, the other are for layers */
- PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512),
+ PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024),
- PATH_RAY_MIS_SKIP = 1024,
- PATH_RAY_DIFFUSE_ANCESTOR = 2048,
- PATH_RAY_GLOSSY_ANCESTOR = 4096,
- PATH_RAY_BSSRDF_ANCESTOR = 8192,
- PATH_RAY_SINGLE_PASS_DONE = 16384,
- PATH_RAY_VOLUME_SCATTER = 32768,
+ PATH_RAY_MIS_SKIP = 2048,
+ PATH_RAY_DIFFUSE_ANCESTOR = 4096,
+ PATH_RAY_GLOSSY_ANCESTOR = 8192,
+ PATH_RAY_BSSRDF_ANCESTOR = 16384,
+ PATH_RAY_SINGLE_PASS_DONE = 32768,
/* we need layer member flags to be the 20 upper bits */
PATH_RAY_LAYER_SHIFT = (32-20)
@@ -332,21 +334,25 @@ typedef struct PathRadiance {
float3 color_glossy;
float3 color_transmission;
float3 color_subsurface;
+ float3 color_scatter;
float3 direct_diffuse;
float3 direct_glossy;
float3 direct_transmission;
float3 direct_subsurface;
+ float3 direct_scatter;
float3 indirect_diffuse;
float3 indirect_glossy;
float3 indirect_transmission;
float3 indirect_subsurface;
+ float3 indirect_scatter;
float3 path_diffuse;
float3 path_glossy;
float3 path_transmission;
float3 path_subsurface;
+ float3 path_scatter;
float4 shadow;
float mist;
@@ -360,6 +366,7 @@ typedef struct BsdfEval {
float3 transmission;
float3 transparent;
float3 subsurface;
+ float3 scatter;
} BsdfEval;
#else
@@ -380,7 +387,8 @@ typedef enum ShaderFlag {
SHADER_EXCLUDE_GLOSSY = (1 << 26),
SHADER_EXCLUDE_TRANSMIT = (1 << 25),
SHADER_EXCLUDE_CAMERA = (1 << 24),
- SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA),
+ SHADER_EXCLUDE_SCATTER = (1 << 23),
+ SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER),
SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY)
} ShaderFlag;
@@ -392,10 +400,8 @@ typedef enum LightType {
LIGHT_DISTANT,
LIGHT_BACKGROUND,
LIGHT_AREA,
- LIGHT_AO,
LIGHT_SPOT,
- LIGHT_TRIANGLE,
- LIGHT_STRAND
+ LIGHT_TRIANGLE
} LightType;
/* Camera Type */
@@ -526,17 +532,15 @@ typedef struct ShaderClosure {
ClosureType type;
float3 weight;
- float sample_weight;
-
float data0;
float data1;
+ float data2;
float3 N;
-#if defined(__ANISOTROPIC__) || defined(__SUBSURFACE__) || defined(__HAIR__)
float3 T;
-#endif
+
+ float sample_weight;
- float data2;
#ifdef __OSL__
void *prim;
#endif
@@ -596,7 +600,8 @@ enum ShaderDataFlag {
/* object flags */
SD_HOLDOUT_MASK = 524288, /* holdout for camera rays */
SD_OBJECT_MOTION = 1048576, /* has object motion blur */
- SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */
+ SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */
+ SD_NEGATIVE_SCALE_APPLIED = 4194304, /* vertices have negative scale applied */
SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED)
};
@@ -762,9 +767,12 @@ typedef struct KernelCamera {
/* render size */
float width, height;
int resolution;
+
+ /* anamorphic lens bokeh */
+ float inv_aperture_ratio;
+
int pad1;
int pad2;
- int pad3;
/* more matrices */
Transform screentoworld;
@@ -866,7 +874,8 @@ typedef struct KernelIntegrator {
int transparent_shadows;
/* caustics */
- int no_caustics;
+ int caustics_reflective;
+ int caustics_refractive;
float filter_glossy;
/* seed */
@@ -927,7 +936,6 @@ typedef enum CurveFlag {
} CurveFlag;
typedef struct KernelCurves {
- /* strand intersect and normal parameters - many can be changed to flags */
int curveflags;
int subdivisions;
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index cc4b2e3edf1..ea02ede10cd 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -176,6 +176,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* compute extinction at the start */
float t = 0.0f;
+ float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+
for(int i = 0; i < max_steps; i++) {
/* advance to new position */
float new_t = min(ray->t, (i+1) * step);
@@ -190,20 +192,26 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* compute attenuation over segment */
if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
- /* todo: we could avoid computing expf() for each step by summing,
- * because exp(a)*exp(b) = exp(a+b), but we still want a quick
- * tp_eps check too */
- tp *= volume_color_transmittance(sigma_t, new_t - t);
-
- /* stop if nearly all light blocked */
- if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
- break;
+ /* Compute expf() only for every Nth step, to save some calculations
+ * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
+
+ sum += (-sigma_t * (new_t - t));
+ if((i & 0x07) == 0) { /* ToDo: Other interval? */
+ tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
+
+ /* stop if nearly all light is blocked */
+ if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
+ break;
+ }
}
/* stop if at the end of the volume */
t = new_t;
- if(t == ray->t)
+ if(t == ray->t) {
+ /* Update throughput in case we haven't done it above */
+ tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
break;
+ }
}
*throughput = tp;
@@ -326,6 +334,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
float t = ray->t;
float3 new_tp;
+#ifdef __VOLUME_SCATTER__
/* randomly scatter, and if we do t is shortened */
if(closure_flag & SD_SCATTER) {
/* extinction coefficient */
@@ -379,7 +388,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
new_tp = *throughput * transmittance / pdf;
}
}
- else if(closure_flag & SD_ABSORPTION) {
+ else
+#endif
+ if(closure_flag & SD_ABSORPTION) {
/* absorption only, no sampling needed */
float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
new_tp = *throughput * transmittance;
@@ -456,6 +467,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
bool scatter = false;
/* distance sampling */
+#ifdef __VOLUME_SCATTER__
if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
has_scatter = true;
@@ -491,7 +503,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
xi = 1.0f - (1.0f - xi)/sample_transmittance;
}
}
- else if(closure_flag & SD_ABSORPTION) {
+ else
+#endif
+ if(closure_flag & SD_ABSORPTION) {
/* absorption only, no sampling needed */
float3 sigma_a = coeff.sigma_a;
@@ -546,13 +560,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
* between the endpoints. distance sampling is used to decide if we will
* scatter or not. */
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
- PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
+ PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
{
/* workaround to fix correlation bug in T38710, can find better solution
* in random number generator later, for now this is done here to not impact
* performance of rendering without volumes */
RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
@@ -724,16 +737,15 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s
* scattering, they always scatter if there is any non-zero scattering
* coefficient.
*
- * these also do not do emission or modify throughput. */
+ * these also do not do emission or modify throughput.
+ *
+ * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
float3 *throughput, float rphase, float rscatter,
const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
{
- int closure_flag = segment->closure_flag;
-
- if(!(closure_flag & SD_SCATTER))
- return VOLUME_PATH_MISSED;
+ kernel_assert(segment->closure_flag & SD_SCATTER);
/* pick random color channel, we use the Veach one-sample
* model with balance heuristic for the channels */
@@ -845,15 +857,33 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
if(segment->numsteps > 1) {
- /* todo: optimize using binary search */
float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < segment->numsteps-1; i++, step++) {
- if(sample_t < step->t)
+ int numsteps = segment->numsteps;
+ int high = numsteps - 1;
+ int low = 0;
+ int mid;
+
+ while(low < high) {
+ mid = (low + high) >> 1;
+
+ if(sample_t < step[mid].t)
+ high = mid;
+ else if(sample_t >= step[mid + 1].t)
+ low = mid + 1;
+ else {
+ /* found our interval in step[mid] .. step[mid+1] */
+ prev_t = step[mid].t;
+ prev_cdf_distance = step[mid].cdf_distance;
+ step += mid+1;
break;
+ }
+ }
- prev_t = step->t;
- prev_cdf_distance = step->cdf_distance;
+ if(low >= numsteps - 1) {
+ prev_t = step[numsteps - 1].t;
+ prev_cdf_distance = step[numsteps-1].cdf_distance;
+ step += numsteps - 1;
}
/* pdf for picking step with distance sampling */
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 94337290d20..84ef85e089d 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -66,18 +66,6 @@ ClosureParam *closure_bssrdf_cubic_params()
static ClosureParam params[] = {
CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N),
CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius),
- //CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1),
- CLOSURE_STRING_KEYPARAM("label"),
- CLOSURE_FINISH_PARAM(CubicBSSRDFClosure)
- };
- return params;
-}
-
-ClosureParam *closure_bssrdf_cubic_extended_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N),
- CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius),
CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1),
CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.T.x),
CLOSURE_STRING_KEYPARAM("label"),
@@ -107,18 +95,6 @@ ClosureParam *closure_bssrdf_gaussian_params()
static ClosureParam params[] = {
CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N),
CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius),
- //CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1),
- CLOSURE_STRING_KEYPARAM("label"),
- CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure)
- };
- return params;
-}
-
-ClosureParam *closure_bssrdf_gaussian_extended_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N),
- CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius),
CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1),
CLOSURE_STRING_KEYPARAM("label"),
CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure)
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 2d716103b85..d7789edcfff 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -105,7 +105,7 @@ BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, ashikhmin_velvet, LA
CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, ashikhmin_shirley, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, ashikhmin_shirley, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.N),
CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.T),
CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, sc.data0),
@@ -124,37 +124,37 @@ BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, glossy_toon, LABEL_GLOSSY)
CLOSURE_FLOAT_PARAM(GlossyToonClosure, sc.data1),
BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, microfacet_ggx, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.N),
CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.T),
CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data0),
CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data1),
BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, microfacet_beckmann, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.N),
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.T),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data0),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data1),
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY|LABEL_TRANSMIT)
CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data0),
CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data2),
BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY|LABEL_TRANSMIT)
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data0),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data2),
@@ -244,8 +244,6 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
register_closure(ss, "glossy_toon", id++,
bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
- register_closure(ss, "specular_toon", id++,
- bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
register_closure(ss, "westin_backscatter", id++,
bsdf_westin_backscatter_params(), bsdf_westin_backscatter_prepare);
register_closure(ss, "westin_sheen", id++,
@@ -267,10 +265,6 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
closure_bssrdf_cubic_params(), closure_bssrdf_cubic_prepare);
register_closure(ss, "bssrdf_gaussian", id++,
closure_bssrdf_gaussian_params(), closure_bssrdf_gaussian_prepare);
- register_closure(ss, "bssrdf_cubic", id++,
- closure_bssrdf_cubic_extended_params(), closure_bssrdf_cubic_prepare);
- register_closure(ss, "bssrdf_gaussian", id++,
- closure_bssrdf_gaussian_extended_params(), closure_bssrdf_gaussian_prepare);
register_closure(ss, "hair_reflection", id++,
bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index a543907e884..58d215295dc 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -52,8 +52,6 @@ OSL::ClosureParam *closure_westin_backscatter_params();
OSL::ClosureParam *closure_westin_sheen_params();
OSL::ClosureParam *closure_bssrdf_cubic_params();
OSL::ClosureParam *closure_bssrdf_gaussian_params();
-OSL::ClosureParam *closure_bssrdf_cubic_extended_params();
-OSL::ClosureParam *closure_bssrdf_gaussian_extended_params();
OSL::ClosureParam *closure_henyey_greenstein_volume_params();
void closure_emission_prepare(OSL::RendererServices *, int id, void *data);
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 6a59a381f48..38cb5061346 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -126,7 +126,7 @@ void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::Texture
osl_ts = osl_ts_;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -156,7 +156,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -186,7 +186,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform
return false;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time)
{
KernelGlobals *kg = kernel_globals;
@@ -218,7 +218,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float ti
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time)
{
KernelGlobals *kg = kernel_globals;
@@ -250,7 +250,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, fl
return false;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -275,7 +275,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -300,7 +300,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform
return false;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
{
KernelGlobals *kg = kernel_globals;
@@ -328,7 +328,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from)
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to)
{
KernelGlobals *kg = kernel_globals;
@@ -356,7 +356,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to)
return false;
}
-bool OSLRenderServices::get_array_attribute(void *renderstate, bool derivatives,
+bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
ustring object, TypeDesc type, ustring name,
int index, void *val)
{
@@ -479,7 +479,7 @@ static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val)
static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val)
{
- if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
+ if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
ustring *sval = (ustring *)val;
sval[0] = str;
@@ -751,13 +751,19 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
return false;
}
-bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustring object_name,
+bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
TypeDesc type, ustring name, void *val)
{
- if (renderstate == NULL)
+ if (sg->renderstate == NULL)
return false;
- ShaderData *sd = (ShaderData *)renderstate;
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ return get_attribute(sd, derivatives, object_name, type, name, val);
+}
+
+bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
+ TypeDesc type, ustring name, void *val)
+{
KernelGlobals *kg = sd->osl_globals;
bool is_curve;
int object;
@@ -815,12 +821,12 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
}
bool OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type,
- void *renderstate, void *val)
+ OSL::ShaderGlobals *sg, void *val)
{
return false; /* disabled by lockgeom */
}
-bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, void *renderstate)
+bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg)
{
return false; /* never called by OSL */
}
@@ -969,7 +975,7 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options,
return status;
}
-bool OSLRenderServices::get_texture_info(ustring filename, int subimage,
+bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage,
ustring dataname,
TypeDesc datatype, void *data)
{
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 069722d81b6..6f928a0d103 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -49,27 +49,29 @@ public:
void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
- bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
- bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
- bool get_matrix(OSL::Matrix44 &result, ustring from, float time);
- bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time);
- bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform);
- bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform);
- bool get_matrix(OSL::Matrix44 &result, ustring from);
- bool get_inverse_matrix(OSL::Matrix44 &result, ustring from);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from);
- bool get_array_attribute(void *renderstate, bool derivatives,
+ bool get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
ustring object, TypeDesc type, ustring name,
int index, void *val);
- bool get_attribute(void *renderstate, bool derivatives, ustring object,
+ bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object,
+ TypeDesc type, ustring name, void *val);
+ bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
TypeDesc type, ustring name, void *val);
bool get_userdata(bool derivatives, ustring name, TypeDesc type,
- void *renderstate, void *val);
- bool has_userdata(ustring name, TypeDesc type, void *renderstate);
+ OSL::ShaderGlobals *sg, void *val);
+ bool has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg);
int pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
float radius, int max_points, bool sort, size_t *out_indices,
@@ -106,7 +108,7 @@ public:
OSL::ShaderGlobals *sg, const OSL::Vec3 &R,
const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result);
- bool get_texture_info(ustring filename, int subimage,
+ bool get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage,
ustring dataname, TypeDesc datatype, void *data);
static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
@@ -157,6 +159,70 @@ public:
static ustring u_v;
static ustring u_empty;
+#if OSL_LIBRARY_VERSION_CODE < 10500
+ bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) {
+ return get_matrix(NULL, result, xform, time);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) {
+ return get_inverse_matrix(NULL, result, xform, time);
+ }
+
+ bool get_matrix(OSL::Matrix44 &result, ustring from, float time) {
+ return get_matrix(NULL, result, from, time);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time) {
+ return get_inverse_matrix(NULL, result, to, time);
+ }
+
+ bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) {
+ return get_matrix(NULL, result, xform);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) {
+ return get_inverse_matrix(NULL, result, xform);
+ }
+
+ bool get_matrix(OSL::Matrix44 &result, ustring from) {
+ return get_matrix(NULL, result, from);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, ustring to) {
+ return get_inverse_matrix(NULL, result, to);
+ }
+
+ bool get_array_attribute(void *renderstate, bool derivatives,
+ ustring object, TypeDesc type, ustring name,
+ int index, void *val) {
+ OSL::ShaderGlobals sg;
+ sg.renderstate = renderstate;
+ return get_array_attribute(&sg, derivatives,
+ object, type, name,
+ index, val);
+ }
+
+ bool get_attribute(void *renderstate, bool derivatives, ustring object_name,
+ TypeDesc type, ustring name, void *val) {
+ OSL::ShaderGlobals sg;
+ sg.renderstate = renderstate;
+ return get_attribute(&sg, derivatives, object_name, type, name, val);
+ }
+
+ bool has_userdata(ustring name, TypeDesc type, void *renderstate) {
+ return has_userdata(name, type, (OSL::ShaderGlobals *) renderstate);
+ }
+
+ bool get_userdata(bool derivatives, ustring name, TypeDesc type,
+ void *renderstate, void *val) {
+ return get_userdata(derivatives, name, type, (OSL::ShaderGlobals *) renderstate, val);
+ }
+
+ bool get_texture_info(ustring filename, int subimage,
+ ustring dataname, TypeDesc datatype, void *data) {
+ return get_texture_info(NULL, filename, subimage, dataname, datatype, data);
+ }
+#endif
private:
KernelGlobals *kernel_globals;
OSL::TextureSystem *osl_ts;
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 28135784db9..48498116874 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -164,11 +164,14 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
CBSDFClosure *bsdf = (CBSDFClosure *)prim;
int scattering = bsdf->scattering();
- /* no caustics option */
- if(scattering == LABEL_GLOSSY && (path_flag & PATH_RAY_DIFFUSE)) {
+ /* caustic options */
+ if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
KernelGlobals *kg = sd->osl_globals;
- if(kernel_data.integrator.no_caustics)
+
+ if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
+ (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
return;
+ }
}
/* sample weight */
diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h
index 447a84255ef..9f10ba8023e 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.h
+++ b/intern/cycles/kernel/shaders/node_fresnel.h
@@ -34,3 +34,16 @@ float fresnel_dielectric_cos(float cosi, float eta)
return result;
}
+color fresnel_conductor(float cosi, color eta, color k)
+{
+ color cosi2 = color(cosi*cosi);
+ color one = color(1, 1, 1);
+ color tmp_f = eta * eta + k * k;
+ color tmp = tmp_f * cosi2;
+ color Rparl2 = (tmp - (2.0 * eta * cosi) + one) /
+ (tmp + (2.0 * eta * cosi) + one);
+ color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) /
+ (tmp_f + (2.0 * eta * cosi) + cosi2);
+ return (Rparl2 + Rperp2) * 0.5;
+}
+
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index e39db8097f2..f8e5fd510ee 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -507,12 +507,8 @@ closure color hair_transmission(normal N, float roughnessu, float roughnessv, ve
closure color henyey_greenstein(float g) BUILTIN;
closure color absorption() BUILTIN;
-// Backwards compatibility
-closure color bssrdf_cubic(normal N, vector radius) BUILTIN;
-closure color bssrdf_gaussian(normal N, vector radius) BUILTIN;
-closure color specular_toon(normal N, float size, float smooth) BUILTIN;
-
// Renderer state
+int backfacing () BUILTIN;
int raytype (string typename) BUILTIN;
// the individual 'isFOOray' functions are deprecated
int iscameraray () { return raytype("camera"); }
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index d6663aae9db..c13eae813d6 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -236,7 +236,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
if(stack_load_float(stack, node.z) == 1.0f)
offset += node.y;
break;
-#ifdef __IMAGE_TEXTURES__
+#ifdef __TEXTURES__
case NODE_TEX_IMAGE:
svm_node_tex_image(kg, sd, stack, node);
break;
@@ -246,8 +246,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
-#endif
-#ifdef __PROCEDURAL_TEXTURES__
case NODE_TEX_SKY:
svm_node_tex_sky(kg, sd, stack, node, &offset);
break;
@@ -420,17 +418,13 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node);
break;
-#endif
-#ifdef __ANISOTROPIC__
+#endif
case NODE_TANGENT:
svm_node_tangent(kg, sd, stack, node);
break;
-#endif
-#ifdef __NORMAL_MAP__
case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node);
- break;
-#endif
+ break;
case NODE_END:
default:
return;
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 5fcc44e478b..30110db3ef9 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -179,7 +179,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
@@ -207,7 +207,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
@@ -244,8 +244,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_reflective &&
+ !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) {
break;
+ }
#endif
/* index of refraction */
float eta = fmaxf(param2, 1e-5f);
@@ -262,12 +264,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float sample_weight = sc->sample_weight;
sc = svm_node_closure_get_bsdf(sd, mix_weight*fresnel);
-
- if(sc) {
- sc->N = N;
- svm_node_glass_setup(sd, sc, type, eta, roughness, false);
+#ifdef __CAUSTICS_TRICKS__
+ if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
+#endif
+ {
+ if(sc) {
+ sc->N = N;
+ svm_node_glass_setup(sd, sc, type, eta, roughness, false);
+ }
}
+#ifdef __CAUSTICS_TRICKS__
+ if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
+ break;
+#endif
+
/* refraction */
sc = &sd->closure[sd->num_closure];
sc->weight = weight;
@@ -286,7 +297,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
@@ -294,7 +305,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
sc->N = N;
-#ifdef __ANISOTROPIC__
sc->T = stack_load_float3(stack, data_node.y);
/* rotate tangent */
@@ -324,9 +334,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sd->flag |= bsdf_microfacet_ggx_aniso_setup(sc);
else
sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(sc);
-#else
- sd->flag |= bsdf_diffuse_setup(sc);
-#endif
}
break;
}
@@ -371,7 +378,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
/* todo: giving a fixed weight here will cause issues when
- * mixing multiple BSDFS. energey will not be conserved and
+ * mixing multiple BSDFS. energy will not be conserved and
* the throughput can blow up after multiple bounces. we
* better figure out a way to skip backfaces from rays
* spawned by transmission from the front */
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 2503912c5c6..b221e0728ec 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -45,13 +45,13 @@ ccl_device void svm_node_convert(ShaderData *sd, float *stack, uint type, uint f
}
case NODE_CONVERT_VF: {
float3 f = stack_load_float3(stack, from);
- float g = (f.x + f.y + f.z)*(1.0f/3.0f);
+ float g = average(f);
stack_store_float(stack, to, g);
break;
}
case NODE_CONVERT_VI: {
float3 f = stack_load_float3(stack, from);
- int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f));
+ int i = (int)average(f);
stack_store_int(stack, to, i);
break;
}