Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt9
-rw-r--r--intern/cycles/kernel/closure/bsdf.h5
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h18
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_multi.h78
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_diffuse.h8
-rw-r--r--intern/cycles/kernel/filter/filter_features.h12
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_cpu.h5
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_gpu.h5
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h23
-rw-r--r--intern/cycles/kernel/filter/filter_reconstruction.h56
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h52
-rw-r--r--intern/cycles/kernel/kernel_passes.h2
-rw-r--r--intern/cycles/kernel/kernel_path_state.h2
-rw-r--r--intern/cycles/kernel/kernel_queues.h15
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h10
-rw-r--r--intern/cycles/kernel/kernel_volume.h2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h4
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h2
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu3
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu2
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl3
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl15
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_path_init.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split.cl1
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_function.h72
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl10
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp2
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp24
-rw-r--r--intern/cycles/kernel/shaders/node_principled_bsdf.osl8
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h2
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h52
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h19
-rw-r--r--intern/cycles/kernel/split/kernel_enqueue_inactive.h46
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h2
-rw-r--r--intern/cycles/kernel/split/kernel_queue_enqueue.h3
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h16
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h8
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h15
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h23
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h31
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h35
59 files changed, 632 insertions, 267 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index bef869f34b4..23e9bd311c4 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -45,6 +45,7 @@ set(SRC
kernels/opencl/kernel_direct_lighting.cl
kernels/opencl/kernel_shadow_blocked_ao.cl
kernels/opencl/kernel_shadow_blocked_dl.cl
+ kernels/opencl/kernel_enqueue_inactive.cl
kernels/opencl/kernel_next_iteration_setup.cl
kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl
@@ -121,6 +122,10 @@ set(SRC_KERNELS_CUDA_HEADERS
kernels/cuda/kernel_config.h
)
+set(SRC_KERNELS_OPENCL_HEADERS
+ kernels/opencl/kernel_split_function.h
+)
+
set(SRC_CLOSURE_HEADERS
closure/alloc.h
closure/bsdf.h
@@ -278,6 +283,7 @@ set(SRC_SPLIT_HEADERS
split/kernel_data_init.h
split/kernel_direct_lighting.h
split/kernel_do_volume.h
+ split/kernel_enqueue_inactive.h
split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_indirect_background.h
split/kernel_indirect_subsurface.h
@@ -450,6 +456,7 @@ add_library(cycles_kernel
${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS}
${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPENCL_HEADERS}
${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
${SRC_FILTER_HEADERS}
@@ -490,9 +497,11 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_sc
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index a04c157dc40..86a00d2124d 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -423,6 +423,11 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
return bsdf_hair_merge(a, b);
+#ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ return bsdf_principled_diffuse_merge(a, b);
+#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
return volume_henyey_greenstein_merge(a, b);
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 30cc8b90330..b12e248f0a3 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -288,12 +288,16 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = bsdf->alpha_x;
@@ -302,12 +306,16 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
+
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = bsdf->alpha_x;
@@ -343,12 +351,16 @@ ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = saturate(bsdf->alpha_y);
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index b07b515c405..22d0092093a 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -245,35 +245,69 @@ ccl_device_forceinline float mf_ggx_albedo(float r)
return saturate(albedo);
}
+ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
+{
+ if(ior < 1.0f) {
+ ior = 1.0f/ior;
+ }
+ a = saturate(a);
+ ior = clamp(ior, 1.0f, 3.0f);
+ float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f;
+ float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f;
+ float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior);
+ float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f;
+
+ return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f);
+}
+
ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
{
float D = D_ggx(normalize(wi+wo), alpha);
float lambda = mf_lambda(wi, make_float2(alpha, alpha));
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+
+ float multiscatter = wo.z * M_1_PI_F;
+
float albedo = mf_ggx_albedo(alpha);
- return 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f) + (1.0f - albedo) * wo.z;
+ return albedo*singlescatter + (1.0f - albedo)*multiscatter;
}
ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha)
{
- return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, alpha)) * wi.z) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z;
+ float D = D_ggx_aniso(normalize(wi+wo), alpha);
+ float lambda = mf_lambda(wi, alpha);
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+
+ float multiscatter = wo.z * M_1_PI_F;
+
+ float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y));
+ return albedo*singlescatter + (1.0f - albedo)*multiscatter;
}
ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
{
- float3 wh;
- float fresnel;
- if(wi.z*wo.z > 0.0f) {
- wh = normalize(wi + wo);
- fresnel = fresnel_dielectric_cos(dot(wi, wh), eta);
- }
- else {
- wh = normalize(wi + wo*eta);
- fresnel = 1.0f - fresnel_dielectric_cos(dot(wi, wh), eta);
- }
+ bool reflective = (wi.z*wo.z > 0.0f);
+
+ float wh_len;
+ float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len);
if(wh.z < 0.0f)
wh = -wh;
float3 r_wi = (wi.z < 0.0f)? -wi: wi;
- return fresnel * max(0.0f, dot(r_wi, wh)) * D_ggx(wh, alpha) / ((1.0f + mf_lambda(r_wi, make_float2(alpha, alpha))) * r_wi.z) + fabsf(wo.z);
+ float lambda = mf_lambda(r_wi, make_float2(alpha, alpha));
+ float D = D_ggx(wh, alpha);
+ float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta);
+
+ float multiscatter = fabsf(wo.z * M_1_PI_F);
+ if(reflective) {
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f);
+ float albedo = mf_ggx_albedo(alpha);
+ return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
+ }
+ else {
+ float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f));
+ float albedo = mf_ggx_transmission_albedo(alpha, eta);
+ return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
+ }
}
/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
@@ -326,13 +360,17 @@ ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf)
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
if(is_zero(bsdf->T))
bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
@@ -345,12 +383,16 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->alpha_y = bsdf->alpha_x;
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
@@ -455,7 +497,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
}
-ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
bsdf->alpha_y = bsdf->alpha_x;
@@ -469,6 +511,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
}
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 215c32e1ffb..f8ca64293b0 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -58,6 +58,14 @@ ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
+ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+ const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a;
+ const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b;
+
+ return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
+}
+
ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I,
const float3 omega_in, float *pdf)
{
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 53d703de143..6226ed2c2ef 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -78,16 +78,10 @@ ccl_device_inline void filter_calculate_scale(float *scale)
scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f);
}
-ccl_device_inline float3 filter_get_pixel_color(const ccl_global float *ccl_restrict buffer,
- int pass_stride)
+ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
+ int pass_stride)
{
- return make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2));
-}
-
-ccl_device_inline float filter_get_pixel_variance(const ccl_global float *ccl_restrict buffer,
- int pass_stride)
-{
- return average(make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2)));
+ return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
}
ccl_device_inline void design_row_add(float *design_row,
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index 5cb4038bc33..3e752bce68f 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -101,7 +101,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict d
for(int x = rect.x; x < rect.z; x++) {
const int low = max(rect.x, x-f);
const int high = min(rect.z, x+f+1);
- out_image[y*w+x] = expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f));
+ out_image[y*w+x] = fast_expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f));
}
}
}
@@ -133,8 +133,6 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict buffer,
- float *color_pass,
- float *variance_pass,
float *transform,
int *rank,
float *XtWX,
@@ -167,7 +165,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
dx, dy, w, h,
pass_stride,
buffer,
- color_pass, variance_pass,
l_transform, l_rank,
weight, l_XtWX, l_XtWY, 0);
}
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
index 078c5f56763..2c5ac807051 100644
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h
@@ -66,7 +66,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
sum += difference_image[y*w+x1];
}
sum *= 1.0f/(high-low);
- out_image[y*w+x] = expf(-max(sum, 0.0f));
+ out_image[y*w+x] = fast_expf(-max(sum, 0.0f));
}
ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
@@ -97,8 +97,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
int dx, int dy,
const ccl_global float *ccl_restrict difference_image,
const ccl_global float *ccl_restrict buffer,
- ccl_global float *color_pass,
- ccl_global float *variance_pass,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
ccl_global float *XtWX,
@@ -130,7 +128,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
dx, dy, w, h,
pass_stride,
buffer,
- color_pass, variance_pass,
transform, rank,
weight, XtWX, XtWY,
localIdx);
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 82cc36625ec..d5ae1b73927 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -142,13 +142,22 @@ ccl_device void kernel_filter_detect_outliers(int x, int y,
float ref = 2.0f*values[(int)(n*0.75f)];
float fac = 1.0f;
if(L > ref) {
- /* If the pixel is an outlier, negate the depth value to mark it as one.
- * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
- depth[idx] = -depth[idx];
- fac = ref/L;
- variance[idx ] *= fac*fac;
- variance[idx + pass_stride] *= fac*fac;
- variance[idx+2*pass_stride] *= fac*fac;
+ /* The pixel appears to be an outlier.
+ * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
+ * should actually be at the reference value:
+ * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
+ * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+ */
+ float stddev = sqrtf(average(make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride])));
+ if(L - 3*stddev < ref) {
+ /* The pixel is an outlier, so negate the depth value to mark it as one.
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ depth[idx] = -depth[idx];
+ fac = ref/L;
+ variance[idx ] *= fac*fac;
+ variance[idx + pass_stride] *= fac*fac;
+ variance[idx+2*pass_stride] *= fac*fac;
+ }
}
out[idx ] = fac*image[idx];
out[idx + pass_stride] = fac*image[idx + pass_stride];
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 4a4c81b7ba3..25a3025056c 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -22,8 +22,6 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
int w, int h,
int pass_stride,
const ccl_global float *ccl_restrict buffer,
- ccl_global float *color_pass,
- ccl_global float *variance_pass,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
float weight,
@@ -31,38 +29,31 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
ccl_global float3 *XtWY,
int localIdx)
{
+ if(weight < 1e-3f) {
+ return;
+ }
+
int p_offset = y *w + x;
int q_offset = (y+dy)*w + (x+dx);
-#ifdef __KERNEL_CPU__
- const int stride = 1;
- (void)storage_stride;
- (void)localIdx;
- float design_row[DENOISE_FEATURES+1];
-#elif defined(__KERNEL_CUDA__)
+#ifdef __KERNEL_GPU__
const int stride = storage_stride;
+#else
+ const int stride = 1;
+ (void) storage_stride;
+#endif
+
+#ifdef __KERNEL_CUDA__
ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE];
ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1);
#else
- const int stride = storage_stride;
float design_row[DENOISE_FEATURES+1];
#endif
- float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride);
- float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride);
+ float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
- float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
- float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
-
- /* If the pixel was flagged as an outlier during prefiltering, skip it.
- * Otherwise, perform the regular confidence interval test unless
- * the center pixel is an outlier (in that case, using the confidence
- * interval test could result in no pixels being used at all). */
- bool p_outlier = (ccl_get_feature(buffer + p_offset, 0) < 0.0f);
- bool q_outlier = (ccl_get_feature(buffer + q_offset, 0) < 0.0f);
- bool outside_of_interval = (average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f));
-
- if(q_outlier || (!p_outlier && outside_of_interval)) {
+ /* If the pixel was flagged as an outlier during prefiltering, skip it. */
+ if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
return;
}
@@ -83,13 +74,19 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
int4 buffer_params,
int sample)
{
-#ifdef __KERNEL_CPU__
- const int stride = 1;
- (void)storage_stride;
-#else
+#ifdef __KERNEL_GPU__
const int stride = storage_stride;
+#else
+ const int stride = 1;
+ (void) storage_stride;
#endif
+ if(XtWX[0] < 1e-3f) {
+ /* There is not enough information to determine a denoised result.
+ * As a fallback, keep the original value of the pixel. */
+ return;
+ }
+
/* The weighted average of pixel colors (essentially, the NLM-filtered image).
* In case the solution of the linear model fails due to numerical issues,
* fall back to this value. */
@@ -102,6 +99,9 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
final_color = mean_color;
}
+ /* Clamp pixel value to positive values. */
+ final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+
ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
final_color *= sample;
if(buffer_params.w) {
@@ -114,6 +114,4 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
combined_buffer[2] = final_color.z;
}
-#undef STORAGE_TYPE
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 06728415c15..175bd6b9737 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -621,25 +621,43 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
{
float fac = 1.0f/num_samples;
+#ifdef __SPLIT_KERNEL__
+# define safe_float3_add(f, v) \
+ do { \
+ ccl_global float *p = (ccl_global float*)(&(f)); \
+ atomic_add_and_fetch_float(p+0, (v).x); \
+ atomic_add_and_fetch_float(p+1, (v).y); \
+ atomic_add_and_fetch_float(p+2, (v).z); \
+ } while(0)
+#else
+# define safe_float3_add(f, v) (f) += (v)
+#endif /* __SPLIT_KERNEL__ */
+
#ifdef __PASSES__
- L->direct_diffuse += L_sample->direct_diffuse*fac;
- L->direct_glossy += L_sample->direct_glossy*fac;
- L->direct_transmission += L_sample->direct_transmission*fac;
- L->direct_subsurface += L_sample->direct_subsurface*fac;
- L->direct_scatter += L_sample->direct_scatter*fac;
-
- L->indirect_diffuse += L_sample->indirect_diffuse*fac;
- L->indirect_glossy += L_sample->indirect_glossy*fac;
- L->indirect_transmission += L_sample->indirect_transmission*fac;
- L->indirect_subsurface += L_sample->indirect_subsurface*fac;
- L->indirect_scatter += L_sample->indirect_scatter*fac;
-
- L->background += L_sample->background*fac;
- L->ao += L_sample->ao*fac;
- L->shadow += L_sample->shadow*fac;
+ safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse*fac);
+ safe_float3_add(L->direct_glossy, L_sample->direct_glossy*fac);
+ safe_float3_add(L->direct_transmission, L_sample->direct_transmission*fac);
+ safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface*fac);
+ safe_float3_add(L->direct_scatter, L_sample->direct_scatter*fac);
+
+ safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse*fac);
+ safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy*fac);
+ safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission*fac);
+ safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface*fac);
+ safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter*fac);
+
+ safe_float3_add(L->background, L_sample->background*fac);
+ safe_float3_add(L->ao, L_sample->ao*fac);
+ safe_float3_add(L->shadow, L_sample->shadow*fac);
+# ifdef __SPLIT_KERNEL__
+ atomic_add_and_fetch_float(&L->mist, L_sample->mist*fac);
+# else
L->mist += L_sample->mist*fac;
-#endif
- L->emission += L_sample->emission * fac;
+# endif /* __SPLIT_KERNEL__ */
+#endif /* __PASSES__ */
+ safe_float3_add(L->emission, L_sample->emission*fac);
+
+#undef safe_float3_add
}
#ifdef __SHADOW_TRICKS__
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 9d52834ffcc..9cd7ffb181d 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -142,7 +142,7 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob
ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
ShaderData *sd,
- ccl_global PathState *state,
+ ccl_addr_space PathState *state,
PathRadiance *L)
{
#ifdef __DENOISING_FEATURES__
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index 0fa77d9e8bd..5d92fd12201 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -139,9 +139,11 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathSta
/* random number generator next bounce */
state->rng_offset += PRNG_BOUNCE_NUM;
+#ifdef __DENOISING_FEATURES__
if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) {
state->flag &= ~PATH_RAY_STORE_SHADOW_INFO;
}
+#endif
}
ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state)
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 96bc636d5ac..e32d4bbbc1b 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -128,6 +128,21 @@ ccl_device unsigned int get_global_queue_index(
return my_gqidx;
}
+ccl_device int dequeue_ray_index(
+ int queue_number,
+ ccl_global int *queues,
+ int queue_size,
+ ccl_global int *queue_index)
+{
+ int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1;
+
+ if(index < 0) {
+ return QUEUE_EMPTY_SLOT;
+ }
+
+ return queues[index + queue_number * queue_size];
+}
+
CCL_NAMESPACE_END
#endif // __KERNEL_QUEUE_H__
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 1026cde7b29..6475d4b66fd 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -418,7 +418,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
}
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state,
+ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index dbeaffdfb24..34affab1b9d 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -135,7 +135,7 @@ CCL_NAMESPACE_BEGIN
* this is because megakernel in device_opencl does not support
* custom cflags depending on the scene features.
*/
-# endif /* __KERNEL_OPENCL_NVIDIA__ */
+# endif /* __KERNEL_OPENCL_APPLE__ */
# ifdef __KERNEL_OPENCL_AMD__
# define __CL_USE_NATIVE__
@@ -236,6 +236,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __NO_PRINCIPLED__
# undef __PRINCIPLED__
#endif
+#ifdef __NO_DENOISING__
+# undef __DENOISING_FEATURES__
+#endif
/* Random Numbers */
@@ -1387,6 +1390,8 @@ enum QueueNumber {
#ifdef __BRANCHED_PATH__
/* All rays moving to next iteration of the indirect loop for light */
QUEUE_LIGHT_INDIRECT_ITER,
+ /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
+ QUEUE_INACTIVE_RAYS,
# ifdef __VOLUME__
/* All rays moving to next iteration of the indirect loop for volumes */
QUEUE_VOLUME_INDIRECT_ITER,
@@ -1429,6 +1434,9 @@ enum RayState {
RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
+
+ /* Ray is evaluating an iteration of an indirect loop for another thread */
+ RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 9c0878249d4..1e472aaf51a 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -660,6 +660,7 @@ typedef struct VolumeSegment {
* but the entire segment is needed to do always scattering, rather than probabilistically
* hitting or missing the volume. if we don't know the transmittance at the end of the
* volume we can't generate stratified distance samples up to that transmittance */
+#ifdef __VOLUME_DECOUPLED__
ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
{
@@ -829,6 +830,7 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s
#endif
}
}
+#endif /* __VOLUME_DECOUPLED__ */
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
* marching.
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index ffd34c293fc..2ed713299fd 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -107,8 +107,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
float *difference_image,
float *buffer,
- float *color_pass,
- float *variance_pass,
float *transform,
int *rank,
float *XtWX,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 261176846b1..8dc1a8d583c 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -213,8 +213,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
float *difference_image,
float *buffer,
- float *color_pass,
- float *variance_pass,
float *transform,
int *rank,
float *XtWX,
@@ -229,7 +227,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
#else
- kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride);
+ kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 9895080d328..c8938534fe8 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -85,6 +85,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
+DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9b85a864153..d4315ee5ec4 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -53,6 +53,7 @@
# include "kernel/split/kernel_direct_lighting.h"
# include "kernel/split/kernel_shadow_blocked_ao.h"
# include "kernel/split/kernel_shadow_blocked_dl.h"
+# include "kernel/split/kernel_enqueue_inactive.h"
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
@@ -230,6 +231,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 2edbff08087..009c3fde9d5 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -207,8 +207,6 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_nlm_construct_gramian(int dx, int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict buffer,
- float *color_pass,
- float *variance_pass,
float const* __restrict__ transform,
int *rank,
float *XtWX,
@@ -225,7 +223,6 @@ kernel_cuda_filter_nlm_construct_gramian(int dx, int dy,
dx, dy,
difference_image,
buffer,
- color_pass, variance_pass,
transform, rank,
XtWX, XtWY,
rect, filter_rect,
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 8b7f1a8d405..628891b1458 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -39,6 +39,7 @@
#include "kernel/split/kernel_direct_lighting.h"
#include "kernel/split/kernel_shadow_blocked_ao.h"
#include "kernel/split/kernel_shadow_blocked_dl.h"
+#include "kernel/split/kernel_enqueue_inactive.h"
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
@@ -118,6 +119,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 0462ca6f9bc..ba53ba4b26f 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -207,8 +207,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
int dy,
const ccl_global float *ccl_restrict difference_image,
const ccl_global float *ccl_restrict buffer,
- ccl_global float *color_pass,
- ccl_global float *variance_pass,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
ccl_global float *XtWX,
@@ -227,7 +225,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
dx, dy,
difference_image,
buffer,
- color_pass, variance_pass,
transform, rank,
XtWX, XtWY,
rect, filter_rect,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
index db65c91baf7..dcea2630aef 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_buffer_update.h"
-__kernel void kernel_ocl_path_trace_buffer_update(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME buffer_update
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
index eb34f750881..ed64ae01aae 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_direct_lighting.h"
-__kernel void kernel_ocl_path_trace_direct_lighting(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME direct_lighting
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
index 83ef5f5f3f2..8afaa686e28 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_do_volume.h"
-__kernel void kernel_ocl_path_trace_do_volume(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_do_volume((KernelGlobals*)kg);
-}
+#define KERNEL_NAME do_volume
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl
new file mode 100644
index 00000000000..e68d4104a91
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_enqueue_inactive.h"
+
+#define KERNEL_NAME enqueue_inactive
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
index d071b39aa6f..9e1e57beba6 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
@@ -18,12 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
-__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local BackgroundAOLocals locals;
- kernel_holdout_emission_blurring_pathtermination_ao(
- (KernelGlobals*)kg,
- &locals);
-}
+#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao
+#define LOCALS_TYPE BackgroundAOLocals
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
index 8c213ff5cb2..192d01444ba 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_indirect_background.h"
-__kernel void kernel_ocl_path_trace_indirect_background(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_indirect_background((KernelGlobals*)kg);
-}
+#define KERNEL_NAME indirect_background
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
index 998ebc4c0c3..84938b889e5 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_indirect_subsurface.h"
-__kernel void kernel_ocl_path_trace_indirect_subsurface(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_indirect_subsurface((KernelGlobals*)kg);
-}
+#define KERNEL_NAME indirect_subsurface
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index 822d2287715..c314dc96c33 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_lamp_emission.h"
-__kernel void kernel_ocl_path_trace_lamp_emission(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_lamp_emission((KernelGlobals*)kg);
-}
+#define KERNEL_NAME lamp_emission
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
index 6d207253a40..8b1332bf013 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_next_iteration_setup.h"
-__kernel void kernel_ocl_path_trace_next_iteration_setup(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME next_iteration_setup
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
index bd9aa9538c8..fa210e747c0 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_path_init.h"
-__kernel void kernel_ocl_path_trace_path_init(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_path_init((KernelGlobals*)kg);
-}
+#define KERNEL_NAME path_init
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
index 9be154e3d75..68ee6f1d536 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_queue_enqueue.h"
-__kernel void kernel_ocl_path_trace_queue_enqueue(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local QueueEnqueueLocals locals;
- kernel_queue_enqueue((KernelGlobals*)kg, &locals);
-}
+#define KERNEL_NAME queue_enqueue
+#define LOCALS_TYPE QueueEnqueueLocals
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
index eb4fb4d153a..10d09377ba9 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_scene_intersect.h"
-__kernel void kernel_ocl_path_trace_scene_intersect(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_scene_intersect((KernelGlobals*)kg);
-}
+#define KERNEL_NAME scene_intersect
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
index 5bfb31b193a..40eaa561863 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shader_eval.h"
-__kernel void kernel_ocl_path_trace_shader_eval(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_shader_eval((KernelGlobals*)kg);
-}
+#define KERNEL_NAME shader_eval
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
index 38bfd04ad4c..8c36100f762 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shader_setup.h"
-__kernel void kernel_ocl_path_trace_shader_setup(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME shader_setup
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
index 6f722915d45..bcacaa4a054 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
@@ -19,10 +19,9 @@
#include "kernel/split/kernel_shader_sort.h"
__attribute__((reqd_work_group_size(64, 1, 1)))
-__kernel void kernel_ocl_path_trace_shader_sort(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local ShaderSortLocals locals;
- kernel_shader_sort((KernelGlobals*)kg, &locals);
-}
+#define KERNEL_NAME shader_sort
+#define LOCALS_TYPE ShaderSortLocals
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
index 6a8ef81b32a..8de250a375c 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shadow_blocked_ao.h"
-__kernel void kernel_ocl_path_trace_shadow_blocked_ao(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_shadow_blocked_ao((KernelGlobals*)kg);
-}
+#define KERNEL_NAME shadow_blocked_ao
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
index b255cc5ef8b..29da77022ed 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shadow_blocked_dl.h"
-__kernel void kernel_ocl_path_trace_shadow_blocked_dl(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_shadow_blocked_dl((KernelGlobals*)kg);
-}
+#define KERNEL_NAME shadow_blocked_dl
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
index 8de82db7afe..651addb02f4 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
@@ -31,6 +31,7 @@
#include "kernel/kernels/opencl/kernel_direct_lighting.cl"
#include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl"
#include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl"
+#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
new file mode 100644
index 00000000000..f1e914a70d4
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define KERNEL_NAME_JOIN(a, b) a ## _ ## b
+#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b)
+
+__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
+ ccl_global char *kg_global,
+ ccl_constant KernelData *data,
+
+ ccl_global void *split_data_buffer,
+ ccl_global char *ray_state,
+ ccl_global uint *rng_state,
+
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name,
+#include "kernel/kernel_textures.h"
+
+ ccl_global int *queue_index,
+ ccl_global char *use_queues_flag,
+ ccl_global unsigned int *work_pools,
+ ccl_global float *buffer
+ )
+{
+#ifdef LOCALS_TYPE
+ ccl_local LOCALS_TYPE locals;
+#endif
+
+ KernelGlobals *kg = (KernelGlobals*)kg_global;
+
+ if(ccl_local_id(0) + ccl_local_id(1) == 0) {
+ kg->data = data;
+
+ kernel_split_params.rng_state = rng_state;
+ kernel_split_params.queue_index = queue_index;
+ kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.work_pools = work_pools;
+ kernel_split_params.buffer = buffer;
+
+ split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
+
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name;
+#include "kernel/kernel_textures.h"
+ }
+
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
+ kg
+#ifdef LOCALS_TYPE
+ , &locals
+#endif
+ );
+}
+
+#undef KERNEL_NAME_JOIN
+#undef KERNEL_NAME_EVAL
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 99b74a1802b..2b3be38df84 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_subsurface_scatter.h"
-__kernel void kernel_ocl_path_trace_subsurface_scatter(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_subsurface_scatter((KernelGlobals*)kg);
-}
+#define KERNEL_NAME subsurface_scatter
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 188c3960a5f..27a96720c1e 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -191,7 +191,7 @@ class PrincipledBSSRDFClosure : public CBSSRDFClosure {
public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
- alloc(sd, path_flag, weight * albedo, CLOSURE_BSSRDF_PRINCIPLED_ID);
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID);
}
};
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 5b66793a05d..14c5c1c3db5 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -156,7 +156,7 @@ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refra
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairReflectionClosure, unused),
+ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
@@ -164,7 +164,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY
BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, unused),
+ CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
@@ -191,7 +191,7 @@ BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
class PrincipledClearcoatClosure : public CBSDFClosure {
public:
MicrofacetBsdf params;
- float clearcoat, clearcoat_gloss;
+ float clearcoat, clearcoat_roughness;
MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
{
@@ -202,8 +202,8 @@ public:
bsdf->ior = 1.5f;
- bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
- bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
+ bsdf->alpha_x = clearcoat_roughness;
+ bsdf->alpha_y = clearcoat_roughness;
bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
bsdf->extra->clearcoat = clearcoat;
@@ -217,7 +217,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd) : 0;
}
};
@@ -226,7 +226,7 @@ ClosureParam *closure_bsdf_principled_clearcoat_params()
static ClosureParam params[] = {
CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
- CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_gloss),
+ CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)
};
@@ -389,7 +389,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -413,7 +413,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -566,7 +566,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -590,7 +590,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -618,7 +618,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd) : 0;
}
};
diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
index 57f40789d49..2bb981c3918 100644
--- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
@@ -32,7 +32,7 @@ shader node_principled_bsdf(
float Sheen = 0.0,
float SheenTint = 0.5,
float Clearcoat = 0.0,
- float ClearcoatGloss = 1.0,
+ float ClearcoatRoughness = 0.03,
float IOR = 1.45,
float Transmission = 0.0,
float TransmissionRoughness = 0.0,
@@ -57,8 +57,8 @@ shader node_principled_bsdf(
if (diffuse_weight > 1e-5) {
if (Subsurface > 1e-5) {
- color Albedo = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
- BSDF = bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, Albedo, Roughness);
+ color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
+ BSDF = mixed_ss_base_color * bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, SubsurfaceColor, Roughness);
} else {
BSDF = BaseColor * principled_diffuse(Normal, Roughness);
}
@@ -114,7 +114,7 @@ shader node_principled_bsdf(
}
if (Clearcoat > 1e-5) {
- BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatGloss);
+ BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness);
}
}
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 289d1091b0a..c91d2918687 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -546,7 +546,7 @@ closure color holdout() BUILTIN;
closure color ambient_occlusion() BUILTIN;
closure color principled_diffuse(normal N, float roughness) BUILTIN;
closure color principled_sheen(normal N) BUILTIN;
-closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_gloss) BUILTIN;
+closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN;
// BSSRDF
closure color bssrdf_cubic(normal N, vector radius, float texture_blur, float sharpness) BUILTIN;
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index dc74a2ada53..e2762a85fc8 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -63,12 +63,49 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal
REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
}
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+{
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+ kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+
+ if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+ return false;
+ }
+
+#define SPLIT_DATA_ENTRY(type, name, num) \
+ kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index];
+ SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+#undef SPLIT_DATA_ENTRY
+
+ kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+ kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+ kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+
+ path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ inactive_L->direct_throughput = L->direct_throughput;
+ path_radiance_copy_indirect(inactive_L, L);
+
+ ray_state[inactive_ray] = RAY_REGENERATED;
+ ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+ ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+
+ atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+
+ return true;
+}
+
/* bounce off surface and integrate indirect light */
ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
int ray_index,
float num_samples_adjust,
ShaderData *saved_sd,
- bool reset_path_state)
+ bool reset_path_state,
+ bool wait_for_shared)
{
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
@@ -155,12 +192,25 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
/* start the indirect path */
*tp *= num_samples_inv;
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+
return true;
}
branched_state->next_sample = 0;
}
+ branched_state->next_closure = sd->num_closure;
+
+ if(wait_for_shared) {
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ }
+
return false;
}
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 694b777f429..9f8dd2392d9 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -75,11 +75,30 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
branched_state->next_sample = j+1;
branched_state->num_samples = num_samples;
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+ * loop here more and have many calls to kernel_volume_integrate which evaluates
+ * shaders. The many expensive shader evaluations cause the work load to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if(branched_state->shared_sample_count < 2) {
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
return true;
}
# endif
}
+ branched_state->next_sample = num_samples;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
/* todo: avoid this calculation using decoupled ray marching */
diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
new file mode 100644
index 00000000000..496355bbc3a
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
+{
+#ifdef __BRANCHED_PATH__
+ /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ char enqueue_flag = 0;
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
+ enqueue_flag = 1;
+ }
+
+ enqueue_ray_index_local(ray_index,
+ QUEUE_INACTIVE_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+#endif /* __BRANCHED_PATH__ */
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 71017fed19e..7758e35fd32 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -147,6 +147,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ray_index,
1.0f,
&kernel_split_state.branched_state[ray_index].sd,
+ true,
true))
{
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
@@ -193,6 +194,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ray_index,
1.0f,
&kernel_split_state.branched_state[ray_index].sd,
+ true,
true))
{
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index e2e841f36d3..66ce2dfb6f1 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
int queue_number = -1;
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
}
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 5dc94caec85..45984ca509b 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -43,11 +43,21 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
}
/* All regenerated rays become active here */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+#ifdef __BRANCHED_PATH__
+ if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else
+#endif /* __BRANCHED_PATH__ */
+ {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ }
+ }
- if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE))
+ if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
return;
+ }
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 386fbbc4d09..78e61709b01 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -29,6 +29,14 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
+#ifdef __BRANCHED_PATH__
+ /* TODO(mai): move this somewhere else? */
+ if(thread_index == 0) {
+ /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+ kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+ }
+#endif /* __BRANCHED_PATH__ */
+
if(ray_index == QUEUE_EMPTY_SLOT)
return;
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 57f070d51e0..08f0124b529 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -56,7 +56,20 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+ int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_accum_sample(orig_ray_L, L, 1);
+
+ atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
}
else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index bb1aca2acbf..4bb2f0d3d80 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -95,6 +95,10 @@ typedef ccl_global struct SplitBranchedState {
VolumeStack volume_stack[VOLUME_STACK_SIZE];
# endif /* __VOLUME__ */
#endif /*__SUBSURFACE__ */
+
+ int shared_sample_count; /* number of branched samples shared with other threads */
+ int original_ray; /* index of original ray when sharing branched samples */
+ bool waiting_on_shared_samples;
} SplitBranchedState;
#define SPLIT_DATA_BRANCHED_ENTRIES \
@@ -137,6 +141,25 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_DEBUG_ENTRIES \
+/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
+#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
+ SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_DEBUG_ENTRIES \
+
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 1dffe1b179e..d5083b23f80 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -169,6 +169,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
ray_index,
num_samples_inv,
bssrdf_sd,
+ false,
false))
{
branched_state->ss_next_closure = i;
@@ -187,6 +188,13 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
branched_state->ss_next_sample = 0;
}
+ branched_state->ss_next_closure = sd->num_closure;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
return false;
@@ -257,21 +265,20 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
-
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg,
- &rng,
- state,
- PRNG_BSDF_U,
- &bssrdf_u, &bssrdf_v);
+ &rng,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
subsurface_scatter_step(kg,
- sd,
- state,
- state->flag,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- false);
+ sd,
+ state,
+ state->flag,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
}
}
else {
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index f04f765686e..9578fcf2687 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -79,13 +79,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#ifdef __PRINCIPLED__
case CLOSURE_BSDF_PRINCIPLED_ID: {
uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset,
- sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transmission_offset,
+ sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset,
anisotropic_rotation_offset, transmission_roughness_offset;
uint4 data_node2 = read_node(kg, offset);
float3 T = stack_load_float3(stack, data_node.y);
decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset);
- decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_gloss_offset);
+ decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset);
decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset);
// get Disney principled parameters
@@ -98,7 +98,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float sheen = stack_load_float(stack, sheen_offset);
float sheen_tint = stack_load_float(stack, sheen_tint_offset);
float clearcoat = stack_load_float(stack, clearcoat_offset);
- float clearcoat_gloss = stack_load_float(stack, clearcoat_gloss_offset);
+ float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset);
float transmission = stack_load_float(stack, transmission_offset);
float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
@@ -141,8 +141,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float3 weight = sd->svm_closure_weight * mix_weight;
#ifdef __SUBSURFACE__
- float3 albedo = subsurface_color * subsurface + base_color * (1.0f - subsurface);
- float3 subsurf_weight = weight * albedo * diffuse_weight;
+ float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface);
+ float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
float subsurf_sample_weight = fabsf(average(subsurf_weight));
/* disable in case of diffuse ancestor, can't see it well then and
@@ -154,11 +154,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* need to set the base color in this case such that the
* rays get the correctly mixed color after transmitting
* the object */
- base_color = albedo;
+ base_color = mixed_ss_base_color;
}
/* diffuse */
- if(fabsf(average(base_color)) > CLOSURE_WEIGHT_CUTOFF) {
+ if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
if(subsurface < CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
float3 diff_weight = weight * base_color * diffuse_weight;
@@ -186,7 +186,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bssrdf->sample_weight = subsurf_sample_weight;
bssrdf->radius = radius.x;
bssrdf->texture_blur = texture_blur;
- bssrdf->albedo = albedo.x;
+ bssrdf->albedo = subsurface_color.x;
bssrdf->sharpness = sharpness;
bssrdf->N = N;
bssrdf->roughness = roughness;
@@ -200,7 +200,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bssrdf->sample_weight = subsurf_sample_weight;
bssrdf->radius = radius.y;
bssrdf->texture_blur = texture_blur;
- bssrdf->albedo = albedo.y;
+ bssrdf->albedo = subsurface_color.y;
bssrdf->sharpness = sharpness;
bssrdf->N = N;
bssrdf->roughness = roughness;
@@ -214,7 +214,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bssrdf->sample_weight = subsurf_sample_weight;
bssrdf->radius = radius.z;
bssrdf->texture_blur = texture_blur;
- bssrdf->albedo = albedo.z;
+ bssrdf->albedo = subsurface_color.z;
bssrdf->sharpness = sharpness;
bssrdf->N = N;
bssrdf->roughness = roughness;
@@ -292,9 +292,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* setup bsdf */
if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */
- sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
else /* use multi-scatter GGX */
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
}
}
#ifdef __CAUSTICS_TRICKS__
@@ -332,7 +332,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->extra->cspec0 = cspec0;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
}
}
@@ -377,7 +377,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->extra->cspec0 = cspec0;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
}
}
}
@@ -398,14 +398,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->ior = 1.5f;
bsdf->extra = extra;
- bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
- bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
+ bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness;
+ bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness;
bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
bsdf->extra->clearcoat = clearcoat;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf);
+ sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
}
}
#ifdef __CAUSTICS_TRICKS__
@@ -725,6 +725,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight);
if(bsdf) {
+ bsdf->N = N;
bsdf->roughness1 = param1;
bsdf->roughness2 = param2;
bsdf->offset = -stack_load_float(stack, data_node.z);