diff options
Diffstat (limited to 'intern/cycles/kernel')
59 files changed, 632 insertions, 267 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index bef869f34b4..23e9bd311c4 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -45,6 +45,7 @@ set(SRC kernels/opencl/kernel_direct_lighting.cl kernels/opencl/kernel_shadow_blocked_ao.cl kernels/opencl/kernel_shadow_blocked_dl.cl + kernels/opencl/kernel_enqueue_inactive.cl kernels/opencl/kernel_next_iteration_setup.cl kernels/opencl/kernel_indirect_subsurface.cl kernels/opencl/kernel_buffer_update.cl @@ -121,6 +122,10 @@ set(SRC_KERNELS_CUDA_HEADERS kernels/cuda/kernel_config.h ) +set(SRC_KERNELS_OPENCL_HEADERS + kernels/opencl/kernel_split_function.h +) + set(SRC_CLOSURE_HEADERS closure/alloc.h closure/bsdf.h @@ -278,6 +283,7 @@ set(SRC_SPLIT_HEADERS split/kernel_data_init.h split/kernel_direct_lighting.h split/kernel_do_volume.h + split/kernel_enqueue_inactive.h split/kernel_holdout_emission_blurring_pathtermination_ao.h split/kernel_indirect_background.h split/kernel_indirect_subsurface.h @@ -450,6 +456,7 @@ add_library(cycles_kernel ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPENCL_HEADERS} ${SRC_BVH_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_FILTER_HEADERS} @@ -490,9 +497,11 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_sc delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index a04c157dc40..86a00d2124d 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -423,6 +423,11 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b) case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: return bsdf_hair_merge(a, b); +#ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: + return bsdf_principled_diffuse_merge(a, b); +#endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: return volume_henyey_greenstein_merge(a, b); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 30cc8b90330..b12e248f0a3 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -288,12 +288,16 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + bsdf->alpha_x = saturate(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; @@ -302,12 +306,16 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F; + bsdf->alpha_x = saturate(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; @@ -343,12 +351,16 @@ ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + bsdf->alpha_x = saturate(bsdf->alpha_x); bsdf->alpha_y = saturate(bsdf->alpha_y); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index b07b515c405..22d0092093a 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -245,35 +245,69 @@ ccl_device_forceinline float mf_ggx_albedo(float r) return saturate(albedo); } +ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior) +{ + if(ior < 1.0f) { + ior = 1.0f/ior; + } + a = saturate(a); + ior = clamp(ior, 1.0f, 3.0f); + float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f; + float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f; + float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior); + float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f; + + return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f); +} + ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha) { float D = D_ggx(normalize(wi+wo), alpha); float lambda = mf_lambda(wi, make_float2(alpha, alpha)); + float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); + + float multiscatter = wo.z * M_1_PI_F; + float albedo = mf_ggx_albedo(alpha); - return 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f) + (1.0f - albedo) * wo.z; + return albedo*singlescatter + (1.0f - albedo)*multiscatter; } ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha) { - return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, alpha)) * wi.z) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z; + float D = D_ggx_aniso(normalize(wi+wo), alpha); + float lambda = mf_lambda(wi, alpha); + float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); + + float multiscatter = wo.z * M_1_PI_F; + + float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y)); + return albedo*singlescatter + (1.0f - albedo)*multiscatter; } ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta) { - float3 wh; - float fresnel; - if(wi.z*wo.z > 0.0f) { - wh = normalize(wi + wo); - fresnel = fresnel_dielectric_cos(dot(wi, wh), eta); - } - else { - wh = normalize(wi + wo*eta); - fresnel = 1.0f - fresnel_dielectric_cos(dot(wi, wh), eta); - } + bool reflective = (wi.z*wo.z > 0.0f); + + float wh_len; + float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len); if(wh.z < 0.0f) wh = -wh; float3 r_wi = (wi.z < 0.0f)? -wi: wi; - return fresnel * max(0.0f, dot(r_wi, wh)) * D_ggx(wh, alpha) / ((1.0f + mf_lambda(r_wi, make_float2(alpha, alpha))) * r_wi.z) + fabsf(wo.z); + float lambda = mf_lambda(r_wi, make_float2(alpha, alpha)); + float D = D_ggx(wh, alpha); + float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta); + + float multiscatter = fabsf(wo.z * M_1_PI_F); + if(reflective) { + float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f); + float albedo = mf_ggx_albedo(alpha); + return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter); + } + else { + float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f)); + float albedo = mf_ggx_transmission_albedo(alpha, eta); + return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter); + } } /* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */ @@ -326,13 +360,17 @@ ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf) return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { if(is_zero(bsdf->T)) bsdf->T = make_float3(1.0f, 0.0f, 0.0f); bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } @@ -345,12 +383,16 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf) return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->alpha_y = bsdf->alpha_x; bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } @@ -455,7 +497,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; } -ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); bsdf->alpha_y = bsdf->alpha_x; @@ -469,6 +511,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; } diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h index 215c32e1ffb..f8ca64293b0 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h @@ -58,6 +58,14 @@ ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a; + const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness); +} + ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h index 53d703de143..6226ed2c2ef 100644 --- a/intern/cycles/kernel/filter/filter_features.h +++ b/intern/cycles/kernel/filter/filter_features.h @@ -78,16 +78,10 @@ ccl_device_inline void filter_calculate_scale(float *scale) scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f); } -ccl_device_inline float3 filter_get_pixel_color(const ccl_global float *ccl_restrict buffer, - int pass_stride) +ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer, + int pass_stride) { - return make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2)); -} - -ccl_device_inline float filter_get_pixel_variance(const ccl_global float *ccl_restrict buffer, - int pass_stride) -{ - return average(make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2))); + return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10)); } ccl_device_inline void design_row_add(float *design_row, diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h index 5cb4038bc33..3e752bce68f 100644 --- a/intern/cycles/kernel/filter/filter_nlm_cpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h @@ -101,7 +101,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict d for(int x = rect.x; x < rect.z; x++) { const int low = max(rect.x, x-f); const int high = min(rect.z, x+f+1); - out_image[y*w+x] = expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f)); + out_image[y*w+x] = fast_expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f)); } } } @@ -133,8 +133,6 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -167,7 +165,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, dx, dy, w, h, pass_stride, buffer, - color_pass, variance_pass, l_transform, l_rank, weight, l_XtWX, l_XtWY, 0); } diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h index 078c5f56763..2c5ac807051 100644 --- a/intern/cycles/kernel/filter/filter_nlm_gpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h @@ -66,7 +66,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y, sum += difference_image[y*w+x1]; } sum *= 1.0f/(high-low); - out_image[y*w+x] = expf(-max(sum, 0.0f)); + out_image[y*w+x] = fast_expf(-max(sum, 0.0f)); } ccl_device_inline void kernel_filter_nlm_update_output(int x, int y, @@ -97,8 +97,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy, int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -130,7 +128,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy, dx, dy, w, h, pass_stride, buffer, - color_pass, variance_pass, transform, rank, weight, XtWX, XtWY, localIdx); diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index 82cc36625ec..d5ae1b73927 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -142,13 +142,22 @@ ccl_device void kernel_filter_detect_outliers(int x, int y, float ref = 2.0f*values[(int)(n*0.75f)]; float fac = 1.0f; if(L > ref) { - /* If the pixel is an outlier, negate the depth value to mark it as one. - * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ - depth[idx] = -depth[idx]; - fac = ref/L; - variance[idx ] *= fac*fac; - variance[idx + pass_stride] *= fac*fac; - variance[idx+2*pass_stride] *= fac*fac; + /* The pixel appears to be an outlier. + * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel + * should actually be at the reference value: + * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. + * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. + */ + float stddev = sqrtf(average(make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]))); + if(L - 3*stddev < ref) { + /* The pixel is an outlier, so negate the depth value to mark it as one. + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + depth[idx] = -depth[idx]; + fac = ref/L; + variance[idx ] *= fac*fac; + variance[idx + pass_stride] *= fac*fac; + variance[idx+2*pass_stride] *= fac*fac; + } } out[idx ] = fac*image[idx]; out[idx + pass_stride] = fac*image[idx + pass_stride]; diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h index 4a4c81b7ba3..25a3025056c 100644 --- a/intern/cycles/kernel/filter/filter_reconstruction.h +++ b/intern/cycles/kernel/filter/filter_reconstruction.h @@ -22,8 +22,6 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int w, int h, int pass_stride, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, float weight, @@ -31,38 +29,31 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, ccl_global float3 *XtWY, int localIdx) { + if(weight < 1e-3f) { + return; + } + int p_offset = y *w + x; int q_offset = (y+dy)*w + (x+dx); -#ifdef __KERNEL_CPU__ - const int stride = 1; - (void)storage_stride; - (void)localIdx; - float design_row[DENOISE_FEATURES+1]; -#elif defined(__KERNEL_CUDA__) +#ifdef __KERNEL_GPU__ const int stride = storage_stride; +#else + const int stride = 1; + (void) storage_stride; +#endif + +#ifdef __KERNEL_CUDA__ ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE]; ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1); #else - const int stride = storage_stride; float design_row[DENOISE_FEATURES+1]; #endif - float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride); - float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride); + float3 q_color = filter_get_color(buffer + q_offset, pass_stride); - float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride)); - float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride)); - - /* If the pixel was flagged as an outlier during prefiltering, skip it. - * Otherwise, perform the regular confidence interval test unless - * the center pixel is an outlier (in that case, using the confidence - * interval test could result in no pixels being used at all). */ - bool p_outlier = (ccl_get_feature(buffer + p_offset, 0) < 0.0f); - bool q_outlier = (ccl_get_feature(buffer + q_offset, 0) < 0.0f); - bool outside_of_interval = (average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)); - - if(q_outlier || (!p_outlier && outside_of_interval)) { + /* If the pixel was flagged as an outlier during prefiltering, skip it. */ + if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) { return; } @@ -83,13 +74,19 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h, int4 buffer_params, int sample) { -#ifdef __KERNEL_CPU__ - const int stride = 1; - (void)storage_stride; -#else +#ifdef __KERNEL_GPU__ const int stride = storage_stride; +#else + const int stride = 1; + (void) storage_stride; #endif + if(XtWX[0] < 1e-3f) { + /* There is not enough information to determine a denoised result. + * As a fallback, keep the original value of the pixel. */ + return; + } + /* The weighted average of pixel colors (essentially, the NLM-filtered image). * In case the solution of the linear model fails due to numerical issues, * fall back to this value. */ @@ -102,6 +99,9 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h, final_color = mean_color; } + /* Clamp pixel value to positive values. */ + final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f)); + ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z; final_color *= sample; if(buffer_params.w) { @@ -114,6 +114,4 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h, combined_buffer[2] = final_color.z; } -#undef STORAGE_TYPE - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 06728415c15..175bd6b9737 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -621,25 +621,43 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance { float fac = 1.0f/num_samples; +#ifdef __SPLIT_KERNEL__ +# define safe_float3_add(f, v) \ + do { \ + ccl_global float *p = (ccl_global float*)(&(f)); \ + atomic_add_and_fetch_float(p+0, (v).x); \ + atomic_add_and_fetch_float(p+1, (v).y); \ + atomic_add_and_fetch_float(p+2, (v).z); \ + } while(0) +#else +# define safe_float3_add(f, v) (f) += (v) +#endif /* __SPLIT_KERNEL__ */ + #ifdef __PASSES__ - L->direct_diffuse += L_sample->direct_diffuse*fac; - L->direct_glossy += L_sample->direct_glossy*fac; - L->direct_transmission += L_sample->direct_transmission*fac; - L->direct_subsurface += L_sample->direct_subsurface*fac; - L->direct_scatter += L_sample->direct_scatter*fac; - - L->indirect_diffuse += L_sample->indirect_diffuse*fac; - L->indirect_glossy += L_sample->indirect_glossy*fac; - L->indirect_transmission += L_sample->indirect_transmission*fac; - L->indirect_subsurface += L_sample->indirect_subsurface*fac; - L->indirect_scatter += L_sample->indirect_scatter*fac; - - L->background += L_sample->background*fac; - L->ao += L_sample->ao*fac; - L->shadow += L_sample->shadow*fac; + safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse*fac); + safe_float3_add(L->direct_glossy, L_sample->direct_glossy*fac); + safe_float3_add(L->direct_transmission, L_sample->direct_transmission*fac); + safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface*fac); + safe_float3_add(L->direct_scatter, L_sample->direct_scatter*fac); + + safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse*fac); + safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy*fac); + safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission*fac); + safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface*fac); + safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter*fac); + + safe_float3_add(L->background, L_sample->background*fac); + safe_float3_add(L->ao, L_sample->ao*fac); + safe_float3_add(L->shadow, L_sample->shadow*fac); +# ifdef __SPLIT_KERNEL__ + atomic_add_and_fetch_float(&L->mist, L_sample->mist*fac); +# else L->mist += L_sample->mist*fac; -#endif - L->emission += L_sample->emission * fac; +# endif /* __SPLIT_KERNEL__ */ +#endif /* __PASSES__ */ + safe_float3_add(L->emission, L_sample->emission*fac); + +#undef safe_float3_add } #ifdef __SHADOW_TRICKS__ diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 9d52834ffcc..9cd7ffb181d 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -142,7 +142,7 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, ShaderData *sd, - ccl_global PathState *state, + ccl_addr_space PathState *state, PathRadiance *L) { #ifdef __DENOISING_FEATURES__ diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index 0fa77d9e8bd..5d92fd12201 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -139,9 +139,11 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathSta /* random number generator next bounce */ state->rng_offset += PRNG_BOUNCE_NUM; +#ifdef __DENOISING_FEATURES__ if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) { state->flag &= ~PATH_RAY_STORE_SHADOW_INFO; } +#endif } ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state) diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h index 96bc636d5ac..e32d4bbbc1b 100644 --- a/intern/cycles/kernel/kernel_queues.h +++ b/intern/cycles/kernel/kernel_queues.h @@ -128,6 +128,21 @@ ccl_device unsigned int get_global_queue_index( return my_gqidx; } +ccl_device int dequeue_ray_index( + int queue_number, + ccl_global int *queues, + int queue_size, + ccl_global int *queue_index) +{ + int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1; + + if(index < 0) { + return QUEUE_EMPTY_SLOT; + } + + return queues[index + queue_number * queue_size]; +} + CCL_NAMESPACE_END #endif // __KERNEL_QUEUE_H__ diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 1026cde7b29..6475d4b66fd 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -418,7 +418,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup( } /* subsurface scattering step, from a point on the surface to another nearby point on the same object */ -ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state, +ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index dbeaffdfb24..34affab1b9d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -135,7 +135,7 @@ CCL_NAMESPACE_BEGIN * this is because megakernel in device_opencl does not support * custom cflags depending on the scene features. */ -# endif /* __KERNEL_OPENCL_NVIDIA__ */ +# endif /* __KERNEL_OPENCL_APPLE__ */ # ifdef __KERNEL_OPENCL_AMD__ # define __CL_USE_NATIVE__ @@ -236,6 +236,9 @@ CCL_NAMESPACE_BEGIN #ifdef __NO_PRINCIPLED__ # undef __PRINCIPLED__ #endif +#ifdef __NO_DENOISING__ +# undef __DENOISING_FEATURES__ +#endif /* Random Numbers */ @@ -1387,6 +1390,8 @@ enum QueueNumber { #ifdef __BRANCHED_PATH__ /* All rays moving to next iteration of the indirect loop for light */ QUEUE_LIGHT_INDIRECT_ITER, + /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */ + QUEUE_INACTIVE_RAYS, # ifdef __VOLUME__ /* All rays moving to next iteration of the indirect loop for volumes */ QUEUE_VOLUME_INDIRECT_ITER, @@ -1429,6 +1434,9 @@ enum RayState { RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5), RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6), RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT), + + /* Ray is evaluating an iteration of an indirect loop for another thread */ + RAY_BRANCHED_INDIRECT_SHARED = (1 << 7), }; #define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state)) diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 9c0878249d4..1e472aaf51a 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -660,6 +660,7 @@ typedef struct VolumeSegment { * but the entire segment is needed to do always scattering, rather than probabilistically * hitting or missing the volume. if we don't know the transmittance at the end of the * volume we can't generate stratified distance samples up to that transmittance */ +#ifdef __VOLUME_DECOUPLED__ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous) { @@ -829,6 +830,7 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s #endif } } +#endif /* __VOLUME_DECOUPLED__ */ /* scattering for homogeneous and heterogeneous volumes, using decoupled ray * marching. diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index ffd34c293fc..2ed713299fd 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -107,8 +107,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 261176846b1..8dc1a8d583c 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -213,8 +213,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -229,7 +227,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); + kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 9895080d328..c8938534fe8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -85,6 +85,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive) DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 9b85a864153..d4315ee5ec4 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -53,6 +53,7 @@ # include "kernel/split/kernel_direct_lighting.h" # include "kernel/split/kernel_shadow_blocked_ao.h" # include "kernel/split/kernel_shadow_blocked_dl.h" +# include "kernel/split/kernel_enqueue_inactive.h" # include "kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" # include "kernel/split/kernel_buffer_update.h" @@ -230,6 +231,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 2edbff08087..009c3fde9d5 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -207,8 +207,6 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float const* __restrict__ transform, int *rank, float *XtWX, @@ -225,7 +223,6 @@ kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 8b7f1a8d405..628891b1458 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -39,6 +39,7 @@ #include "kernel/split/kernel_direct_lighting.h" #include "kernel/split/kernel_shadow_blocked_ao.h" #include "kernel/split/kernel_shadow_blocked_dl.h" +#include "kernel/split/kernel_enqueue_inactive.h" #include "kernel/split/kernel_next_iteration_setup.h" #include "kernel/split/kernel_indirect_subsurface.h" #include "kernel/split/kernel_buffer_update.h" @@ -118,6 +119,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 0462ca6f9bc..ba53ba4b26f 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -207,8 +207,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -227,7 +225,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index db65c91baf7..dcea2630aef 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_buffer_update.h" -__kernel void kernel_ocl_path_trace_buffer_update( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME buffer_update +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index eb34f750881..ed64ae01aae 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_direct_lighting.h" -__kernel void kernel_ocl_path_trace_direct_lighting( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME direct_lighting +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl index 83ef5f5f3f2..8afaa686e28 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_do_volume.h" -__kernel void kernel_ocl_path_trace_do_volume( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_do_volume((KernelGlobals*)kg); -} +#define KERNEL_NAME do_volume +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl new file mode 100644 index 00000000000..e68d4104a91 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_enqueue_inactive.h" + +#define KERNEL_NAME enqueue_inactive +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index d071b39aa6f..9e1e57beba6 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -18,12 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" -__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local BackgroundAOLocals locals; - kernel_holdout_emission_blurring_pathtermination_ao( - (KernelGlobals*)kg, - &locals); -} +#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao +#define LOCALS_TYPE BackgroundAOLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl index 8c213ff5cb2..192d01444ba 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_background.h" -__kernel void kernel_ocl_path_trace_indirect_background( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_background((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_background +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl index 998ebc4c0c3..84938b889e5 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_subsurface.h" -__kernel void kernel_ocl_path_trace_indirect_subsurface( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_subsurface((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_subsurface +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 822d2287715..c314dc96c33 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_lamp_emission.h" -__kernel void kernel_ocl_path_trace_lamp_emission( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_lamp_emission((KernelGlobals*)kg); -} +#define KERNEL_NAME lamp_emission +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 6d207253a40..8b1332bf013 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_next_iteration_setup.h" -__kernel void kernel_ocl_path_trace_next_iteration_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME next_iteration_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl index bd9aa9538c8..fa210e747c0 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_path_init.h" -__kernel void kernel_ocl_path_trace_path_init( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_path_init((KernelGlobals*)kg); -} +#define KERNEL_NAME path_init +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 9be154e3d75..68ee6f1d536 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_queue_enqueue.h" -__kernel void kernel_ocl_path_trace_queue_enqueue( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local QueueEnqueueLocals locals; - kernel_queue_enqueue((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME queue_enqueue +#define LOCALS_TYPE QueueEnqueueLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index eb4fb4d153a..10d09377ba9 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_scene_intersect.h" -__kernel void kernel_ocl_path_trace_scene_intersect( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_scene_intersect((KernelGlobals*)kg); -} +#define KERNEL_NAME scene_intersect +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index 5bfb31b193a..40eaa561863 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_eval.h" -__kernel void kernel_ocl_path_trace_shader_eval( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shader_eval((KernelGlobals*)kg); -} +#define KERNEL_NAME shader_eval +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl index 38bfd04ad4c..8c36100f762 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_setup.h" -__kernel void kernel_ocl_path_trace_shader_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME shader_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl index 6f722915d45..bcacaa4a054 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl @@ -19,10 +19,9 @@ #include "kernel/split/kernel_shader_sort.h" __attribute__((reqd_work_group_size(64, 1, 1))) -__kernel void kernel_ocl_path_trace_shader_sort( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local ShaderSortLocals locals; - kernel_shader_sort((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME shader_sort +#define LOCALS_TYPE ShaderSortLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl index 6a8ef81b32a..8de250a375c 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_ao.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_ao((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_ao +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl index b255cc5ef8b..29da77022ed 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_dl.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_dl( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_dl((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_dl +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl index 8de82db7afe..651addb02f4 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -31,6 +31,7 @@ #include "kernel/kernels/opencl/kernel_direct_lighting.cl" #include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl" #include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl" +#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl" #include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" #include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" #include "kernel/kernels/opencl/kernel_buffer_update.cl" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h new file mode 100644 index 00000000000..f1e914a70d4 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h @@ -0,0 +1,72 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define KERNEL_NAME_JOIN(a, b) a ## _ ## b +#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b) + +__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( + ccl_global char *kg_global, + ccl_constant KernelData *data, + + ccl_global void *split_data_buffer, + ccl_global char *ray_state, + ccl_global uint *rng_state, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "kernel/kernel_textures.h" + + ccl_global int *queue_index, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pools, + ccl_global float *buffer + ) +{ +#ifdef LOCALS_TYPE + ccl_local LOCALS_TYPE locals; +#endif + + KernelGlobals *kg = (KernelGlobals*)kg_global; + + if(ccl_local_id(0) + ccl_local_id(1) == 0) { + kg->data = data; + + kernel_split_params.rng_state = rng_state; + kernel_split_params.queue_index = queue_index; + kernel_split_params.use_queues_flag = use_queues_flag; + kernel_split_params.work_pools = work_pools; + kernel_split_params.buffer = buffer; + + split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "kernel/kernel_textures.h" + } + + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( + kg +#ifdef LOCALS_TYPE + , &locals +#endif + ); +} + +#undef KERNEL_NAME_JOIN +#undef KERNEL_NAME_EVAL + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl index 99b74a1802b..2b3be38df84 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_subsurface_scatter.h" -__kernel void kernel_ocl_path_trace_subsurface_scatter( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_subsurface_scatter((KernelGlobals*)kg); -} +#define KERNEL_NAME subsurface_scatter +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 188c3960a5f..27a96720c1e 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -191,7 +191,7 @@ class PrincipledBSSRDFClosure : public CBSSRDFClosure { public: void setup(ShaderData *sd, int path_flag, float3 weight) { - alloc(sd, path_flag, weight * albedo, CLOSURE_BSSRDF_PRINCIPLED_ID); + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID); } }; diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 5b66793a05d..14c5c1c3db5 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -156,7 +156,7 @@ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refra BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, unused), + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N), CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1), CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2), CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), @@ -164,7 +164,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, unused), + CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N), CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1), CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2), CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), @@ -191,7 +191,7 @@ BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen) class PrincipledClearcoatClosure : public CBSDFClosure { public: MicrofacetBsdf params; - float clearcoat, clearcoat_gloss; + float clearcoat, clearcoat_roughness; MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) { @@ -202,8 +202,8 @@ public: bsdf->ior = 1.5f; - bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; - bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; + bsdf->alpha_x = clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness; bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); bsdf->extra->clearcoat = clearcoat; @@ -217,7 +217,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd) : 0; } }; @@ -226,7 +226,7 @@ ClosureParam *closure_bsdf_principled_clearcoat_params() static ClosureParam params[] = { CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_gloss), + CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure) }; @@ -389,7 +389,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf, sd) : 0; } }; @@ -413,7 +413,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd) : 0; } }; @@ -566,7 +566,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd) : 0; } }; @@ -590,7 +590,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd) : 0; } }; @@ -618,7 +618,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd) : 0; } }; diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl index 57f40789d49..2bb981c3918 100644 --- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl @@ -32,7 +32,7 @@ shader node_principled_bsdf( float Sheen = 0.0, float SheenTint = 0.5, float Clearcoat = 0.0, - float ClearcoatGloss = 1.0, + float ClearcoatRoughness = 0.03, float IOR = 1.45, float Transmission = 0.0, float TransmissionRoughness = 0.0, @@ -57,8 +57,8 @@ shader node_principled_bsdf( if (diffuse_weight > 1e-5) { if (Subsurface > 1e-5) { - color Albedo = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface); - BSDF = bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, Albedo, Roughness); + color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface); + BSDF = mixed_ss_base_color * bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, SubsurfaceColor, Roughness); } else { BSDF = BaseColor * principled_diffuse(Normal, Roughness); } @@ -114,7 +114,7 @@ shader node_principled_bsdf( } if (Clearcoat > 1e-5) { - BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatGloss); + BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness); } } diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 289d1091b0a..c91d2918687 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -546,7 +546,7 @@ closure color holdout() BUILTIN; closure color ambient_occlusion() BUILTIN; closure color principled_diffuse(normal N, float roughness) BUILTIN; closure color principled_sheen(normal N) BUILTIN; -closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_gloss) BUILTIN; +closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN; // BSSRDF closure color bssrdf_cubic(normal N, vector radius, float texture_blur, float sharpness) BUILTIN; diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h index dc74a2ada53..e2762a85fc8 100644 --- a/intern/cycles/kernel/split/kernel_branched.h +++ b/intern/cycles/kernel/split/kernel_branched.h @@ -63,12 +63,49 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT); } +ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index) +{ + ccl_global char *ray_state = kernel_split_state.ray_state; + + int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index); + + if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) { + return false; + } + +#define SPLIT_DATA_ENTRY(type, name, num) \ + kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; + SPLIT_DATA_ENTRIES_BRANCHED_SHARED +#undef SPLIT_DATA_ENTRY + + kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0; + kernel_split_state.branched_state[inactive_ray].original_ray = ray_index; + kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray]; + + path_radiance_init(inactive_L, kernel_data.film.use_light_pass); + inactive_L->direct_throughput = L->direct_throughput; + path_radiance_copy_indirect(inactive_L, L); + + ray_state[inactive_ray] = RAY_REGENERATED; + ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED); + ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)); + + atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count); + + return true; +} + /* bounce off surface and integrate indirect light */ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg, int ray_index, float num_samples_adjust, ShaderData *saved_sd, - bool reset_path_state) + bool reset_path_state, + bool wait_for_shared) { SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; @@ -155,12 +192,25 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter( /* start the indirect path */ *tp *= num_samples_inv; + if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + return true; } branched_state->next_sample = 0; } + branched_state->next_closure = sd->num_closure; + + if(wait_for_shared) { + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + } + return false; } diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 694b777f429..9f8dd2392d9 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -75,11 +75,30 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K branched_state->next_sample = j+1; branched_state->num_samples = num_samples; + /* Attempting to share too many samples is slow for volumes as it causes us to + * loop here more and have many calls to kernel_volume_integrate which evaluates + * shaders. The many expensive shader evaluations cause the work load to become + * unbalanced and many threads to become idle in this kernel. Limiting the + * number of shared samples here helps quite a lot. + */ + if(branched_state->shared_sample_count < 2) { + if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + } + return true; } # endif } + branched_state->next_sample = num_samples; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + kernel_split_branched_path_indirect_loop_end(kg, ray_index); /* todo: avoid this calculation using decoupled ray marching */ diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h new file mode 100644 index 00000000000..496355bbc3a --- /dev/null +++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_enqueue_inactive(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) +{ +#ifdef __BRANCHED_PATH__ + /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */ + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + char enqueue_flag = 0; + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) { + enqueue_flag = 1; + } + + enqueue_ray_index_local(ray_index, + QUEUE_INACTIVE_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +#endif /* __BRANCHED_PATH__ */ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index 71017fed19e..7758e35fd32 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -147,6 +147,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, ray_index, 1.0f, &kernel_split_state.branched_state[ray_index].sd, + true, true)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); @@ -193,6 +194,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, ray_index, 1.0f, &kernel_split_state.branched_state[ray_index].sd, + true, true)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index e2e841f36d3..66ce2dfb6f1 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg, int queue_number = -1; if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) { + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; } else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index 5dc94caec85..45984ca509b 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -43,11 +43,21 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) } /* All regenerated rays become active here */ - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { +#ifdef __BRANCHED_PATH__ + if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) { + kernel_split_path_end(kg, ray_index); + } + else +#endif /* __BRANCHED_PATH__ */ + { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + } + } - if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) + if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { return; + } #ifdef __KERNEL_DEBUG__ DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index 386fbbc4d09..78e61709b01 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -29,6 +29,14 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) kernel_split_state.queue_data, kernel_split_params.queue_size, 1); } +#ifdef __BRANCHED_PATH__ + /* TODO(mai): move this somewhere else? */ + if(thread_index == 0) { + /* Clear QUEUE_INACTIVE_RAYS before next kernel. */ + kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0; + } +#endif /* __BRANCHED_PATH__ */ + if(ray_index == QUEUE_EMPTY_SLOT) return; diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 57f070d51e0..08f0124b529 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -56,7 +56,20 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index) ccl_global char *ray_state = kernel_split_state.ray_state; #ifdef __BRANCHED_PATH__ - if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { + if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) { + int orig_ray = kernel_split_state.branched_state[ray_index].original_ray; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; + + path_radiance_sum_indirect(L); + path_radiance_accum_sample(orig_ray_L, L, 1); + + atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } + else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER); } else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) { diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index bb1aca2acbf..4bb2f0d3d80 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -95,6 +95,10 @@ typedef ccl_global struct SplitBranchedState { VolumeStack volume_stack[VOLUME_STACK_SIZE]; # endif /* __VOLUME__ */ #endif /*__SUBSURFACE__ */ + + int shared_sample_count; /* number of branched samples shared with other threads */ + int original_ray; /* index of original ray when sharing branched samples */ + bool waiting_on_shared_samples; } SplitBranchedState; #define SPLIT_DATA_BRANCHED_ENTRIES \ @@ -137,6 +141,25 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_BRANCHED_ENTRIES \ SPLIT_DATA_DEBUG_ENTRIES \ +/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ +#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ + SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ + SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ + SPLIT_DATA_SUBSURFACE_ENTRIES \ + SPLIT_DATA_VOLUME_ENTRIES \ + SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_DEBUG_ENTRIES \ + /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { #define SPLIT_DATA_ENTRY(type, name, num) type *name; diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index 1dffe1b179e..d5083b23f80 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -169,6 +169,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it ray_index, num_samples_inv, bssrdf_sd, + false, false)) { branched_state->ss_next_closure = i; @@ -187,6 +188,13 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it branched_state->ss_next_sample = 0; } + branched_state->ss_next_closure = sd->num_closure; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + kernel_split_branched_path_indirect_loop_end(kg, ray_index); return false; @@ -257,21 +265,20 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb); - float bssrdf_u, bssrdf_v; path_state_rng_2D(kg, - &rng, - state, - PRNG_BSDF_U, - &bssrdf_u, &bssrdf_v); + &rng, + state, + PRNG_BSDF_U, + &bssrdf_u, &bssrdf_v); subsurface_scatter_step(kg, - sd, - state, - state->flag, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - false); + sd, + state, + state->flag, + sc, + &lcg_state, + bssrdf_u, bssrdf_v, + false); } } else { diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index f04f765686e..9578fcf2687 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -79,13 +79,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #ifdef __PRINCIPLED__ case CLOSURE_BSDF_PRINCIPLED_ID: { uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset, - sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transmission_offset, + sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset; uint4 data_node2 = read_node(kg, offset); float3 T = stack_load_float3(stack, data_node.y); decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset); - decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_gloss_offset); + decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset); decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset); // get Disney principled parameters @@ -98,7 +98,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float sheen = stack_load_float(stack, sheen_offset); float sheen_tint = stack_load_float(stack, sheen_tint_offset); float clearcoat = stack_load_float(stack, clearcoat_offset); - float clearcoat_gloss = stack_load_float(stack, clearcoat_gloss_offset); + float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset); float transmission = stack_load_float(stack, transmission_offset); float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset); float transmission_roughness = stack_load_float(stack, transmission_roughness_offset); @@ -141,8 +141,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float3 weight = sd->svm_closure_weight * mix_weight; #ifdef __SUBSURFACE__ - float3 albedo = subsurface_color * subsurface + base_color * (1.0f - subsurface); - float3 subsurf_weight = weight * albedo * diffuse_weight; + float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface); + float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; float subsurf_sample_weight = fabsf(average(subsurf_weight)); /* disable in case of diffuse ancestor, can't see it well then and @@ -154,11 +154,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* need to set the base color in this case such that the * rays get the correctly mixed color after transmitting * the object */ - base_color = albedo; + base_color = mixed_ss_base_color; } /* diffuse */ - if(fabsf(average(base_color)) > CLOSURE_WEIGHT_CUTOFF) { + if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { if(subsurface < CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { float3 diff_weight = weight * base_color * diffuse_weight; @@ -186,7 +186,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->sample_weight = subsurf_sample_weight; bssrdf->radius = radius.x; bssrdf->texture_blur = texture_blur; - bssrdf->albedo = albedo.x; + bssrdf->albedo = subsurface_color.x; bssrdf->sharpness = sharpness; bssrdf->N = N; bssrdf->roughness = roughness; @@ -200,7 +200,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->sample_weight = subsurf_sample_weight; bssrdf->radius = radius.y; bssrdf->texture_blur = texture_blur; - bssrdf->albedo = albedo.y; + bssrdf->albedo = subsurface_color.y; bssrdf->sharpness = sharpness; bssrdf->N = N; bssrdf->roughness = roughness; @@ -214,7 +214,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->sample_weight = subsurf_sample_weight; bssrdf->radius = radius.z; bssrdf->texture_blur = texture_blur; - bssrdf->albedo = albedo.z; + bssrdf->albedo = subsurface_color.z; bssrdf->sharpness = sharpness; bssrdf->N = N; bssrdf->roughness = roughness; @@ -292,9 +292,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* setup bsdf */ if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */ - sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); else /* use multi-scatter GGX */ - sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); } } #ifdef __CAUSTICS_TRICKS__ @@ -332,7 +332,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->extra->cspec0 = cspec0; /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); } } @@ -377,7 +377,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->extra->cspec0 = cspec0; /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); } } } @@ -398,14 +398,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->ior = 1.5f; bsdf->extra = extra; - bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; - bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; + bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); bsdf->extra->clearcoat = clearcoat; /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); } } #ifdef __CAUSTICS_TRICKS__ @@ -725,6 +725,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight); if(bsdf) { + bsdf->N = N; bsdf->roughness1 = param1; bsdf->roughness2 = param2; bsdf->offset = -stack_load_float(stack, data_node.z); |