From e17f7af0ce7e045e287b517f775a282a7d7cc8c1 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 16 Mar 2019 16:06:36 +0100 Subject: Cleanup: remove Cycles advanced shading features toggle. It's effectively always enabled, only not on some unsupported OpenCL devices. For testing those it's not useful to disable these features. This is replaced by the more fine grained feature toggles that we have now. --- intern/cycles/kernel/kernel_types.h | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 281d9a25047..e0bf9b054ac 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -90,7 +90,6 @@ CCL_NAMESPACE_BEGIN # define __QBVH__ # endif # define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ # define __BRANCHED_PATH__ # ifdef WITH_OSL # define __OSL__ @@ -107,7 +106,6 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_CUDA__ # define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ # define __VOLUME__ # define __VOLUME_SCATTER__ # define __SUBSURFACE__ @@ -132,50 +130,26 @@ CCL_NAMESPACE_BEGIN # define __HAIR__ # else -/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */ +# define __KERNEL_SHADING__ +# define __PRINCIPLED__ +# define __CMJ__ # ifdef __KERNEL_OPENCL_NVIDIA__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ # define __SUBSURFACE__ -# define __PRINCIPLED__ # define __VOLUME__ # define __VOLUME_SCATTER__ # define __SHADOW_RECORD_ALL__ -# define __CMJ__ # define __BRANCHED_PATH__ # endif /* __KERNEL_OPENCL_NVIDIA__ */ -# ifdef __KERNEL_OPENCL_APPLE__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ -# define __PRINCIPLED__ -# define __CMJ__ -/* TODO(sergey): Currently experimental section is ignored here, - * this is because megakernel in device_opencl does not support - * custom cflags depending on the scene features. - */ -# endif /* __KERNEL_OPENCL_APPLE__ */ - # ifdef __KERNEL_OPENCL_AMD__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ # define __SUBSURFACE__ -# define __PRINCIPLED__ # define __VOLUME__ # define __VOLUME_SCATTER__ # define __SHADOW_RECORD_ALL__ -# define __CMJ__ # define __BRANCHED_PATH__ # endif /* __KERNEL_OPENCL_AMD__ */ -# ifdef __KERNEL_OPENCL_INTEL_CPU__ -# define __KERNEL_SHADING__ -# define __KERNEL_ADV_SHADING__ -# define __PRINCIPLED__ -# define __CMJ__ -# endif /* __KERNEL_OPENCL_INTEL_CPU__ */ - # endif /* KERNEL_OPENCL_PREVIEW__ */ #endif /* __KERNEL_OPENCL__ */ @@ -202,9 +176,6 @@ CCL_NAMESPACE_BEGIN # define __TEXTURES__ # define __EXTRA_NODES__ # define __HOLDOUT__ -#endif - -#ifdef __KERNEL_ADV_SHADING__ # define __MULTI_CLOSURE__ # define __TRANSPARENT_SHADOWS__ # define __PASSES__ -- cgit v1.2.3 From 9873005ecd7c21e3a6d371833a64b3ce722a48ea Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 16 Mar 2019 16:45:50 +0100 Subject: Cleanup: simplify kernel features definition. No functional changes, logic here got too complex after many changes over the years. --- intern/cycles/kernel/kernel_compat_opencl.h | 2 +- intern/cycles/kernel/kernel_types.h | 106 +++++++++------------------- 2 files changed, 34 insertions(+), 74 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 21a95098894..3bf8cdebf4a 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -125,7 +125,7 @@ #define fmodf(x, y) fmod((float)(x), (float)(y)) #define sinhf(x) sinh(((float)(x))) -#ifndef __CL_USE_NATIVE__ +#if !(defined(__KERNEL_OPENCL_AMD__) || defined(__KERNEL_OPENCL_INTEL_CPU__)) # define sinf(x) native_sin(((float)(x))) # define cosf(x) native_cos(((float)(x))) # define tanf(x) native_tan(((float)(x))) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index e0bf9b054ac..4b1c8e82dfa 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -83,76 +83,6 @@ CCL_NAMESPACE_BEGIN # define SHADER_SORT_LOCAL_SIZE 1 #endif - -/* Device capabilities */ -#ifdef __KERNEL_CPU__ -# ifdef __KERNEL_SSE2__ -# define __QBVH__ -# endif -# define __KERNEL_SHADING__ -# define __BRANCHED_PATH__ -# ifdef WITH_OSL -# define __OSL__ -# endif -# define __PRINCIPLED__ -# define __SUBSURFACE__ -# define __CMJ__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __VOLUME_DECOUPLED__ -# define __VOLUME_RECORD_ALL__ -#endif /* __KERNEL_CPU__ */ - -#ifdef __KERNEL_CUDA__ -# define __KERNEL_SHADING__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SUBSURFACE__ -# define __PRINCIPLED__ -# define __SHADOW_RECORD_ALL__ -# define __CMJ__ -# ifndef __SPLIT_KERNEL__ -# define __BRANCHED_PATH__ -# endif -#endif /* __KERNEL_CUDA__ */ - -#ifdef __KERNEL_OPENCL__ - -# if defined(__KERNEL_OPENCL_AMD__) || defined(__KERNEL_OPENCL_INTEL_CPU__) -# define __CL_USE_NATIVE__ -# endif - -/* Preview kernel is used as a small kernel when the optimized kernel is still being compiled. */ -# ifdef __KERNEL_OPENCL_PREVIEW__ -# define __AO__ -# define __PASSES__ -# define __HAIR__ -# else - -# define __KERNEL_SHADING__ -# define __PRINCIPLED__ -# define __CMJ__ - -# ifdef __KERNEL_OPENCL_NVIDIA__ -# define __SUBSURFACE__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __BRANCHED_PATH__ -# endif /* __KERNEL_OPENCL_NVIDIA__ */ - -# ifdef __KERNEL_OPENCL_AMD__ -# define __SUBSURFACE__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __BRANCHED_PATH__ -# endif /* __KERNEL_OPENCL_AMD__ */ - -# endif /* KERNEL_OPENCL_PREVIEW__ */ -#endif /* __KERNEL_OPENCL__ */ - /* Kernel features */ #define __SOBOL__ #define __INSTANCING__ @@ -169,8 +99,12 @@ CCL_NAMESPACE_BEGIN #define __SHADOW_TRICKS__ #define __DENOISING_FEATURES__ #define __SHADER_RAYTRACE__ +#define __AO__ +#define __PASSES__ +#define __HAIR__ -#ifdef __KERNEL_SHADING__ +/* Without these we get an AO render, used by OpenCL preview kernel. */ +#ifndef __KERNEL_AO_PREVIEW__ # define __SVM__ # define __EMISSION__ # define __TEXTURES__ @@ -178,16 +112,42 @@ CCL_NAMESPACE_BEGIN # define __HOLDOUT__ # define __MULTI_CLOSURE__ # define __TRANSPARENT_SHADOWS__ -# define __PASSES__ # define __BACKGROUND_MIS__ # define __LAMP_MIS__ -# define __AO__ # define __CAMERA_MOTION__ # define __OBJECT_MOTION__ # define __HAIR__ # define __BAKING__ +# define __PRINCIPLED__ +# define __SUBSURFACE__ +# define __VOLUME__ +# define __VOLUME_SCATTER__ +# define __CMJ__ +# define __SHADOW_RECORD_ALL__ +# define __BRANCHED_PATH__ #endif +/* Device specific features */ +#ifdef __KERNEL_CPU__ +# ifdef __KERNEL_SSE2__ +# define __QBVH__ +# endif +# ifdef WITH_OSL +# define __OSL__ +# endif +# define __VOLUME_DECOUPLED__ +# define __VOLUME_RECORD_ALL__ +#endif /* __KERNEL_CPU__ */ + +#ifdef __KERNEL_CUDA__ +# ifdef __SPLIT_KERNEL__ +# undef __BRANCHED_PATH__ +# endif +#endif /* __KERNEL_CUDA__ */ + +#ifdef __KERNEL_OPENCL__ +#endif /* __KERNEL_OPENCL__ */ + /* Scene-based selective features compilation. */ #ifdef __NO_CAMERA_MOTION__ # undef __CAMERA_MOTION__ -- cgit v1.2.3 From 7778a1a0a12fc026dc5bcc2c195b9460d1c77b53 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 19 Feb 2019 17:44:58 +0100 Subject: Cycles: optimization for constant background colors. Skip shader evaluation then, as we already do for lights. Less than 1% faster in my tests, but might as well be consistent for both. --- intern/cycles/kernel/kernel_bake.h | 12 +++-- intern/cycles/kernel/kernel_emission.h | 96 ++++++++++++++++++---------------- intern/cycles/kernel/kernel_shader.h | 91 ++++++++++++++++++-------------- 3 files changed, 111 insertions(+), 88 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 920b10086c5..37c163f2538 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -351,7 +351,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, out = make_float3(roughness, roughness, roughness); } else { - out = shader_emissive_eval(kg, &sd); + out = shader_emissive_eval(&sd); } break; } @@ -475,8 +475,9 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, shader_setup_from_background(kg, &sd, &ray); /* evaluate */ - int flag = 0; /* we can't know which type of BSDF this is for */ - out = shader_eval_background(kg, &sd, &state, flag); + int path_flag = 0; /* we can't know which type of BSDF this is for */ + shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION); + out = shader_background_eval(&sd); break; } default: @@ -554,8 +555,9 @@ ccl_device void kernel_background_evaluate(KernelGlobals *kg, shader_setup_from_background(kg, &sd, &ray); /* evaluate */ - int flag = 0; /* we can't know which type of BSDF this is for */ - float3 color = shader_eval_background(kg, &sd, &state, flag); + int path_flag = 0; /* we can't know which type of BSDF this is for */ + shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION); + float3 color = shader_background_eval(&sd); /* write output */ output[i] += make_float4(color.x, color.y, color.z, 0.0f); diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 302bb047647..9c47d1ca7be 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -29,43 +29,36 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, /* setup shading at emitter */ float3 eval; - int shader_flag = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).flags; - -#ifdef __BACKGROUND_MIS__ - if(ls->type == LIGHT_BACKGROUND) { - Ray ray; - ray.D = ls->D; - ray.P = ls->P; - ray.t = 1.0f; - ray.time = time; - ray.dP = differential3_zero(); - ray.dD = dI; - - shader_setup_from_background(kg, emission_sd, &ray); - - path_state_modify_bounce(state, true); - eval = shader_eval_background(kg, emission_sd, state, 0); - path_state_modify_bounce(state, false); - } - else -#endif - if(shader_flag & SD_HAS_CONSTANT_EMISSION) - { - eval.x = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[0]; - eval.y = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[1]; - eval.z = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[2]; + if(shader_constant_emission_eval(kg, ls->shader, &eval)) { if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) { ls->Ng = -ls->Ng; } } - else - { - shader_setup_from_sample(kg, emission_sd, - ls->P, ls->Ng, I, - ls->shader, ls->object, ls->prim, - ls->u, ls->v, t, time, false, ls->lamp); + else { + /* Setup shader data and call shader_eval_surface once, better + * for GPU coherence and compile times. */ +#ifdef __BACKGROUND_MIS__ + if(ls->type == LIGHT_BACKGROUND) { + Ray ray; + ray.D = ls->D; + ray.P = ls->P; + ray.t = 1.0f; + ray.time = time; + ray.dP = differential3_zero(); + ray.dD = dI; + + shader_setup_from_background(kg, emission_sd, &ray); + } + else +#endif + { + shader_setup_from_sample(kg, emission_sd, + ls->P, ls->Ng, I, + ls->shader, ls->object, ls->prim, + ls->u, ls->v, t, time, false, ls->lamp); - ls->Ng = emission_sd->Ng; + ls->Ng = emission_sd->Ng; + } /* No proper path flag, we're evaluating this for all closures. that's * weak but we'd have to do multiple evaluations otherwise. */ @@ -73,8 +66,16 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION); path_state_modify_bounce(state, false); - /* Evaluate emissive closure. */ - eval = shader_emissive_eval(kg, emission_sd); + /* Evaluate closures. */ +#ifdef __BACKGROUND_MIS__ + if (ls->type == LIGHT_BACKGROUND) { + eval = shader_background_eval(emission_sd); + } + else +#endif + { + eval = shader_emissive_eval(emission_sd); + } } eval *= ls->eval_fac; @@ -201,7 +202,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf) { /* evaluate emissive closure */ - float3 L = shader_emissive_eval(kg, sd); + float3 L = shader_emissive_eval(sd); #ifdef __HAIR__ if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE)) @@ -294,7 +295,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, #ifdef __BACKGROUND__ int shader = kernel_data.background.surface_shader; - /* use visibility flag to skip lights */ + /* Use visibility flag to skip lights. */ if(shader & SHADER_EXCLUDE_ANY) { if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || ((shader & SHADER_EXCLUDE_GLOSSY) && @@ -305,20 +306,27 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, return make_float3(0.0f, 0.0f, 0.0f); } - /* evaluate background closure */ + + /* Evaluate background shader. */ + float3 L; + if(!shader_constant_emission_eval(kg, shader, &L)) { # ifdef __SPLIT_KERNEL__ - Ray priv_ray = *ray; - shader_setup_from_background(kg, emission_sd, &priv_ray); + Ray priv_ray = *ray; + shader_setup_from_background(kg, emission_sd, &priv_ray); # else - shader_setup_from_background(kg, emission_sd, ray); + shader_setup_from_background(kg, emission_sd, ray); # endif - path_state_modify_bounce(state, true); - float3 L = shader_eval_background(kg, emission_sd, state, state->flag); - path_state_modify_bounce(state, false); + path_state_modify_bounce(state, true); + shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION); + path_state_modify_bounce(state, false); + + L = shader_background_eval(emission_sd); + } + /* Background MIS weights. */ #ifdef __BACKGROUND_MIS__ - /* check if background light exists or if we should skip pdf */ + /* Check if background light exists or if we should skip pdf. */ int res_x = kernel_data.integrator.pdf_background_res_x; if(!(state->flag & PATH_RAY_MIS_SKIP) && res_x) { diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index afb9ff11c10..eff792ec53d 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -984,9 +984,40 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b } #endif /* __SUBSURFACE__ */ +/* Constant emission optimization */ + +ccl_device bool shader_constant_emission_eval(KernelGlobals *kg, int shader, float3 *eval) +{ + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; + + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + *eval = make_float3( + kernel_tex_fetch(__shaders, shader_index).constant_emission[0], + kernel_tex_fetch(__shaders, shader_index).constant_emission[1], + kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); + + return true; + } + + return false; +} + +/* Background */ + +ccl_device float3 shader_background_eval(ShaderData *sd) +{ + if(sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } +} + /* Emission */ -ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) +ccl_device float3 shader_emissive_eval(ShaderData *sd) { if(sd->flag & SD_EMISSION) { return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; @@ -1034,20 +1065,32 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, sd->num_closure_left = max_closures; #ifdef __OSL__ - if(kg->osl) - OSLShader::eval_surface(kg, sd, state, path_flag); + if(kg->osl) { + if (sd->object == OBJECT_NONE) { + OSLShader::eval_background(kg, sd, state, path_flag); + } + else { + OSLShader::eval_surface(kg, sd, state, path_flag); + } + } else #endif { #ifdef __SVM__ svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); #else - DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, - sizeof(DiffuseBsdf), - make_float3(0.8f, 0.8f, 0.8f)); - if(bsdf != NULL) { - bsdf->N = sd->N; - sd->flag |= bsdf_diffuse_setup(bsdf); + if(sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); + sd->flag |= SD_EMISSION; + } + else { + DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, + sizeof(DiffuseBsdf), + make_float3(0.8f, 0.8f, 0.8f)); + if(bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } } #endif } @@ -1057,36 +1100,6 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, } } -/* Background Evaluation */ - -ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, - ccl_addr_space PathState *state, int path_flag) -{ - sd->num_closure = 0; - sd->num_closure_left = 0; - -#ifdef __SVM__ -# ifdef __OSL__ - if(kg->osl) { - OSLShader::eval_background(kg, sd, state, path_flag); - } - else -# endif /* __OSL__ */ - { - svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); - } - - if(sd->flag & SD_EMISSION) { - return sd->closure_emission_background; - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } -#else /* __SVM__ */ - return make_float3(0.8f, 0.8f, 0.8f); -#endif /* __SVM__ */ -} - /* Volume */ #ifdef __VOLUME__ -- cgit v1.2.3