diff options
author | Campbell Barton <ideasman42@gmail.com> | 2018-11-29 04:55:58 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2018-11-29 04:55:58 +0300 |
commit | 9893fee4e6ce5949c8d3274b2746855b3095258f (patch) | |
tree | 973d8b46af8d83705ecc97c21f12bbb1799af2a3 /intern/cycles/kernel | |
parent | 535984a848865c07a39610b32a53432d1a078c59 (diff) | |
parent | 140f2209b61d637411cfbc22c755703c6220278f (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/bvh.h | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_globals.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_passes.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_subsurface.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_surface.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_profiling.h | 40 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shader.h | 38 |
9 files changed, 116 insertions, 0 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 92cb66bdec9..d4145225b77 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -110,6 +110,7 @@ set(SRC_HEADERS kernel_path_surface.h kernel_path_subsurface.h kernel_path_volume.h + kernel_profiling.h kernel_projection.h kernel_queues.h kernel_random.h diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 6708a3efac1..284b1e9208c 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -186,6 +186,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, float difl, float extmax) { + PROFILING_INIT(kg, PROFILING_INTERSECT); + if(!scene_intersect_valid(&ray)) { return false; } @@ -248,6 +250,8 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, uint *lcg_state, int max_hits) { + PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); + if(!scene_intersect_valid(&ray)) { return false; } @@ -327,6 +331,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, uint max_hits, uint *num_hits) { + PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL); + if(!scene_intersect_valid(ray)) { return false; } @@ -407,6 +413,8 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, Intersection *isect, const uint visibility) { + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME); + if(!scene_intersect_valid(ray)) { return false; } @@ -438,6 +446,8 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, const uint max_hits, const uint visibility) { + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL); + if(!scene_intersect_valid(ray)) { return false; } diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 37402f42863..59f1e252d21 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -19,6 +19,8 @@ #ifndef __KERNEL_GLOBALS_H__ #define __KERNEL_GLOBALS_H__ +#include "kernel/kernel_profiling.h" + #ifdef __KERNEL_CPU__ # include "util/util_vector.h" # include "util/util_map.h" @@ -82,6 +84,8 @@ typedef struct KernelGlobals { int2 global_size; int2 global_id; + + ProfilingState profiler; } KernelGlobals; #endif /* __KERNEL_CPU__ */ diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 1f5929e4938..08e9db05c39 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -376,6 +376,9 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg, int sample, PathRadiance *L) { + PROFILING_INIT(kg, PROFILING_WRITE_RESULT); + PROFILING_OBJECT(PRIM_NONE); + float alpha; float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha); diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index cb1f410b09f..a1fc6028293 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -57,6 +57,8 @@ ccl_device_forceinline bool kernel_path_scene_intersect( Intersection *isect, PathRadiance *L) { + PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT); + uint visibility = path_state_ray_visibility(kg, state); if(path_state_ao_bounce(kg, state)) { @@ -105,6 +107,8 @@ ccl_device_forceinline void kernel_path_lamp_emission( ShaderData *emission_sd, PathRadiance *L) { + PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION); + #ifdef __LAMP_MIS__ if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) { /* ray starting from previous non-transparent bounce */ @@ -172,6 +176,8 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume( ShaderData *emission_sd, PathRadiance *L) { + PROFILING_INIT(kg, PROFILING_VOLUME); + /* Sanitize volume stack. */ if(!hit) { kernel_volume_clean_stack(kg, state->volume_stack); @@ -278,6 +284,8 @@ ccl_device_forceinline bool kernel_path_shader_apply( PathRadiance *L, ccl_global float *buffer) { + PROFILING_INIT(kg, PROFILING_SHADER_APPLY); + #ifdef __SHADOW_TRICKS__ if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) { if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) { @@ -355,6 +363,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, float3 throughput, float3 ao_alpha) { + PROFILING_INIT(kg, PROFILING_AO); + /* todo: solve correlation */ float bsdf_u, bsdf_v; @@ -568,6 +578,8 @@ ccl_device_forceinline void kernel_path_integrate( ccl_global float *buffer, ShaderData *emission_sd) { + PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE); + /* Shader data memory used for both volumes and surfaces, saves stack space. */ ShaderData sd; @@ -719,6 +731,8 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) { + PROFILING_INIT(kg, PROFILING_RAY_SETUP); + /* buffer offset */ int index = offset + x + y*stride; int pass_stride = kernel_data.film.pass_stride; diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h index ff4a8a9d580..962776f21c1 100644 --- a/intern/cycles/kernel/kernel_path_subsurface.h +++ b/intern/cycles/kernel/kernel_path_subsurface.h @@ -32,6 +32,8 @@ bool kernel_path_subsurface_scatter( ccl_addr_space float3 *throughput, ccl_addr_space SubsurfaceIndirectRays *ss_indirect) { + PROFILING_INIT(kg, PROFILING_SUBSURFACE); + float bssrdf_u, bssrdf_v; path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index c9202ccb16d..0d18a1e8c77 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -217,6 +217,8 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L) { + PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT); + #ifdef __EMISSION__ if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) return; @@ -274,6 +276,8 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, PathRadianceState *L_state, ccl_addr_space Ray *ray) { + PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE); + /* no BSDF? we can stop here */ if(sd->flag & SD_BSDF) { /* sample BSDF */ diff --git a/intern/cycles/kernel/kernel_profiling.h b/intern/cycles/kernel/kernel_profiling.h new file mode 100644 index 00000000000..a46d6376473 --- /dev/null +++ b/intern/cycles/kernel/kernel_profiling.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_PROFILING_H__ +#define __KERNEL_PROFILING_H__ + +#ifdef __KERNEL_CPU__ +# include "util/util_profiling.h" +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_CPU__ +# define PROFILING_INIT(kg, event) ProfilingHelper profiling_helper(&kg->profiler, event) +# define PROFILING_EVENT(event) profiling_helper.set_event(event) +# define PROFILING_SHADER(shader) if((shader) != SHADER_NONE) { profiling_helper.set_shader((shader) & SHADER_MASK); } +# define PROFILING_OBJECT(object) if((object) != PRIM_NONE) { profiling_helper.set_object(object); } +#else +# define PROFILING_INIT(kg, event) +# define PROFILING_EVENT(event) +# define PROFILING_SHADER(shader) +# define PROFILING_OBJECT(object) +#endif /* __KERNEL_CPU__ */ + +CCL_NAMESPACE_END + +#endif /* __KERNEL_PROFILING_H__ */ diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index af883aa715b..4b2e675bb21 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -54,6 +54,8 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + #ifdef __INSTANCING__ sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; #endif @@ -147,6 +149,9 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, differential_incoming(&sd->dI, ray->dD); differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); #endif + + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } /* ShaderData setup from BSSRDF scatter */ @@ -163,6 +168,8 @@ void shader_setup_from_subsurface( const Intersection *isect, const Ray *ray) { + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + const bool backfacing = sd->flag & SD_BACKFACING; /* object, matrices, time, ray_length stay the same */ @@ -233,6 +240,8 @@ void shader_setup_from_subsurface( differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); /* don't modify dP and dI */ # endif + + PROFILING_SHADER(sd->shader); } #endif @@ -249,6 +258,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, bool object_space, int lamp) { + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + /* vectors */ sd->P = P; sd->N = Ng; @@ -353,6 +364,9 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, sd->du = differential_zero(); sd->dv = differential_zero(); #endif + + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } /* ShaderData setup for displacement */ @@ -380,6 +394,8 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray) { + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + /* vectors */ sd->P = ray->D; sd->N = -ray->D; @@ -414,6 +430,9 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat sd->du = differential_zero(); sd->dv = differential_zero(); #endif + + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } /* ShaderData setup from point inside volume */ @@ -421,6 +440,8 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat #ifdef __VOLUME__ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray) { + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + /* vectors */ sd->P = ray->P; sd->N = -ray->D; @@ -461,6 +482,9 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s /* for NDC coordinates */ sd->ray_P = ray->P; sd->ray_dP = ray->dP; + + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } #endif /* __VOLUME__ */ @@ -591,6 +615,8 @@ void shader_bsdf_eval(KernelGlobals *kg, float light_pdf, bool use_mis) { + PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL); + bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); #ifdef __BRANCHED_PATH__ @@ -720,6 +746,8 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, differential3 *domega_in, float *pdf) { + PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); + const ShaderClosure *sc = shader_bsdf_pick(sd, &randu); if(sc == NULL) { *pdf = 0.0f; @@ -751,6 +779,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval, float3 *omega_in, differential3 *domega_in, float *pdf) { + PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); + int label; float3 eval; @@ -984,6 +1014,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, int path_flag) { + PROFILING_INIT(kg, PROFILING_SHADER_EVAL); + /* If path is being terminated, we are tracing a shadow ray or evaluating * emission, then we don't need to store closures. The emission and shadow * shader data also do not have a closure array to save GPU memory. */ @@ -1084,6 +1116,8 @@ ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, con ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, BsdfEval *eval, float *pdf) { + PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL); + bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f); @@ -1093,6 +1127,8 @@ ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *s float randu, float randv, BsdfEval *phase_eval, float3 *omega_in, differential3 *domega_in, float *pdf) { + PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); + int sampled = 0; if(sd->num_closure > 1) { @@ -1151,6 +1187,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData * const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval, float3 *omega_in, differential3 *domega_in, float *pdf) { + PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); + int label; float3 eval; |