Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Stockner <lukas.stockner@freenet.de>2018-11-29 04:06:30 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2018-11-29 04:45:24 +0300
commit7fa6f72084b1364cddfbef4f06bbb244210d6967 (patch)
treeae4f682248bd5ba4f716ff60c6dbd67c5684b3d2 /intern/cycles/kernel
parentfb057153b05555606d801d1e942113d40ec15cec (diff)
Cycles: Add sample-based runtime profiler that measures time spent in various parts of the CPU kernel
This commit adds a sample-based profiler that runs during CPU rendering and collects statistics on time spent in different parts of the kernel (ray intersection, shader evaluation etc.) as well as time spent per material and object. The results are currently not exposed in the user interface or per Python yet, to see the stats on the console pass the "--cycles-print-stats" argument to Cycles (e.g. "./blender -- --cycles-print-stats"). Unfortunately, there is no clear way to extend this functionality to CUDA or OpenCL, so it is CPU-only for now. Reviewers: brecht, sergey, swerner Reviewed By: brecht, swerner Differential Revision: https://developer.blender.org/D3892
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/bvh/bvh.h10
-rw-r--r--intern/cycles/kernel/kernel_globals.h4
-rw-r--r--intern/cycles/kernel/kernel_passes.h3
-rw-r--r--intern/cycles/kernel/kernel_path.h14
-rw-r--r--intern/cycles/kernel/kernel_path_subsurface.h2
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h4
-rw-r--r--intern/cycles/kernel/kernel_profiling.h40
-rw-r--r--intern/cycles/kernel/kernel_shader.h38
9 files changed, 116 insertions, 0 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 92cb66bdec9..d4145225b77 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -110,6 +110,7 @@ set(SRC_HEADERS
kernel_path_surface.h
kernel_path_subsurface.h
kernel_path_volume.h
+ kernel_profiling.h
kernel_projection.h
kernel_queues.h
kernel_random.h
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 6708a3efac1..284b1e9208c 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -186,6 +186,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
float difl,
float extmax)
{
+ PROFILING_INIT(kg, PROFILING_INTERSECT);
+
if(!scene_intersect_valid(&ray)) {
return false;
}
@@ -248,6 +250,8 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
+
if(!scene_intersect_valid(&ray)) {
return false;
}
@@ -327,6 +331,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
uint max_hits,
uint *num_hits)
{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
+
if(!scene_intersect_valid(ray)) {
return false;
}
@@ -407,6 +413,8 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
+
if(!scene_intersect_valid(ray)) {
return false;
}
@@ -438,6 +446,8 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
+
if(!scene_intersect_valid(ray)) {
return false;
}
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 37402f42863..59f1e252d21 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -19,6 +19,8 @@
#ifndef __KERNEL_GLOBALS_H__
#define __KERNEL_GLOBALS_H__
+#include "kernel/kernel_profiling.h"
+
#ifdef __KERNEL_CPU__
# include "util/util_vector.h"
# include "util/util_map.h"
@@ -82,6 +84,8 @@ typedef struct KernelGlobals {
int2 global_size;
int2 global_id;
+
+ ProfilingState profiler;
} KernelGlobals;
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 1f5929e4938..08e9db05c39 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -376,6 +376,9 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
int sample,
PathRadiance *L)
{
+ PROFILING_INIT(kg, PROFILING_WRITE_RESULT);
+ PROFILING_OBJECT(PRIM_NONE);
+
float alpha;
float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index cb1f410b09f..a1fc6028293 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -57,6 +57,8 @@ ccl_device_forceinline bool kernel_path_scene_intersect(
Intersection *isect,
PathRadiance *L)
{
+ PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT);
+
uint visibility = path_state_ray_visibility(kg, state);
if(path_state_ao_bounce(kg, state)) {
@@ -105,6 +107,8 @@ ccl_device_forceinline void kernel_path_lamp_emission(
ShaderData *emission_sd,
PathRadiance *L)
{
+ PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION);
+
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
@@ -172,6 +176,8 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
ShaderData *emission_sd,
PathRadiance *L)
{
+ PROFILING_INIT(kg, PROFILING_VOLUME);
+
/* Sanitize volume stack. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
@@ -278,6 +284,8 @@ ccl_device_forceinline bool kernel_path_shader_apply(
PathRadiance *L,
ccl_global float *buffer)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_APPLY);
+
#ifdef __SHADOW_TRICKS__
if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
@@ -355,6 +363,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
float3 throughput,
float3 ao_alpha)
{
+ PROFILING_INIT(kg, PROFILING_AO);
+
/* todo: solve correlation */
float bsdf_u, bsdf_v;
@@ -568,6 +578,8 @@ ccl_device_forceinline void kernel_path_integrate(
ccl_global float *buffer,
ShaderData *emission_sd)
{
+ PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE);
+
/* Shader data memory used for both volumes and surfaces, saves stack space. */
ShaderData sd;
@@ -719,6 +731,8 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
ccl_global float *buffer,
int sample, int x, int y, int offset, int stride)
{
+ PROFILING_INIT(kg, PROFILING_RAY_SETUP);
+
/* buffer offset */
int index = offset + x + y*stride;
int pass_stride = kernel_data.film.pass_stride;
diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h
index ff4a8a9d580..962776f21c1 100644
--- a/intern/cycles/kernel/kernel_path_subsurface.h
+++ b/intern/cycles/kernel/kernel_path_subsurface.h
@@ -32,6 +32,8 @@ bool kernel_path_subsurface_scatter(
ccl_addr_space float3 *throughput,
ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
{
+ PROFILING_INIT(kg, PROFILING_SUBSURFACE);
+
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index c9202ccb16d..0d18a1e8c77 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -217,6 +217,8 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg,
ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
PathRadiance *L)
{
+ PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
+
#ifdef __EMISSION__
if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
return;
@@ -274,6 +276,8 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
PathRadianceState *L_state,
ccl_addr_space Ray *ray)
{
+ PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE);
+
/* no BSDF? we can stop here */
if(sd->flag & SD_BSDF) {
/* sample BSDF */
diff --git a/intern/cycles/kernel/kernel_profiling.h b/intern/cycles/kernel/kernel_profiling.h
new file mode 100644
index 00000000000..a46d6376473
--- /dev/null
+++ b/intern/cycles/kernel/kernel_profiling.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_PROFILING_H__
+#define __KERNEL_PROFILING_H__
+
+#ifdef __KERNEL_CPU__
+# include "util/util_profiling.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __KERNEL_CPU__
+# define PROFILING_INIT(kg, event) ProfilingHelper profiling_helper(&kg->profiler, event)
+# define PROFILING_EVENT(event) profiling_helper.set_event(event)
+# define PROFILING_SHADER(shader) if((shader) != SHADER_NONE) { profiling_helper.set_shader((shader) & SHADER_MASK); }
+# define PROFILING_OBJECT(object) if((object) != PRIM_NONE) { profiling_helper.set_object(object); }
+#else
+# define PROFILING_INIT(kg, event)
+# define PROFILING_EVENT(event)
+# define PROFILING_SHADER(shader)
+# define PROFILING_OBJECT(object)
+#endif /* __KERNEL_CPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_PROFILING_H__ */
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index af883aa715b..4b2e675bb21 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -54,6 +54,8 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
#ifdef __INSTANCING__
sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
#endif
@@ -147,6 +149,9 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
differential_incoming(&sd->dI, ray->dD);
differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#endif
+
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
/* ShaderData setup from BSSRDF scatter */
@@ -163,6 +168,8 @@ void shader_setup_from_subsurface(
const Intersection *isect,
const Ray *ray)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
const bool backfacing = sd->flag & SD_BACKFACING;
/* object, matrices, time, ray_length stay the same */
@@ -233,6 +240,8 @@ void shader_setup_from_subsurface(
differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
/* don't modify dP and dI */
# endif
+
+ PROFILING_SHADER(sd->shader);
}
#endif
@@ -249,6 +258,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
bool object_space,
int lamp)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
/* vectors */
sd->P = P;
sd->N = Ng;
@@ -353,6 +364,9 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
+
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
/* ShaderData setup for displacement */
@@ -380,6 +394,8 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
/* vectors */
sd->P = ray->D;
sd->N = -ray->D;
@@ -414,6 +430,9 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
+
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
/* ShaderData setup from point inside volume */
@@ -421,6 +440,8 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
#ifdef __VOLUME__
ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
/* vectors */
sd->P = ray->P;
sd->N = -ray->D;
@@ -461,6 +482,9 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
/* for NDC coordinates */
sd->ray_P = ray->P;
sd->ray_dP = ray->dP;
+
+ PROFILING_SHADER(sd->shader);
+ PROFILING_OBJECT(sd->object);
}
#endif /* __VOLUME__ */
@@ -591,6 +615,8 @@ void shader_bsdf_eval(KernelGlobals *kg,
float light_pdf,
bool use_mis)
{
+ PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL);
+
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
#ifdef __BRANCHED_PATH__
@@ -720,6 +746,8 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
differential3 *domega_in,
float *pdf)
{
+ PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
+
const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
if(sc == NULL) {
*pdf = 0.0f;
@@ -751,6 +779,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
+ PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
+
int label;
float3 eval;
@@ -984,6 +1014,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
ccl_addr_space PathState *state, int path_flag)
{
+ PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
+
/* If path is being terminated, we are tracing a shadow ray or evaluating
* emission, then we don't need to store closures. The emission and shadow
* shader data also do not have a closure array to save GPU memory. */
@@ -1084,6 +1116,8 @@ ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, con
ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
const float3 omega_in, BsdfEval *eval, float *pdf)
{
+ PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL);
+
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
_shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
@@ -1093,6 +1127,8 @@ ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *s
float randu, float randv, BsdfEval *phase_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
+ PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
+
int sampled = 0;
if(sd->num_closure > 1) {
@@ -1151,6 +1187,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
+ PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
+
int label;
float3 eval;