Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Stockner <lukas.stockner@freenet.de>2018-11-29 04:06:30 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2018-11-29 04:45:24 +0300
commit7fa6f72084b1364cddfbef4f06bbb244210d6967 (patch)
treeae4f682248bd5ba4f716ff60c6dbd67c5684b3d2 /intern/cycles/render
parentfb057153b05555606d801d1e942113d40ec15cec (diff)
Cycles: Add sample-based runtime profiler that measures time spent in various parts of the CPU kernel
This commit adds a sample-based profiler that runs during CPU rendering and collects statistics on time spent in different parts of the kernel (ray intersection, shader evaluation etc.) as well as time spent per material and object. The results are not yet exposed in the user interface or via Python; to see the stats on the console, pass the "--cycles-print-stats" argument to Cycles (e.g. "./blender -- --cycles-print-stats"). Unfortunately, there is no clear way to extend this functionality to CUDA or OpenCL, so it is CPU-only for now. Reviewers: brecht, sergey, swerner Reviewed By: brecht, swerner Differential Revision: https://developer.blender.org/D3892
Diffstat (limited to 'intern/cycles/render')
-rw-r--r--intern/cycles/render/object.cpp57
-rw-r--r--intern/cycles/render/object.h14
-rw-r--r--intern/cycles/render/session.cpp27
-rw-r--r--intern/cycles/render/session.h10
-rw-r--r--intern/cycles/render/stats.cpp194
-rw-r--r--intern/cycles/render/stats.h56
6 files changed, 325 insertions, 33 deletions
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index dc7a1043208..b34d16c438b 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -335,6 +335,11 @@ uint Object::visibility_for_tracing() const {
return trace_visibility;
}
+int Object::get_device_index() const
+{
+ return index;
+}
+
/* Object Manager */
ObjectManager::ObjectManager()
@@ -348,10 +353,9 @@ ObjectManager::~ObjectManager()
}
void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state,
- Object *ob,
- int object_index)
+ Object *ob)
{
- KernelObject& kobject = state->objects[object_index];
+ KernelObject& kobject = state->objects[ob->index];
Transform *object_motion_pass = state->object_motion_pass;
Mesh *mesh = ob->mesh;
@@ -457,13 +461,13 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
tfm_post = tfm_post * itfm;
}
- int motion_pass_offset = object_index*OBJECT_MOTION_PASS_SIZE;
+ int motion_pass_offset = ob->index*OBJECT_MOTION_PASS_SIZE;
object_motion_pass[motion_pass_offset + 0] = tfm_pre;
object_motion_pass[motion_pass_offset + 1] = tfm_post;
}
else if(state->need_motion == Scene::MOTION_BLUR) {
if(ob->use_motion()) {
- kobject.motion_offset = state->motion_offset[object_index];
+ kobject.motion_offset = state->motion_offset[ob->index];
/* Decompose transforms for interpolation. */
DecomposedTransform *decomp = state->object_motion + kobject.motion_offset;
@@ -494,7 +498,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
if(ob->use_holdout) {
flag |= SD_OBJECT_HOLDOUT_MASK;
}
- state->object_flag[object_index] = flag;
+ state->object_flag[ob->index] = flag;
/* Have curves. */
if(mesh->num_curves()) {
@@ -538,7 +542,7 @@ void ObjectManager::device_update_object_transform_task(
for(int i = 0; i < num_objects; ++i) {
const int object_index = start_index + i;
Object *ob = state->scene->objects[object_index];
- device_update_object_transform(state, ob, object_index);
+ device_update_object_transform(state, ob);
}
}
}
@@ -593,10 +597,8 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene,
* need some tweaks to make mid-complex scenes optimal.
*/
if(scene->objects.size() < 64) {
- int object_index = 0;
foreach(Object *ob, scene->objects) {
- device_update_object_transform(&state, ob, object_index);
- object_index++;
+ device_update_object_transform(&state, ob);
if(progress.get_cancel()) {
return;
}
@@ -642,6 +644,12 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc
if(scene->objects.size() == 0)
return;
+ /* Assign object IDs. */
+ int index = 0;
+ foreach(Object *object, scene->objects) {
+ object->index = index++;
+ }
+
/* set object transform matrices, before applying static transforms */
progress.set_status("Updating Objects", "Copying Transformations to device");
device_update_transforms(dscene, scene, progress);
@@ -686,26 +694,25 @@ void ObjectManager::device_update_flags(Device *,
}
}
- int object_index = 0;
foreach(Object *object, scene->objects) {
if(object->mesh->has_volume) {
- object_flag[object_index] |= SD_OBJECT_HAS_VOLUME;
- object_flag[object_index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
+ object_flag[object->index] |= SD_OBJECT_HAS_VOLUME;
+ object_flag[object->index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
foreach(Attribute& attr, object->mesh->attributes.attributes) {
if(attr.element == ATTR_ELEMENT_VOXEL) {
- object_flag[object_index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
+ object_flag[object->index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
}
}
}
else {
- object_flag[object_index] &= ~(SD_OBJECT_HAS_VOLUME|SD_OBJECT_HAS_VOLUME_ATTRIBUTES);
+ object_flag[object->index] &= ~(SD_OBJECT_HAS_VOLUME|SD_OBJECT_HAS_VOLUME_ATTRIBUTES);
}
if(object->is_shadow_catcher) {
- object_flag[object_index] |= SD_OBJECT_SHADOW_CATCHER;
+ object_flag[object->index] |= SD_OBJECT_SHADOW_CATCHER;
}
else {
- object_flag[object_index] &= ~SD_OBJECT_SHADOW_CATCHER;
+ object_flag[object->index] &= ~SD_OBJECT_SHADOW_CATCHER;
}
if(bounds_valid) {
@@ -714,7 +721,7 @@ void ObjectManager::device_update_flags(Device *,
continue;
}
if(object->bounds.intersects(volume_object->bounds)) {
- object_flag[object_index] |= SD_OBJECT_INTERSECTS_VOLUME;
+ object_flag[object->index] |= SD_OBJECT_INTERSECTS_VOLUME;
break;
}
}
@@ -723,9 +730,8 @@ void ObjectManager::device_update_flags(Device *,
/* Not really valid, but can't make more reliable in the case
* of bounds not being up to date.
*/
- object_flag[object_index] |= SD_OBJECT_INTERSECTS_VOLUME;
+ object_flag[object->index] |= SD_OBJECT_INTERSECTS_VOLUME;
}
- ++object_index;
}
/* Copy object flag. */
@@ -741,7 +747,6 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
KernelObject *kobjects = dscene->objects.data();
bool update = false;
- int object_index = 0;
foreach(Object *object, scene->objects) {
Mesh* mesh = object->mesh;
@@ -750,18 +755,16 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
uint patch_map_offset = 2*(mesh->patch_table_offset + mesh->patch_table->total_size() -
mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - mesh->patch_offset;
- if(kobjects[object_index].patch_map_offset != patch_map_offset) {
- kobjects[object_index].patch_map_offset = patch_map_offset;
+ if(kobjects[object->index].patch_map_offset != patch_map_offset) {
+ kobjects[object->index].patch_map_offset = patch_map_offset;
update = true;
}
}
- if(kobjects[object_index].attribute_map_offset != mesh->attr_map_offset) {
- kobjects[object_index].attribute_map_offset = mesh->attr_map_offset;
+ if(kobjects[object->index].attribute_map_offset != mesh->attr_map_offset) {
+ kobjects[object->index].attribute_map_offset = mesh->attr_map_offset;
update = true;
}
-
- object_index++;
}
if(update) {
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 87e6e6652ad..134f0bc3577 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -38,6 +38,7 @@ class Progress;
class Scene;
struct Transform;
struct UpdateObjectTransformState;
+class ObjectManager;
/* Object */
@@ -88,6 +89,16 @@ public:
* determined flags which denotes trace-time visibility.
*/
uint visibility_for_tracing() const;
+
+ /* Returns the index that is used in the kernel for this object. */
+ int get_device_index() const;
+
+protected:
+ /* Specifies the position of the object in scene->objects and
+ * in the device vectors. Gets set in device_update. */
+ int index;
+
+ friend class ObjectManager;
};
/* Object Manager */
@@ -123,8 +134,7 @@ public:
protected:
void device_update_object_transform(UpdateObjectTransformState *state,
- Object *ob,
- const int object_index);
+ Object *ob);
void device_update_object_transform_task(UpdateObjectTransformState *state);
bool device_update_object_transform_pop_work(
UpdateObjectTransformState *state,
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index d6ecafa19b7..a8374b662d0 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -250,7 +250,9 @@ void Session::run_gpu()
if(!no_tiles) {
/* update scene */
scoped_timer update_timer;
- update_scene();
+ if(update_scene()) {
+ stats.profiler.reset(scene->shaders.size(), scene->objects.size());
+ }
progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
@@ -585,7 +587,9 @@ void Session::run_cpu()
/* update scene */
scoped_timer update_timer;
- update_scene();
+ if(update_scene()) {
+ stats.profiler.reset(scene->shaders.size(), scene->objects.size());
+ }
progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
@@ -729,6 +733,10 @@ void Session::run()
/* load kernels */
load_kernels();
+ if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
+ stats.profiler.start();
+ }
+
/* session thread loop */
progress.set_status("Waiting for render to start");
@@ -743,6 +751,8 @@ void Session::run()
run_cpu();
}
+ stats.profiler.stop();
+
/* progress update */
if(progress.get_cancel())
progress.set_status("Cancel", progress.get_cancel_message());
@@ -825,7 +835,7 @@ void Session::wait()
session_thread = NULL;
}
-void Session::update_scene()
+bool Session::update_scene()
{
thread_scoped_lock scene_lock(scene->mutex);
@@ -876,7 +886,10 @@ void Session::update_scene()
progress.set_status("Updating Scene");
MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
+
+ return true;
}
+ return false;
}
void Session::update_status_time(bool show_pause, bool show_done)
@@ -1052,6 +1065,14 @@ void Session::device_free()
*/
}
+void Session::collect_statistics(RenderStats *render_stats)
+{
+ scene->collect_statistics(render_stats);
+ if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
+ render_stats->collect_profiling(scene, &stats);
+ }
+}
+
int Session::get_max_closure_count()
{
int max_closures = 0;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 56a69919a7a..db07c758427 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -20,6 +20,7 @@
#include "render/buffers.h"
#include "device/device.h"
#include "render/shader.h"
+#include "render/stats.h"
#include "render/tile.h"
#include "util/util_progress.h"
@@ -55,6 +56,8 @@ public:
int pixel_size;
int threads;
+ bool use_profiling;
+
bool display_buffer_linear;
bool use_denoising;
@@ -89,6 +92,8 @@ public:
pixel_size = 1;
threads = 0;
+ use_profiling = false;
+
use_denoising = false;
denoising_passes = false;
denoising_radius = 8;
@@ -118,6 +123,7 @@ public:
&& start_resolution == params.start_resolution
&& pixel_size == params.pixel_size
&& threads == params.threads
+ && use_profiling == params.use_profiling
&& display_buffer_linear == params.display_buffer_linear
&& cancel_timeout == params.cancel_timeout
&& reset_timeout == params.reset_timeout
@@ -159,7 +165,7 @@ public:
void set_samples(int samples);
void set_pause(bool pause);
- void update_scene();
+ bool update_scene();
void load_kernels(bool lock_scene=true);
void device_free();
@@ -168,6 +174,8 @@ public:
* (for example, when rendering with unlimited samples). */
float get_progress();
+ void collect_statistics(RenderStats *stats);
+
protected:
struct DelayedReset {
thread_mutex mutex;
diff --git a/intern/cycles/render/stats.cpp b/intern/cycles/render/stats.cpp
index 101d33fcf65..71d9353ad3d 100644
--- a/intern/cycles/render/stats.cpp
+++ b/intern/cycles/render/stats.cpp
@@ -15,6 +15,7 @@
*/
#include "render/stats.h"
+#include "render/object.h"
#include "util/util_algorithm.h"
#include "util/util_foreach.h"
#include "util/util_string.h"
@@ -33,6 +34,16 @@ bool namedSizeEntryComparator(const NamedSizeEntry& a, const NamedSizeEntry& b)
return a.size > b.size;
}
+bool namedTimeSampleEntryComparator(const NamedNestedSampleStats& a, const NamedNestedSampleStats& b)
+{
+ return a.sum_samples > b.sum_samples;
+}
+
+bool namedSampleCountPairComparator(const NamedSampleCountPair& a, const NamedSampleCountPair& b)
+{
+ return a.samples > b.samples;
+}
+
} // namespace
NamedSizeEntry::NamedSizeEntry()
@@ -77,6 +88,113 @@ string NamedSizeStats::full_report(int indent_level)
return result;
}
+/* Named time sample statistics. */
+
+NamedNestedSampleStats::NamedNestedSampleStats()
+ : name(""), self_samples(0), sum_samples(0)
+{}
+
+NamedNestedSampleStats::NamedNestedSampleStats(const string& name, uint64_t samples)
+ : name(name), self_samples(samples), sum_samples(samples)
+{}
+
+NamedNestedSampleStats& NamedNestedSampleStats::add_entry(const string& name_, uint64_t samples_)
+{
+ entries.push_back(NamedNestedSampleStats(name_, samples_));
+ return entries[entries.size()-1];
+}
+
+void NamedNestedSampleStats::update_sum()
+{
+ sum_samples = self_samples;
+ foreach(NamedNestedSampleStats& entry, entries) {
+ entry.update_sum();
+ sum_samples += entry.sum_samples;
+ }
+}
+
+string NamedNestedSampleStats::full_report(int indent_level, uint64_t total_samples)
+{
+ update_sum();
+
+ if(total_samples == 0) {
+ total_samples = sum_samples;
+ }
+
+ const string indent(indent_level * kIndentNumSpaces, ' ');
+
+ const double sum_percent = 100*((double) sum_samples) / total_samples;
+ const double sum_seconds = sum_samples * 0.001;
+ const double self_percent = 100*((double) self_samples) / total_samples;
+ const double self_seconds = self_samples * 0.001;
+ string info = string_printf("%-32s: Total %3.2f%% (%.2fs), Self %3.2f%% (%.2fs)\n",
+ name.c_str(),
+ sum_percent,
+ sum_seconds,
+ self_percent,
+ self_seconds);
+ string result = indent + info;
+
+ sort(entries.begin(), entries.end(), namedTimeSampleEntryComparator);
+ foreach(NamedNestedSampleStats& entry, entries) {
+ result += entry.full_report(indent_level + 1, total_samples);
+ }
+ return result;
+}
+
+/* Named sample count pairs. */
+
+NamedSampleCountPair::NamedSampleCountPair(const ustring& name, uint64_t samples, uint64_t hits)
+ : name(name), samples(samples), hits(hits)
+{}
+
+NamedSampleCountStats::NamedSampleCountStats()
+{}
+
+void NamedSampleCountStats::add(const ustring& name, uint64_t samples, uint64_t hits)
+{
+ entry_map::iterator entry = entries.find(name);
+ if(entry != entries.end()) {
+ entry->second.samples += samples;
+ entry->second.hits += hits;
+ return;
+ }
+ entries.emplace(name, NamedSampleCountPair(name, samples, hits));
+}
+
+string NamedSampleCountStats::full_report(int indent_level)
+{
+ const string indent(indent_level * kIndentNumSpaces, ' ');
+
+ vector<NamedSampleCountPair> sorted_entries;
+ sorted_entries.reserve(entries.size());
+
+ uint64_t total_hits = 0, total_samples = 0;
+ foreach(entry_map::const_reference entry, entries) {
+ const NamedSampleCountPair &pair = entry.second;
+
+ total_hits += pair.hits;
+ total_samples += pair.samples;
+
+ sorted_entries.push_back(pair);
+ }
+ const double avg_samples_per_hit = ((double) total_samples) / total_hits;
+
+ sort(sorted_entries.begin(), sorted_entries.end(), namedSampleCountPairComparator);
+
+ string result = "";
+ foreach(const NamedSampleCountPair& entry, sorted_entries) {
+ const double seconds = entry.samples * 0.001;
+ const double relative = ((double) entry.samples) / (entry.hits * avg_samples_per_hit);
+
+ result += indent + string_printf("%-32s: %.2fs (Relative cost: %.2f)\n",
+ entry.name.c_str(),
+ seconds,
+ relative);
+ }
+ return result;
+}
+
/* Mesh statistics. */
MeshStats::MeshStats() {
@@ -106,6 +224,74 @@ string ImageStats::full_report(int indent_level)
/* Overall statistics. */
RenderStats::RenderStats() {
+ has_profiling = false;
+}
+
+void RenderStats::collect_profiling(Scene *scene, Stats *stats)
+{
+ has_profiling = true;
+
+ Profiler &prof = stats->profiler;
+
+ kernel = NamedNestedSampleStats("Total render time", prof.get_event(PROFILING_UNKNOWN));
+
+ kernel.add_entry("Ray setup", prof.get_event(PROFILING_RAY_SETUP));
+ kernel.add_entry("Result writing", prof.get_event(PROFILING_WRITE_RESULT));
+
+ NamedNestedSampleStats &integrator = kernel.add_entry("Path integration", prof.get_event(PROFILING_PATH_INTEGRATE));
+ integrator.add_entry("Scene intersection", prof.get_event(PROFILING_SCENE_INTERSECT));
+ integrator.add_entry("Indirect emission", prof.get_event(PROFILING_INDIRECT_EMISSION));
+ integrator.add_entry("Volumes", prof.get_event(PROFILING_VOLUME));
+
+ NamedNestedSampleStats &shading = integrator.add_entry("Shading", 0);
+ shading.add_entry("Shader Setup", prof.get_event(PROFILING_SHADER_SETUP));
+ shading.add_entry("Shader Eval", prof.get_event(PROFILING_SHADER_EVAL));
+ shading.add_entry("Shader Apply", prof.get_event(PROFILING_SHADER_APPLY));
+ shading.add_entry("Ambient Occlusion", prof.get_event(PROFILING_AO));
+ shading.add_entry("Subsurface", prof.get_event(PROFILING_SUBSURFACE));
+
+ integrator.add_entry("Connect Light", prof.get_event(PROFILING_CONNECT_LIGHT));
+ integrator.add_entry("Surface Bounce", prof.get_event(PROFILING_SURFACE_BOUNCE));
+
+ NamedNestedSampleStats &intersection = kernel.add_entry("Intersection", 0);
+ intersection.add_entry("Full Intersection", prof.get_event(PROFILING_INTERSECT));
+ intersection.add_entry("Local Intersection", prof.get_event(PROFILING_INTERSECT_LOCAL));
+ intersection.add_entry("Shadow All Intersection", prof.get_event(PROFILING_INTERSECT_SHADOW_ALL));
+ intersection.add_entry("Volume Intersection", prof.get_event(PROFILING_INTERSECT_VOLUME));
+ intersection.add_entry("Volume All Intersection", prof.get_event(PROFILING_INTERSECT_VOLUME_ALL));
+
+ NamedNestedSampleStats &closure = kernel.add_entry("Closures", 0);
+ closure.add_entry("Surface Closure Evaluation", prof.get_event(PROFILING_CLOSURE_EVAL));
+ closure.add_entry("Surface Closure Sampling", prof.get_event(PROFILING_CLOSURE_SAMPLE));
+ closure.add_entry("Volume Closure Evaluation", prof.get_event(PROFILING_CLOSURE_VOLUME_EVAL));
+ closure.add_entry("Volume Closure Sampling", prof.get_event(PROFILING_CLOSURE_VOLUME_SAMPLE));
+
+ NamedNestedSampleStats &denoising = kernel.add_entry("Denoising", prof.get_event(PROFILING_DENOISING));
+ denoising.add_entry("Construct Transform", prof.get_event(PROFILING_DENOISING_CONSTRUCT_TRANSFORM));
+ denoising.add_entry("Reconstruct", prof.get_event(PROFILING_DENOISING_RECONSTRUCT));
+
+ NamedNestedSampleStats &prefilter = denoising.add_entry("Prefiltering", 0);
+ prefilter.add_entry("Divide Shadow", prof.get_event(PROFILING_DENOISING_DIVIDE_SHADOW));
+ prefilter.add_entry("Non-Local means", prof.get_event(PROFILING_DENOISING_NON_LOCAL_MEANS));
+ prefilter.add_entry("Get Feature", prof.get_event(PROFILING_DENOISING_GET_FEATURE));
+ prefilter.add_entry("Detect Outliers", prof.get_event(PROFILING_DENOISING_DETECT_OUTLIERS));
+ prefilter.add_entry("Combine Halves", prof.get_event(PROFILING_DENOISING_COMBINE_HALVES));
+
+ shaders.entries.clear();
+ foreach(Shader *shader, scene->shaders) {
+ uint64_t samples, hits;
+ if(prof.get_shader(shader->id, samples, hits)) {
+ shaders.add(shader->name, samples, hits);
+ }
+ }
+
+ objects.entries.clear();
+ foreach(Object *object, scene->objects) {
+ uint64_t samples, hits;
+ if(prof.get_object(object->get_device_index(), samples, hits)) {
+ objects.add(object->name, samples, hits);
+ }
+ }
}
string RenderStats::full_report()
@@ -113,6 +299,14 @@ string RenderStats::full_report()
string result = "";
result += "Mesh statistics:\n" + mesh.full_report(1);
result += "Image statistics:\n" + image.full_report(1);
+ if(has_profiling) {
+ result += "Kernel statistics:\n" + kernel.full_report(1);
+ result += "Shader statistics:\n" + shaders.full_report(1);
+ result += "Object statistics:\n" + objects.full_report(1);
+ }
+ else {
+ result += "Profiling information not available (only works with CPU rendering)";
+ }
return result;
}
diff --git a/intern/cycles/render/stats.h b/intern/cycles/render/stats.h
index 2ff0ec3e0e9..ce0529bb8bd 100644
--- a/intern/cycles/render/stats.h
+++ b/intern/cycles/render/stats.h
@@ -17,6 +17,9 @@
#ifndef __RENDER_STATS_H__
#define __RENDER_STATS_H__
+#include "render/scene.h"
+
+#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_vector.h"
@@ -61,6 +64,51 @@ public:
vector<NamedSizeEntry> entries;
};
+class NamedNestedSampleStats {
+public:
+ NamedNestedSampleStats();
+ NamedNestedSampleStats(const string& name, uint64_t samples);
+
+ NamedNestedSampleStats& add_entry(const string& name, uint64_t samples);
+
+ /* Updates sum_samples recursively. */
+ void update_sum();
+
+ string full_report(int indent_level = 0, uint64_t total_samples = 0);
+
+ string name;
+
+ /* self_samples contains only the samples that this specific event got,
+ * while sum_samples also includes the samples of all sub-entries. */
+ uint64_t self_samples, sum_samples;
+
+ vector<NamedNestedSampleStats> entries;
+};
+
+/* Named entry containing both a time-sample count for objects of a type and a
+ * total count of processed items.
+ * This allows estimating the time spent per item. */
+class NamedSampleCountPair {
+public:
+ NamedSampleCountPair(const ustring& name, uint64_t samples, uint64_t hits);
+
+ ustring name;
+ uint64_t samples;
+ uint64_t hits;
+};
+
+/* Contains statistics about pairs of samples and counts as described above. */
+class NamedSampleCountStats {
+public:
+ NamedSampleCountStats();
+
+ string full_report(int indent_level = 0);
+ void add(const ustring& name, uint64_t samples, uint64_t hits);
+
+ typedef unordered_map<ustring, NamedSampleCountPair, ustringHash> entry_map;
+ entry_map entries;
+};
+
/* Statistics about mesh in the render database. */
class MeshStats {
public:
@@ -95,8 +143,16 @@ public:
/* Return full report as string. */
string full_report();
+ /* Collect kernel sampling information from Stats. */
+ void collect_profiling(Scene *scene, Stats *stats);
+
+ bool has_profiling;
+
MeshStats mesh;
ImageStats image;
+ NamedNestedSampleStats kernel;
+ NamedSampleCountStats shaders;
+ NamedSampleCountStats objects;
};
CCL_NAMESPACE_END