diff options
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 24 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.h | 4 |
5 files changed, 32 insertions, 1 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 7e20bb449c3..54ffd4bc4df 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -362,6 +362,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th info.has_half_images = true; info.has_volume_decoupled = true; info.has_osl = true; + info.has_profiling = true; foreach(const DeviceInfo &device, subdevices) { /* Ensure CPU device does not slow down GPU. */ @@ -396,6 +397,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th info.has_half_images &= device.has_half_images; info.has_volume_decoupled &= device.has_volume_decoupled; info.has_osl &= device.has_osl; + info.has_profiling &= device.has_profiling; } return info; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index f3fb338e638..071f61a7566 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -60,6 +60,7 @@ public: bool has_volume_decoupled; /* Decoupled volume shading. */ bool has_osl; /* Support Open Shading Language. */ bool use_split_kernel; /* Use split or mega kernel. */ + bool has_profiling; /* Supports runtime collection of profiling info. */ int cpu_threads; vector<DeviceInfo> multi_devices; @@ -75,6 +76,7 @@ public: has_volume_decoupled = false; has_osl = false; use_split_kernel = false; + has_profiling = false; } bool operator==(const DeviceInfo &info) { diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 36fe9bfc92b..f0a6fd6e3f4 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -477,6 +477,8 @@ public: bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr, DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_NON_LOCAL_MEANS); + int4 rect = task->rect; int r = task->nlm_state.r; int f = task->nlm_state.f; @@ -529,6 +531,8 @@ public: bool denoising_construct_transform(DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_CONSTRUCT_TRANSFORM); + for(int y = 0; y < task->filter_area.w; y++) { for(int x = 0; x < task->filter_area.z; x++) { filter_construct_transform_kernel()((float*) task->buffer.mem.device_pointer, @@ -551,6 +555,8 @@ public: device_ptr output_ptr, DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT); + mem_zero(task->storage.XtWX); mem_zero(task->storage.XtWY); @@ -609,8 +615,10 @@ public: bool denoising_combine_halves(device_ptr a_ptr, device_ptr b_ptr, device_ptr mean_ptr, device_ptr variance_ptr, - int r, int4 rect, DenoisingTask * /*task*/) + int r, int4 rect, DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_COMBINE_HALVES); + for(int y = rect.y; y < rect.w; y++) { for(int x = rect.x; x < rect.z; x++) { filter_combine_halves_kernel()(x, y, @@ -629,6 +637,8 @@ public: device_ptr sample_variance_ptr, device_ptr sv_variance_ptr, device_ptr buffer_variance_ptr, DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DIVIDE_SHADOW); + for(int y = task->rect.y; y < task->rect.w; y++) { for(int x = task->rect.x; x < task->rect.z; x++) { filter_divide_shadow_kernel()(task->render_buffer.samples, @@ -653,6 +663,8 @@ public: device_ptr variance_ptr, DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_GET_FEATURE); + for(int y = task->rect.y; y < task->rect.w; y++) { for(int x = task->rect.x; x < task->rect.z; x++) { filter_get_feature_kernel()(task->render_buffer.samples, @@ -676,6 +688,8 @@ public: device_ptr output_ptr, DenoisingTask *task) { + ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DETECT_OUTLIERS); + for(int y = task->rect.y; y < task->rect.w; y++) { for(int x = task->rect.x; x < task->rect.z; x++) { filter_detect_outliers_kernel()(x, y, @@ -735,6 +749,8 @@ public: void denoise(DenoisingTask& denoising, RenderTile &tile) { + ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING); + tile.sample = tile.start_sample + tile.num_samples; denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising); @@ -765,6 +781,8 @@ public: KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init()); + stats.profiler.add_state(&kg->profiler); + CPUSplitKernel *split_kernel = NULL; if(use_split_kernel) { split_kernel = new CPUSplitKernel(this); @@ -778,6 +796,7 @@ public: RenderTile tile; DenoisingTask denoising(this, task); + denoising.profiler = &kg->profiler; while(task.acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { @@ -802,6 +821,8 @@ public: } } + stats.profiler.remove_state(&kg->profiler); + thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); kg->~KernelGlobals(); kgbuffer.free(); @@ -1061,6 +1082,7 @@ void device_cpu_info(vector<DeviceInfo>& devices) info.has_volume_decoupled = true; info.has_osl = true; info.has_half_images = true; + info.has_profiling = true; devices.insert(devices.begin(), info); } diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 78c65a3d22d..433cbd3c265 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -22,6 +22,7 @@ CCL_NAMESPACE_BEGIN DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task) : tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE), + profiler(NULL), storage(device), buffer(device), device(device) diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 8e0666d0e59..beae60c220f 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -23,6 +23,8 @@ #include "kernel/filter/filter_defines.h" +#include "util/util_profiling.h" + CCL_NAMESPACE_BEGIN class DenoisingTask { @@ -51,6 +53,8 @@ public: TileInfo *tile_info; device_vector<int> tile_info_mem; + ProfilingState *profiler; + int4 rect; int4 filter_area; |