git.blender.org/blender.git
author    Sergey Sharybin <sergey.vfx@gmail.com>  2016-05-17 13:30:46 +0300
committer Sergey Sharybin <sergey.vfx@gmail.com>  2016-05-18 11:14:24 +0300
commit    7b356a856540a1affa5dc85360183418e6337a5a (patch)
tree      9acee7019c696f694c97d504e1a2fe678a7f0cd1 /intern/cycles/kernel/kernel_volume.h
parent    2433a537fa12dad6cc8a1c323b1b73e5cad6cd4d (diff)
Cycles: Reduce number of malloc() calls from the kernel
This commit makes it so malloc() happens only once per volume and once per
transparent shadow query (per thread), improving the scalability of the code
across multiple CPU cores. This is hard to measure on the low-end i7 available
here, but quick tests suggest volume sampling gained about a 3-5% speedup.

The idea is to store the allocated memory in the kernel globals, which are
already per-thread on the CPU.

Reviewers: dingto, juicyfruit, lukasstockner97, maiself, brecht

Reviewed By: brecht

Subscribers: Blendify, nutel

Differential Revision: https://developer.blender.org/D1996
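For context, the diff below reads two fields off KernelGlobals that are not
declared in this file. Judging from their usage (an index acting as a stack
pointer into an array of at most two pre-allocated step buffers), the
additions presumably look roughly like this sketch; the exact declaration is
an assumption, not part of this file's diff:

/* Sketch of the assumed per-thread additions to KernelGlobals (declared
 * elsewhere in this commit, not shown in the diff below). The NOTE in the
 * first hunk says only two segments may be live at once, hence the
 * fixed-size array. */
typedef struct KernelGlobals {
	/* ... existing per-thread fields ... */

	/* Pre-allocated storage for decoupled volume steps, reused across
	 * queries; entries start as NULL and are malloc'd on first use. */
	VolumeStep *decoupled_volume_steps[2];

	/* Number of currently live segments; behaves like a stack pointer. */
	int decoupled_volume_steps_index;
} KernelGlobals;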
Diffstat (limited to 'intern/cycles/kernel/kernel_volume.h')
-rw-r--r--  intern/cycles/kernel/kernel_volume.h  30
1 file changed, 27 insertions(+), 3 deletions(-)
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index c499773b980..224c275b03d 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -627,12 +627,30 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
step_size = kernel_data.integrator.volume_step_size;
/* compute exact steps in advance for malloc */
max_steps = max((int)ceilf(ray->t/step_size), 1);
+ /* NOTE: For branched path tracing it is possible for both direct
+  * and indirect light integration to have volume segments allocated.
+  * We detect this using an index into the pre-allocated memory.
+  * Currently only two segments may be allocated at a time; if more
+  * are needed, some modifications to KernelGlobals will be required.
+  *
+  * This restricts decoupled record to stack-like usage: a subsequent
+  * decoupled record call must free its memory before its caller
+  * frees memory.
+  */
+ const int index = kg->decoupled_volume_steps_index;
+ assert(index < sizeof(kg->decoupled_volume_steps) /
+        sizeof(*kg->decoupled_volume_steps));
if(max_steps > global_max_steps) {
max_steps = global_max_steps;
step_size = ray->t / (float)max_steps;
}
- segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+ if(kg->decoupled_volume_steps[index] == NULL) {
+ kg->decoupled_volume_steps[index] =
+         (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
+ }
+ segment->steps = kg->decoupled_volume_steps[index];
random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
+ ++kg->decoupled_volume_steps_index;
}
else {
max_steps = 1;
@@ -745,8 +763,14 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
{
- if(segment->steps != &segment->stack_step)
- free(segment->steps);
+ if(segment->steps != &segment->stack_step) {
+ /* NOTE: Only the last allocated segment may be freed;
+  * alloc/free in arbitrary order is not supported.
+  */
+ assert(kg->decoupled_volume_steps_index > 0);
+ assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+ --kg->decoupled_volume_steps_index;
+ }
}
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
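The NOTE in the free hunk above implies a strict LIFO protocol between record
and free. Below is a minimal sketch of the expected call order; the record
call's trailing arguments are not visible in these hunks, so everything after
kg and state is an assumption, and the sketch assumes heterogeneous volumes
(so steps are heap-backed rather than using segment->stack_step):

/* Hypothetical caller illustrating the required stack discipline.
 * Only kg, state, and segment are confirmed by the hunks above;
 * the remaining arguments are assumed for illustration. */
VolumeSegment direct_seg, indirect_seg;

kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &direct_seg, true);
/* kg->decoupled_volume_steps_index: 0 -> 1 */
kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &indirect_seg, true);
/* kg->decoupled_volume_steps_index: 1 -> 2, the supported maximum */

/* ... sample/integrate both segments ... */

kernel_volume_decoupled_free(kg, &indirect_seg); /* last allocated, freed first: 2 -> 1 */
kernel_volume_decoupled_free(kg, &direct_seg);   /* then the earlier one: 1 -> 0 */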