Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Sharybin <sergey.vfx@gmail.com> 2014-11-10 11:54:55 +0300
committer Sergey Sharybin <sergey.vfx@gmail.com> 2014-11-10 20:47:28 +0300
commit 157067acbde7e65bb5bef2023effca8a5fc657f3 (patch)
tree 1cfe2f9d61bf27925fd5197ee6b593186b41885c
parent 3ead24d2351fb6a39659ff4bd6b978876ded8d5b (diff)
Cycles: Speedup for homogeneous volumes in decoupled volume sampling
The idea is to avoid memory allocation when only one segment step is to be allocated. This gives some speedup which is difficult to measure on this trashcan from hell, but it's about 7% to 10% in the extreme case with a single volume filling the whole of the viewport. This seems to depend on the phase of the bug-o-meter in the studio. On the Linux boxes the speedup is not that spectacular: it's about 2% on my laptop and about 3% on the studio desktop. This is likely because of the awesomeness of jemalloc.
-rw-r--r--intern/cycles/kernel/kernel_volume.h10
1 files changed, 6 insertions, 4 deletions
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index cba95abacf6..99caff04eac 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -595,6 +595,7 @@ typedef struct VolumeStep {
} VolumeStep;
typedef struct VolumeSegment {
+ VolumeStep stack_step; /* stack storage for homogenous step, to avoid malloc */
VolumeStep *steps; /* recorded steps */
int numsteps; /* number of steps */
int closure_flag; /* accumulated closure flags from all steps */
@@ -627,11 +628,13 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
/* compute exact steps in advance for malloc */
max_steps = max((int)ceilf(ray->t/step_size), 1);
+ segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
}
else {
max_steps = 1;
step_size = ray->t;
random_jitter_offset = 0.0f;
+ segment->steps = &segment->stack_step;
}
/* init accumulation variables */
@@ -640,10 +643,8 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
float t = 0.0f;
- segment->closure_flag = 0;
segment->numsteps = 0;
-
- segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+ segment->closure_flag = 0;
VolumeStep *step = segment->steps;
@@ -729,7 +730,8 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
{
- free(segment->steps);
+ if(segment->steps != &segment->stack_step)
+ free(segment->steps);
}
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray