git.blender.org/blender.git
 intern/cycles/device/device_cpu.cpp   | 45
 intern/cycles/kernel/kernel_globals.h | 11
 intern/cycles/kernel/kernel_shadow.h  | 18
 intern/cycles/kernel/kernel_volume.h  | 30
 4 files changed, 83 insertions(+), 21 deletions(-)
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 676b1279a80..275ee028eb4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -213,12 +213,7 @@ public:
return;
}
- KernelGlobals kg = kernel_globals;
-
-#ifdef WITH_OSL
- OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
-#endif
-
+ KernelGlobals kg = thread_kernel_globals_init();
RenderTile tile;
void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
@@ -289,9 +284,7 @@ public:
}
}
-#ifdef WITH_OSL
- OSLShader::thread_free(&kg);
-#endif
+ thread_kernel_globals_free(&kg);
}
void thread_film_convert(DeviceTask& task)
@@ -481,6 +474,40 @@ public:
{
task_pool.cancel();
}
+
+protected:
+ inline KernelGlobals thread_kernel_globals_init()
+ {
+ KernelGlobals kg = kernel_globals;
+ kg.transparent_shadow_intersections = NULL;
+ const int decoupled_count = sizeof(kg.decoupled_volume_steps) /
+ sizeof(*kg.decoupled_volume_steps);
+ for(int i = 0; i < decoupled_count; ++i) {
+ kg.decoupled_volume_steps[i] = NULL;
+ }
+ kg.decoupled_volume_steps_index = 0;
+#ifdef WITH_OSL
+ OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
+#endif
+ return kg;
+ }
+
+ inline void thread_kernel_globals_free(KernelGlobals *kg)
+ {
+ if(kg->transparent_shadow_intersections != NULL) {
+ free(kg->transparent_shadow_intersections);
+ }
+ const int decoupled_count = sizeof(kg->decoupled_volume_steps) /
+ sizeof(*kg->decoupled_volume_steps);
+ for(int i = 0; i < decoupled_count; ++i) {
+ if(kg->decoupled_volume_steps[i] != NULL) {
+ free(kg->decoupled_volume_steps[i]);
+ }
+ }
+#ifdef WITH_OSL
+ OSLShader::thread_free(kg);
+#endif
+ }
};
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
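Note: the refactor above pulls per-thread setup and teardown into helper methods so scratch buffers can be allocated lazily, once per worker thread, and reused across tiles instead of being malloc'd and freed in the hot path. A minimal standalone sketch of that pattern, using stand-in names (Globals, scratch_hits) rather than the real Cycles types:

#include <cstdlib>

/* Illustrative stand-ins; not the real Cycles types. */
struct Intersection { float t; };
struct VolumeStep { float t; };

struct Globals {
	Intersection *scratch_hits;   /* lazily allocated per thread */
	VolumeStep *volume_steps[2];  /* reused across volume records */
	int volume_steps_index;
};

/* Copy the shared globals, then clear the scratch pointers so each
 * worker thread owns (and later frees) its own allocations. */
Globals thread_globals_init(const Globals &shared)
{
	Globals g = shared;
	g.scratch_hits = NULL;
	for(int i = 0; i < 2; ++i) {
		g.volume_steps[i] = NULL;
	}
	g.volume_steps_index = 0;
	return g;
}

/* free(NULL) is a no-op, so unconditional frees are safe here. */
void thread_globals_free(Globals *g)
{
	free(g->scratch_hits);
	for(int i = 0; i < 2; ++i) {
		free(g->volume_steps[i]);
	}
}

The point of clearing the pointers after the copy is that every thread starts from the shared kernel_globals but must not inherit (and later double-free) another thread's buffers.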
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index c44ea1b051f..7e6cdf93fb9 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -31,6 +31,9 @@ struct OSLThreadData;
struct OSLShadingSystem;
# endif
+struct Intersection;
+struct VolumeStep;
+
typedef struct KernelGlobals {
texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_IMAGES_CPU];
texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_IMAGES_CPU];
@@ -51,6 +54,14 @@ typedef struct KernelGlobals {
OSLThreadData *osl_tdata;
# endif
+ /* **** Run-time data **** */
+
+ /* Heap-allocated storage for transparent shadow intersections. */
+ Intersection *transparent_shadow_intersections;
+
+ /* Storage for decoupled volume steps. */
+ VolumeStep *decoupled_volume_steps[2];
+ int decoupled_volume_steps_index;
} KernelGlobals;
#endif /* __KERNEL_CPU__ */
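Note: the header only stores pointers to Intersection and VolumeStep, so the forward declarations added above are sufficient; the full definitions are needed only where the pointers are sized or dereferenced. A small illustration of the idiom (names here are illustrative, not the kernel headers):

#include <cstdlib>

/* Header side: a forward declaration is enough for pointer members. */
struct VolumeStep;

struct Globals {
	VolumeStep *steps;  /* incomplete type, fine for a pointer */
};

/* Implementation side: the full definition is required before sizeof()
 * or dereference. */
struct VolumeStep { float t; };

VolumeStep *alloc_steps(int n)
{
	return (VolumeStep*)malloc(sizeof(VolumeStep)*n);
}

This keeps kernel_globals.h from pulling in the intersection and volume headers just to grow the struct.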
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 3b1111e5069..504ac2e40bc 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -59,14 +59,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
/* intersect to find an opaque surface, or record all transparent surface hits */
Intersection hits_stack[STACK_MAX_HITS];
Intersection *hits = hits_stack;
- uint max_hits = kernel_data.integrator.transparent_max_bounce - state->transparent_bounce - 1;
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
/* prefer the stack, but fall back to dynamic allocation if max_hits
 * is too deep; we need max_hits + 1 storage slots because
 * scene_intersect_shadow_all first stores a hit and only then checks
 * whether the limit is exceeded */
- if(max_hits + 1 > STACK_MAX_HITS)
- hits = (Intersection*)malloc(sizeof(Intersection)*(max_hits + 1));
+ if(max_hits + 1 > STACK_MAX_HITS) {
+ if(kg->transparent_shadow_intersections == NULL) {
+ kg->transparent_shadow_intersections =
+ (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
+ }
+ hits = kg->transparent_shadow_intersections;
+ }
uint num_hits;
blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits);
@@ -147,14 +153,8 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
*shadow = throughput;
- if(hits != hits_stack)
- free(hits);
return is_zero(throughput);
}
-
- /* free dynamic storage */
- if(hits != hits_stack)
- free(hits);
}
else {
Intersection isect;
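Note: the shadow change above replaces a per-call malloc/free pair with a buffer cached on KernelGlobals, sized once for the worst case so every later shadow ray on the thread reuses it. A hedged sketch of that lazy, worst-case-sized cache (simplified types and a hypothetical get_hits_buffer() helper, not the real kernel API):

#include <cstdlib>

struct Intersection { float t; };

struct Globals {
	Intersection *transparent_shadow_intersections;
};

/* Return a buffer able to hold max_hits + 1 entries: prefer the caller's
 * stack storage, else lazily allocate one worst-case-sized buffer that is
 * reused by every later call on this thread. */
Intersection *get_hits_buffer(Globals *kg,
                              Intersection *stack, int stack_size,
                              int max_hits, int worst_case)
{
	if(max_hits + 1 <= stack_size) {
		return stack;
	}
	if(kg->transparent_shadow_intersections == NULL) {
		kg->transparent_shadow_intersections =
		    (Intersection*)malloc(sizeof(Intersection)*(worst_case + 1));
	}
	return kg->transparent_shadow_intersections;
}

In the patch the worst case is transparent_max_bounce, which bounds max_hits for every ray (max_hits shrinks as state->transparent_bounce grows), so the first allocation satisfies all subsequent calls; the matching free moves to thread_kernel_globals_free().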
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index c499773b980..224c275b03d 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -627,12 +627,30 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
step_size = kernel_data.integrator.volume_step_size;
/* compute exact steps in advance for malloc */
max_steps = max((int)ceilf(ray->t/step_size), 1);
+ /* NOTE: For branched path tracing, direct and indirect light integration
+ * may both have volume segments allocated at the same time. We detect this
+ * using an index into the pre-allocated memory. Currently only two segments
+ * may be allocated at a time; supporting more would require changes to
+ * KernelGlobals.
+ *
+ * This restricts decoupled record to stack-like usage: a nested decoupled
+ * record call must free its memory before its caller frees memory.
+ */
+ const int index = kg->decoupled_volume_steps_index;
+ assert(index < sizeof(kg->decoupled_volume_steps) /
+ sizeof(*kg->decoupled_volume_steps));
if(max_steps > global_max_steps) {
max_steps = global_max_steps;
step_size = ray->t / (float)max_steps;
}
- segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+ if(kg->decoupled_volume_steps[index] == NULL) {
+ kg->decoupled_volume_steps[index] =
+ (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
+ }
+ segment->steps = kg->decoupled_volume_steps[index];
random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
+ ++kg->decoupled_volume_steps_index;
}
else {
max_steps = 1;
@@ -745,8 +763,14 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
{
- if(segment->steps != &segment->stack_step)
- free(segment->steps);
+ if(segment->steps != &segment->stack_step) {
+ /* NOTE: Only the most recently allocated segment may be freed;
+ * arbitrary alloc/free ordering is not supported.
+ */
+ assert(kg->decoupled_volume_steps_index > 0);
+ assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+ --kg->decoupled_volume_steps_index;
+ }
}
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
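Note: the index-based scheme above effectively turns decoupled_volume_steps into a two-slot stack: branched path tracing can have a direct and an indirect volume segment alive at once, but they must be released in reverse order of acquisition. A small sketch of that LIFO discipline, with illustrative names (segment_acquire/segment_release are not the kernel API):

#include <cassert>
#include <cstdlib>

struct VolumeStep { float t; };

struct Globals {
	VolumeStep *slots[2];
	int index;
};

/* Acquire the next slot, allocating on first use and reusing afterwards. */
VolumeStep *segment_acquire(Globals *kg, int max_steps)
{
	assert(kg->index < 2);  /* only two live segments are supported */
	if(kg->slots[kg->index] == NULL) {
		kg->slots[kg->index] =
		    (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
	}
	return kg->slots[kg->index++];
}

/* Release must match the most recent acquire; memory stays cached. */
void segment_release(Globals *kg, VolumeStep *steps)
{
	assert(kg->index > 0);
	assert(steps == kg->slots[kg->index - 1]);
	--kg->index;
}

In branched path tracing this means: acquire for direct lighting, acquire again for the nested indirect segment, then release in the opposite order. Releasing out of order trips the asserts, and the actual free() happens only when the thread's KernelGlobals are torn down.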