Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r-- intern/cycles/kernel/split/kernel_branched.h 11
-rw-r--r-- intern/cycles/kernel/split/kernel_buffer_update.h 80
-rw-r--r-- intern/cycles/kernel/split/kernel_data_init.h 73
-rw-r--r-- intern/cycles/kernel/split/kernel_direct_lighting.h 12
-rw-r--r-- intern/cycles/kernel/split/kernel_do_volume.h 17
-rw-r--r-- intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h 159
-rw-r--r-- intern/cycles/kernel/split/kernel_indirect_background.h 33
-rw-r--r-- intern/cycles/kernel/split/kernel_indirect_subsurface.h 1
-rw-r--r-- intern/cycles/kernel/split/kernel_lamp_emission.h 23
-rw-r--r-- intern/cycles/kernel/split/kernel_next_iteration_setup.h 5
-rw-r--r-- intern/cycles/kernel/split/kernel_path_init.h 60
-rw-r--r-- intern/cycles/kernel/split/kernel_queue_enqueue.h 3
-rw-r--r-- intern/cycles/kernel/split/kernel_scene_intersect.h 46
-rw-r--r-- intern/cycles/kernel/split/kernel_shader_eval.h 28
-rw-r--r-- intern/cycles/kernel/split/kernel_shader_sort.h 10
-rw-r--r-- intern/cycles/kernel/split/kernel_shadow_blocked_ao.h 7
-rw-r--r-- intern/cycles/kernel/split/kernel_shadow_blocked_dl.h 13
-rw-r--r-- intern/cycles/kernel/split/kernel_split_common.h 2
-rw-r--r-- intern/cycles/kernel/split/kernel_split_data_types.h 34
-rw-r--r-- intern/cycles/kernel/split/kernel_subsurface_scatter.h 35
20 files changed, 145 insertions, 507 deletions
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index e2762a85fc8..2313feac089 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -87,7 +87,6 @@ ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals
PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
- inactive_L->direct_throughput = L->direct_throughput;
path_radiance_copy_indirect(inactive_L, L);
ray_state[inactive_ray] = RAY_REGENERATED;
@@ -110,7 +109,6 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
ShaderData *sd = saved_sd;
- RNG rng = kernel_split_state.rng[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
float3 throughput = branched_state->throughput;
ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
@@ -157,37 +155,38 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
num_samples = ceil_to_int(num_samples_adjust*num_samples);
float num_samples_inv = num_samples_adjust/num_samples;
- RNG bsdf_rng = cmj_hash(rng, i);
for(int j = branched_state->next_sample; j < num_samples; j++) {
if(reset_path_state) {
*ps = branched_state->path_state;
}
+ ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
*tp = throughput;
ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
if(!kernel_branched_path_surface_bounce(kg,
- &bsdf_rng,
sd,
sc,
j,
num_samples,
tp,
ps,
- L,
+ &L->state,
bsdf_ray,
sum_sample_weight))
{
continue;
}
+ ps->rng_hash = branched_state->path_state.rng_hash;
+
/* update state for next iteration */
branched_state->next_closure = i;
branched_state->next_sample = j+1;
- branched_state->num_samples = num_samples;
/* start the indirect path */
*tp *= num_samples_inv;
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 4c1fdd2d69c..511334e0550 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -75,107 +75,65 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global uint *rng_state = kernel_split_params.rng_state;
- int stride = kernel_split_params.stride;
-
ccl_global char *ray_state = kernel_split_state.ray_state;
-#ifdef __KERNEL_DEBUG__
- DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
-#endif
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- ccl_global float *buffer = kernel_split_params.buffer;
-
- unsigned int work_index;
- ccl_global uint *initial_rng;
-
- unsigned int sample;
- unsigned int tile_x;
- unsigned int tile_y;
- unsigned int pixel_x;
- unsigned int pixel_y;
-
- work_index = kernel_split_state.work_array[ray_index];
- sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
- get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
- &tile_x, &tile_y,
- work_index,
- ray_index);
- initial_rng = rng_state;
-
- rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
- buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
-#ifdef __KERNEL_DEBUG__
- kernel_write_debug_passes(kg, buffer, state, debug_data, sample);
-#endif
+ uint sample = state->sample;
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
/* accumulate result in output buffer */
- bool is_shadow_catcher = (state->flag & PATH_RAY_SHADOW_CATCHER);
- kernel_write_result(kg, buffer, sample, L, 1.0f - (*L_transparent), is_shadow_catcher);
-
- path_rng_end(kg, rng_state, rng);
+ kernel_write_result(kg, buffer, sample, L);
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
}
if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
/* We have completed current work; So get next work */
- int valid_work = get_next_work(kg, &work_index, ray_index);
- if(!valid_work) {
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
+
+ if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
/* If work is invalid, this means no more work is available and the thread may exit */
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
}
if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- kernel_split_state.work_array[ray_index] = work_index;
- /* Get the sample associated with the current work */
- sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
- /* Get pixel and tile position associated with current work */
- get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, ray_index);
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- /* Remap rng_state according to the current work */
- rng_state = initial_rng + kernel_split_params.offset + pixel_x + pixel_y*stride;
- /* Remap buffer according to the current work */
- buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
/* Initialize random numbers and ray. */
- kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, &rng, ray);
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
if(ray->t != 0.0f) {
- /* Initialize throughput, L_transparent, Ray, PathState;
+ /* Initialize throughput, path radiance, Ray, PathState;
* These rays proceed with path-iteration.
*/
*throughput = make_float3(1.0f, 1.0f, 1.0f);
- *L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass);
- path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &rng, sample, ray);
+ path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng_hash, sample, ray);
#ifdef __SUBSURFACE__
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
-#ifdef __KERNEL_DEBUG__
- debug_data_init(debug_data);
-#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
enqueue_flag = 1;
}
else {
- /* These rays do not participate in path-iteration. */
- float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- /* Accumulate result in output buffer. */
- kernel_write_pass_float4(buffer, sample, L_rad);
- path_rng_end(kg, rng_state, rng);
-
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
}
}
}
- kernel_split_state.rng[ray_index] = rng;
#ifndef __COMPUTE_DEVICE_GPU__
}
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index e4545d66eff..77fb61b80a8 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -23,22 +23,6 @@ CCL_NAMESPACE_BEGIN
* The number of elements in the queues is initialized to 0;
*/
-/* Distributes an amount of work across all threads
- * note: work done inside the loop may not show up to all threads till after
- * the current kernel has completed
- */
-#define parallel_for(kg, iter_name, work_size) \
- for(size_t _size = (work_size), \
- _global_size = ccl_global_size(0) * ccl_global_size(1), \
- _n = _size / _global_size, \
- _thread = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0), \
- iter_name = (_n > 0) ? (_thread * _n) : (_thread) \
- ; \
- (iter_name < (_thread+1) * _n) || (iter_name == _n * _global_size + _thread && _thread < _size % _global_size) \
- ; \
- iter_name = (iter_name != (_thread+1) * _n - 1) ? (iter_name + 1) : (_n * _global_size + _thread) \
- )
-
#ifndef __KERNEL_CPU__
ccl_device void kernel_data_init(
#else
@@ -49,12 +33,9 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
#ifdef __KERNEL_OPENCL__
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
#endif
int start_sample,
@@ -75,34 +56,32 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
kg->data = data;
#endif
- kernel_split_params.x = sx;
- kernel_split_params.y = sy;
- kernel_split_params.w = sw;
- kernel_split_params.h = sh;
+ kernel_split_params.tile.x = sx;
+ kernel_split_params.tile.y = sy;
+ kernel_split_params.tile.w = sw;
+ kernel_split_params.tile.h = sh;
- kernel_split_params.offset = offset;
- kernel_split_params.stride = stride;
+ kernel_split_params.tile.start_sample = start_sample;
+ kernel_split_params.tile.num_samples = num_samples;
- kernel_split_params.rng_state = rng_state;
+ kernel_split_params.tile.offset = offset;
+ kernel_split_params.tile.stride = stride;
- kernel_split_params.start_sample = start_sample;
- kernel_split_params.end_sample = end_sample;
+ kernel_split_params.tile.buffer = buffer;
+
+ kernel_split_params.total_work_size = sw * sh * num_samples;
kernel_split_params.work_pools = work_pools;
- kernel_split_params.num_samples = num_samples;
kernel_split_params.queue_index = Queue_index;
kernel_split_params.queue_size = queuesize;
kernel_split_params.use_queues_flag = use_queues_flag;
- kernel_split_params.buffer = buffer;
-
split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
#ifdef __KERNEL_OPENCL__
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
#endif
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
@@ -124,30 +103,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
*/
*use_queues_flag = 0;
}
-
- /* zero the tiles pixels and initialize rng_state if this is the first sample */
- if(start_sample == 0) {
- parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) {
- int pixel = i / kernel_data.film.pass_stride;
- int pass = i % kernel_data.film.pass_stride;
-
- int x = sx + pixel % sw;
- int y = sy + pixel / sw;
-
- int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass;
-
- *(buffer + index) = 0.0f;
- }
-
- parallel_for(kg, i, sw * sh) {
- int x = sx + i % sw;
- int y = sy + i / sw;
-
- int index = (offset + x + y*stride);
- *(rng_state + index) = hash_int_2d(x, y);
- }
- }
-
#endif /* KERENL_STUB */
}
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index 3336c968a44..2aac66ecb84 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -62,8 +62,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
/* direct lighting */
#ifdef __EMISSION__
- RNG rng = kernel_split_state.rng[ray_index];
-
bool flag = (kernel_data.integrator.use_direct_light &&
(sd->flag & SD_BSDF_HAS_EVAL));
@@ -83,23 +81,20 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
if(flag) {
/* Sample illumination from lights to find path contribution. */
- float light_t = path_state_rng_1D(kg, &rng, state, PRNG_LIGHT);
float light_u, light_v;
- path_state_rng_2D(kg, &rng, state, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_state_rng_light_termination(kg, &rng, state);
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, state);
LightSample ls;
if(light_sample(kg,
- light_t, light_u, light_v,
+ light_u, light_v,
sd->time,
sd->P,
state->bounce,
&ls)) {
Ray light_ray;
-# ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
-# endif
BsdfEval L_light;
bool is_lamp;
@@ -115,7 +110,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
}
}
}
- kernel_split_state.rng[ray_index] = rng;
#endif /* __EMISSION__ */
}
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 9f8dd2392d9..491487f1230 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -30,7 +30,6 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
@@ -58,22 +57,21 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, ps, sd, &volume_ray, L, tp, &rng, heterogeneous);
+ kg, ps, sd, &volume_ray, L, tp, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
- kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *tp, &branched_state->path_state, L);
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
/* indirect light bounce */
- if(!kernel_path_volume_bounce(kg, &rng, sd, tp, ps, L, pray)) {
+ if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
continue;
}
/* start the indirect path */
branched_state->next_closure = 0;
branched_state->next_sample = j+1;
- branched_state->num_samples = num_samples;
/* Attempting to share too many samples is slow for volumes as it causes us to
* loop here more and have many calls to kernel_volume_integrate which evaluates
@@ -141,7 +139,6 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
@@ -165,15 +162,15 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
- kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *throughput, state, L);
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
/* indirect light bounce */
- if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
else {
@@ -194,8 +191,6 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
}
# endif /* __BRANCHED_PATH__ */
}
-
- kernel_split_state.rng[ray_index] = rng;
}
# ifdef __BRANCHED_PATH__
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 670a557f084..906bad8ceb6 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -90,163 +90,58 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
- int stride = kernel_split_params.stride;
-
- unsigned int work_index;
- unsigned int pixel_x;
- unsigned int pixel_y;
-
- unsigned int tile_x;
- unsigned int tile_y;
- unsigned int sample;
-
- RNG rng = kernel_split_state.rng[ray_index];
ccl_global PathState *state = 0x0;
float3 throughput;
ccl_global char *ray_state = kernel_split_state.ray_state;
ShaderData *sd = &kernel_split_state.sd[ray_index];
- ccl_global float *buffer = kernel_split_params.buffer;
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
throughput = kernel_split_state.throughput[ray_index];
state = &kernel_split_state.path_state[ray_index];
- work_index = kernel_split_state.work_array[ray_index];
- sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
- get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
- &tile_x, &tile_y,
- work_index,
- ray_index);
-
- buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride;
-
-#ifdef __SHADOW_TRICKS__
- if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
- if(state->flag & PATH_RAY_CAMERA) {
- state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_SHADOW_CATCHER_ONLY | PATH_RAY_STORE_SHADOW_INFO);
- state->catcher_object = sd->object;
- if(!kernel_data.background.transparent) {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- L->shadow_color = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
- }
- }
- }
- else {
- state->flag &= ~PATH_RAY_SHADOW_CATCHER_ONLY;
- }
-#endif /* __SHADOW_TRICKS__ */
-
- /* holdout */
-#ifdef __HOLDOUT__
- if(((sd->flag & SD_HOLDOUT) ||
- (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
- (state->flag & PATH_RAY_CAMERA))
+ if(!kernel_path_shader_apply(kg,
+ sd,
+ state,
+ ray,
+ throughput,
+ emission_sd,
+ L,
+ buffer))
{
- if(kernel_data.background.transparent) {
- float3 holdout_weight;
- if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
- }
- else {
- holdout_weight = shader_holdout_eval(kg, sd);
- }
- /* any throughput is ok, should all be identical here */
- kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
- }
- if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- kernel_split_path_end(kg, ray_index);
- }
+ kernel_split_path_end(kg, ray_index);
}
-#endif /* __HOLDOUT__ */
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
-#ifdef __BRANCHED_PATH__
- if(!IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
-#endif /* __BRANCHED_PATH__ */
- {
- /* Holdout mask objects do not write data passes. */
- kernel_write_data_passes(kg,
- buffer,
- L,
- sd,
- sample,
- state,
- throughput);
- }
-
- /* Blurring of bsdf after bounces, for rays that have a small likelihood
- * of following this particular path (diffuse, rough glossy.
- */
-#ifndef __BRANCHED_PATH__
- if(kernel_data.integrator.filter_glossy != FLT_MAX)
-#else
- if(kernel_data.integrator.filter_glossy != FLT_MAX &&
- (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)))
-#endif /* __BRANCHED_PATH__ */
- {
- float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
- if(blur_pdf < 1.0f) {
- float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
- shader_bsdf_blur(kg, sd, blur_roughness);
- }
- }
-
-#ifdef __EMISSION__
- /* emission */
- if(sd->flag & SD_EMISSION) {
- /* TODO(sergey): is isect.t wrong here for transparent surfaces? */
- float3 emission = indirect_primitive_emission(
- kg,
- sd,
- kernel_split_state.isect[ray_index].t,
- state->flag,
- state->ray_pdf);
- path_radiance_accum_emission(L, throughput, emission, state->bounce);
- }
-#endif /* __EMISSION__ */
-
/* Path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate.
*/
-#ifndef __BRANCHED_PATH__
- float probability = path_state_terminate_probability(kg, state, throughput);
-#else
- float probability = 1.0f;
-
- if(!kernel_data.integrator.branched) {
- probability = path_state_terminate_probability(kg, state, throughput);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- int num_samples = kernel_split_state.branched_state[ray_index].num_samples;
- probability = path_state_terminate_probability(kg, state, throughput*num_samples);
- }
- else if(state->flag & PATH_RAY_TRANSPARENT) {
- probability = path_state_terminate_probability(kg, state, throughput);
- }
-#endif
+ float probability = path_state_continuation_probability(kg, state, throughput);
if(probability == 0.0f) {
kernel_split_path_end(kg, ray_index);
}
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- if(probability != 1.0f) {
- float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE);
- if(terminate >= probability) {
- kernel_split_path_end(kg, ray_index);
- }
- else {
- kernel_split_state.throughput[ray_index] = throughput/probability;
- }
+ else if(probability < 1.0f) {
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+ if(terminate >= probability) {
+ kernel_split_path_end(kg, ray_index);
}
+ else {
+ kernel_split_state.throughput[ray_index] = throughput/probability;
+ }
+ }
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
kernel_update_denoising_features(kg, sd, state, L);
}
}
@@ -260,8 +155,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
}
#endif /* __AO__ */
- kernel_split_state.rng[ray_index] = rng;
-
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index f0ebb90f60a..437043a5971 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -33,7 +33,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
if(ray_index != QUEUE_EMPTY_SLOT) {
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- if(state->bounce > kernel_data.integrator.ao_bounces) {
+ if(path_state_ao_bounce(kg, state)) {
kernel_split_path_end(kg, ray_index);
}
}
@@ -50,33 +50,16 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
return;
}
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
-
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- /* eval background shader if nothing hit */
- if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
- *L_transparent = (*L_transparent) + average((*throughput));
-#ifdef __PASSES__
- if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif
- kernel_split_path_end(kg, ray_index);
- }
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-#ifdef __BACKGROUND__
- /* sample background shader */
- float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
- path_radiance_accum_background(L, state, (*throughput), L_background);
-#endif
- kernel_split_path_end(kg, ray_index);
- }
+ kernel_path_background(kg, state, ray, throughput, emission_sd, L);
+ kernel_split_path_end(kg, ray_index);
}
-
-
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index 82bc2f01fd7..e9fe5552e8c 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -54,7 +54,6 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
#endif
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- kernel_path_subsurface_accum_indirect(ss_indirect, L);
/* Trace indirect subsurface rays by restarting the loop. this uses less
* stack memory than invoking kernel_path_indirect.
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index c669d79ddcd..448456d167d 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -57,27 +57,10 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
float3 throughput = kernel_split_state.throughput[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
-#ifdef __LAMP_MIS__
- if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
- /* ray starting from previous non-transparent bounce */
- Ray light_ray;
-
- light_ray.P = ray.P - state->ray_t*ray.D;
- state->ray_t += kernel_split_state.isect[ray_index].t;
- light_ray.D = ray.D;
- light_ray.t = state->ray_t;
- light_ray.time = ray.time;
- light_ray.dD = ray.dD;
- light_ray.dP = ray.dP;
- /* intersect with lamp */
- float3 emission;
-
- if(indirect_lamp_emission(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &light_ray, &emission)) {
- path_radiance_accum_emission(L, throughput, emission, state->bounce);
- }
- }
-#endif /* __LAMP_MIS__ */
+ kernel_path_lamp_emission(kg, state, &ray, throughput, isect, emission_sd, L);
}
}
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 7758e35fd32..c3373174582 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -126,7 +126,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
if(active) {
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
@@ -135,7 +134,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
/* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
kernel_split_path_end(kg, ray_index);
}
#ifdef __BRANCHED_PATH__
@@ -157,8 +156,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
}
}
#endif /* __BRANCHED_PATH__ */
-
- kernel_split_state.rng[ray_index] = rng;
}
/* Enqueue RAY_UPDATE_BUFFER rays. */
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index a7ecde7c80d..5ad62b585fe 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -29,77 +29,53 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
*/
kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
- unsigned int my_sample;
- unsigned int pixel_x;
- unsigned int pixel_y;
- unsigned int tile_x;
- unsigned int tile_y;
-
- unsigned int work_index = 0;
/* Get work. */
- if(!get_next_work(kg, &work_index, ray_index)) {
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
+
+ if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
/* No more work, mark ray as inactive */
kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
return;
}
- /* Get the sample associated with the work. */
- my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
-
- /* Get pixel and tile position associated with the work. */
- get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
- &tile_x, &tile_y,
- work_index,
- ray_index);
- kernel_split_state.work_array[ray_index] = work_index;
-
- ccl_global uint *rng_state = kernel_split_params.rng_state;
- rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- ccl_global float *buffer = kernel_split_params.buffer;
- buffer += (kernel_split_params.offset + pixel_x + pixel_y * kernel_split_params.stride) * kernel_data.film.pass_stride;
-
- RNG rng = kernel_split_state.rng[ray_index];
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
/* Initialize random numbers and ray. */
+ uint rng_hash;
kernel_path_trace_setup(kg,
- rng_state,
- my_sample,
- pixel_x, pixel_y,
- &rng,
+ sample,
+ x, y,
+ &rng_hash,
&kernel_split_state.ray[ray_index]);
if(kernel_split_state.ray[ray_index].t != 0.0f) {
- /* Initialize throughput, L_transparent, Ray, PathState;
+ /* Initialize throughput, path radiance, Ray, PathState;
* These rays proceed with path-iteration.
*/
kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- kernel_split_state.L_transparent[ray_index] = 0.0f;
path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg,
&kernel_split_state.sd_DL_shadow[ray_index],
&kernel_split_state.path_state[ray_index],
- &rng,
- my_sample,
+ rng_hash,
+ sample,
&kernel_split_state.ray[ray_index]);
#ifdef __SUBSURFACE__
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
-
-#ifdef __KERNEL_DEBUG__
- debug_data_init(&kernel_split_state.debug_data[ray_index]);
-#endif
}
else {
- /* These rays do not participate in path-iteration. */
- float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- /* Accumulate result in output buffer. */
- kernel_write_pass_float4(buffer, my_sample, L_rad);
- path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
}
- kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index e2e841f36d3..66ce2dfb6f1 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
int queue_number = -1;
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
}
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 45984ca509b..f5378bc172b 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -59,52 +59,14 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
return;
}
-#ifdef __KERNEL_DEBUG__
- DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
-#endif
- Intersection isect;
- PathState state = kernel_split_state.path_state[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- /* intersect scene */
- uint visibility = path_state_ray_visibility(kg, &state);
-
- if(state.bounce > kernel_data.integrator.ao_bounces) {
- visibility = PATH_RAY_SHADOW;
- ray.t = kernel_data.background.ao_distance;
- }
-
-#ifdef __HAIR__
- float difl = 0.0f, extmax = 0.0f;
- uint lcg_state = 0;
- RNG rng = kernel_split_state.rng[ray_index];
-
- if(kernel_data.bvh.have_curves) {
- if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {
- float3 pixdiff = ray.dD.dx + ray.dD.dy;
- /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
- difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
- }
-
- extmax = kernel_data.curve.maximum_width;
- lcg_state = lcg_state_init(&rng, state.rng_offset, state.sample, 0x51633e2d);
- }
-
- bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
-#else
- bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
-#endif
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
kernel_split_state.isect[ray_index] = isect;
-#ifdef __KERNEL_DEBUG__
- if(state.flag & PATH_RAY_CAMERA) {
- debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes;
- debug_data->num_bvh_traversed_instances += isect.num_traversed_instances;
- debug_data->num_bvh_intersections += isect.num_intersections;
- }
- debug_data->num_ray_bounces++;
-#endif
-
if(!hit) {
/* Change the state of rays that hit the background;
* These rays undergo special processing in the
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2801b32f285..7032461b04a 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -48,30 +48,18 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
ccl_global char *ray_state = kernel_split_state.ray_state;
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- RNG rng = kernel_split_state.rng[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-#ifndef __BRANCHED_PATH__
- float rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
- shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
-#else
- ShaderContext ctx = SHADER_CONTEXT_MAIN;
- float rbsdf = 0.0f;
-
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
-
+ shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag);
+#ifdef __BRANCHED_PATH__
+ if(kernel_data.integrator.branched) {
+ shader_merge_closures(&kernel_split_state.sd[ray_index]);
}
-
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- ctx = SHADER_CONTEXT_INDIRECT;
+ else
+#endif
+ {
+ shader_prepare_closures(&kernel_split_state.sd[ray_index], state);
}
-
- shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, ctx);
- shader_merge_closures(&kernel_split_state.sd[ray_index]);
-#endif /* __BRANCHED_PATH__ */
-
- kernel_split_state.rng[ray_index] = rng;
}
}
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 297decb0bc2..5a55b680695 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -39,7 +39,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
ccl_local ushort *local_index = &locals->local_index[0];
/* copy to local memory */
- for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
uint idx = offset + i + lid;
uint add = input + idx;
uint value = (~0);
@@ -59,9 +59,9 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
# ifdef __KERNEL_OPENCL__
/* bitonic sort */
- for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
- for (uint inc = length; inc > 0; inc >>= 1) {
- for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
+ for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
+ for(uint inc = length; inc > 0; inc >>= 1) {
+ for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
uint i = lid + ii;
bool direction = ((i & (length << 1)) != 0);
uint j = i ^ inc;
@@ -81,7 +81,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
# endif /* __KERNEL_OPENCL__ */
/* copy to destination */
- for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
uint idx = offset + i + lid;
uint lidx = local_index[i + lid];
uint outi = output + idx;
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index 474286285a9..79aa2c9435b 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -37,21 +37,18 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
#ifdef __BRANCHED_PATH__
if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- kernel_path_ao(kg, sd, emission_sd, L, state, &rng, throughput, shader_bsdf_alpha(kg, sd));
+ kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
#ifdef __BRANCHED_PATH__
}
else {
- kernel_branched_path_ao(kg, sd, emission_sd, L, state, &rng, throughput);
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
}
#endif
-
- kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 78e61709b01..b52f9a5eb81 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -45,7 +45,6 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
@@ -75,7 +74,6 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
if(use_branched) {
kernel_branched_path_surface_connect_light(kg,
- &rng,
sd,
emission_sd,
state,
@@ -91,10 +89,11 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
float3 shadow;
if(!shadow_blocked(kg,
- emission_sd,
- state,
- &ray,
- &shadow))
+ sd,
+ emission_sd,
+ state,
+ &ray,
+ &shadow))
{
/* accumulate */
path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
@@ -103,8 +102,6 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
path_radiance_accum_total_light(L, state, throughput, &L_light);
}
}
-
- kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 08f0124b529..558d327bc76 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -63,7 +63,7 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
path_radiance_sum_indirect(L);
- path_radiance_accum_sample(orig_ray_L, L, 1);
+ path_radiance_accum_sample(orig_ray_L, L);
atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 4bb2f0d3d80..b0e6e5f5250 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -22,28 +22,15 @@ CCL_NAMESPACE_BEGIN
/* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */
typedef struct SplitParams {
- int x;
- int y;
- int w;
- int h;
-
- int offset;
- int stride;
-
- ccl_global uint *rng_state;
-
- int start_sample;
- int end_sample;
+ WorkTile tile;
+ uint total_work_size;
ccl_global unsigned int *work_pools;
- unsigned int num_samples;
ccl_global int *queue_index;
int queue_size;
ccl_global char *use_queues_flag;
- ccl_global float *buffer;
-
/* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
int dummy_sd_flag;
} SplitParams;
@@ -56,14 +43,6 @@ typedef struct SplitParams {
/* SPLIT_DATA_ENTRY(type, name, num) */
-#if defined(WITH_CYCLES_DEBUG) || defined(__KERNEL_DEBUG__)
-/* DebugData memory */
-# define SPLIT_DATA_DEBUG_ENTRIES \
- SPLIT_DATA_ENTRY(DebugData, debug_data, 1)
-#else
-# define SPLIT_DATA_DEBUG_ENTRIES
-#endif /* DEBUG */
-
#ifdef __BRANCHED_PATH__
typedef ccl_global struct SplitBranchedState {
@@ -80,7 +59,6 @@ typedef ccl_global struct SplitBranchedState {
/* indirect loop state */
int next_closure;
int next_sample;
- int num_samples;
#ifdef __SUBSURFACE__
int ss_next_closure;
@@ -122,9 +100,7 @@ typedef ccl_global struct SplitBranchedState {
#endif /* __VOLUME__ */
#define SPLIT_DATA_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
@@ -133,19 +109,16 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
- SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
+ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_DEBUG_ENTRIES \
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
- SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
@@ -158,7 +131,6 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index d5083b23f80..3b957856aea 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -38,7 +38,6 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
ShaderData *sd = &branched_state->sd;
- RNG rng = kernel_split_state.rng[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
@@ -52,14 +51,12 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
branched_state->next_closure == 0 && branched_state->next_sample == 0)
{
- branched_state->lcg_state = lcg_state_init(&rng,
- branched_state->path_state.rng_offset,
- branched_state->path_state.sample,
- 0x68bc21eb);
+ branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
+ 0x68bc21eb);
}
int num_samples = kernel_data.integrator.subsurface_samples;
float num_samples_inv = 1.0f/num_samples;
- RNG bssrdf_rng = cmj_hash(rng, i);
+ uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
@@ -67,7 +64,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect;
float bssrdf_u, bssrdf_v;
path_branched_rng_2D(kg,
- &bssrdf_rng,
+ bssrdf_rng_hash,
&branched_state->path_state,
j,
num_samples,
@@ -77,7 +74,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
/* intersection is expensive so avoid doing multiple times for the same input */
if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- RNG lcg_state = branched_state->lcg_state;
+ uint lcg_state = branched_state->lcg_state;
SubsurfaceIntersection ss_isect_private;
branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
@@ -152,7 +149,6 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
int all = (kernel_data.integrator.sample_all_lights_direct) ||
(branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
kernel_branched_path_surface_connect_light(kg,
- &rng,
bssrdf_sd,
emission_sd,
hit_state,
@@ -229,7 +225,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
@@ -246,7 +241,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
emission_sd,
L,
state,
- &rng,
ray,
throughput,
ss_indirect))
@@ -256,21 +250,17 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
#ifdef __BRANCHED_PATH__
}
else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- float bssrdf_probability;
- ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
- /* modify throughput for picking bssrdf or bsdf */
- *throughput *= bssrdf_probability;
+ const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
- uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
- float bssrdf_u, bssrdf_v;
- path_state_rng_2D(kg,
- &rng,
- state,
- PRNG_BSDF_U,
- &bssrdf_u, &bssrdf_v);
+ uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
subsurface_scatter_step(kg,
sd,
state,
@@ -290,7 +280,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
}
#endif
}
- kernel_split_state.rng[ray_index] = rng;
}
# ifdef __BRANCHED_PATH__