diff options
Diffstat (limited to 'intern/cycles/kernel/split')
20 files changed, 145 insertions, 507 deletions
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h index e2762a85fc8..2313feac089 100644 --- a/intern/cycles/kernel/split/kernel_branched.h +++ b/intern/cycles/kernel/split/kernel_branched.h @@ -87,7 +87,6 @@ ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray]; path_radiance_init(inactive_L, kernel_data.film.use_light_pass); - inactive_L->direct_throughput = L->direct_throughput; path_radiance_copy_indirect(inactive_L, L); ray_state[inactive_ray] = RAY_REGENERATED; @@ -110,7 +109,6 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter( SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; ShaderData *sd = saved_sd; - RNG rng = kernel_split_state.rng[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; float3 throughput = branched_state->throughput; ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; @@ -157,37 +155,38 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter( num_samples = ceil_to_int(num_samples_adjust*num_samples); float num_samples_inv = num_samples_adjust/num_samples; - RNG bsdf_rng = cmj_hash(rng, i); for(int j = branched_state->next_sample; j < num_samples; j++) { if(reset_path_state) { *ps = branched_state->path_state; } + ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); + ccl_global float3 *tp = &kernel_split_state.throughput[ray_index]; *tp = throughput; ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index]; if(!kernel_branched_path_surface_bounce(kg, - &bsdf_rng, sd, sc, j, num_samples, tp, ps, - L, + &L->state, bsdf_ray, sum_sample_weight)) { continue; } + ps->rng_hash = branched_state->path_state.rng_hash; + /* update state for next iteration */ branched_state->next_closure = i; branched_state->next_sample = j+1; - branched_state->num_samples = num_samples; /* start the indirect path */ *tp *= num_samples_inv; diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 4c1fdd2d69c..511334e0550 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -75,107 +75,65 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(ray_index != QUEUE_EMPTY_SLOT) { #endif - ccl_global uint *rng_state = kernel_split_params.rng_state; - int stride = kernel_split_params.stride; - ccl_global char *ray_state = kernel_split_state.ray_state; -#ifdef __KERNEL_DEBUG__ - DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; -#endif ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; - ccl_global float *buffer = kernel_split_params.buffer; - - unsigned int work_index; - ccl_global uint *initial_rng; - - unsigned int sample; - unsigned int tile_x; - unsigned int tile_y; - unsigned int pixel_x; - unsigned int pixel_y; - - work_index = kernel_split_state.work_array[ray_index]; - sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; - get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, - &tile_x, &tile_y, - work_index, - ray_index); - initial_rng = rng_state; - - rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; - buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { -#ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, state, debug_data, sample); -#endif + uint sample = state->sample; + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; /* accumulate result in output buffer */ - bool is_shadow_catcher = (state->flag & PATH_RAY_SHADOW_CATCHER); - kernel_write_result(kg, buffer, sample, L, 1.0f - (*L_transparent), is_shadow_catcher); - - path_rng_end(kg, rng_state, rng); + kernel_write_result(kg, buffer, sample, L); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { /* We have completed current work; So get next work */ - int valid_work = get_next_work(kg, &work_index, ray_index); - if(!valid_work) { + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; + uint work_index; + + if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { /* If work is invalid, this means no more work is available and the thread may exit */ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); } if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { - kernel_split_state.work_array[ray_index] = work_index; - /* Get the sample associated with the current work */ - sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; - /* Get pixel and tile position associated with current work */ - get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, ray_index); + ccl_global WorkTile *tile = &kernel_split_params.tile; + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - /* Remap rng_state according to the current work */ - rng_state = initial_rng + kernel_split_params.offset + pixel_x + pixel_y*stride; - /* Remap buffer according to the current work */ - buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; + /* Store buffer offset for writing to passes. */ + uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; + kernel_split_state.buffer_offset[ray_index] = buffer_offset; /* Initialize random numbers and ray. */ - kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, &rng, ray); + uint rng_hash; + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray); if(ray->t != 0.0f) { - /* Initialize throughput, L_transparent, Ray, PathState; + /* Initialize throughput, path radiance, Ray, PathState; * These rays proceed with path-iteration. */ *throughput = make_float3(1.0f, 1.0f, 1.0f); - *L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); - path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &rng, sample, ray); + path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng_hash, sample, ray); #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif -#ifdef __KERNEL_DEBUG__ - debug_data_init(debug_data); -#endif ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); enqueue_flag = 1; } else { - /* These rays do not participate in path-iteration. */ - float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - /* Accumulate result in output buffer. */ - kernel_write_pass_float4(buffer, sample, L_rad); - path_rng_end(kg, rng_state, rng); - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } } } - kernel_split_state.rng[ray_index] = rng; #ifndef __COMPUTE_DEVICE_GPU__ } diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index e4545d66eff..77fb61b80a8 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -23,22 +23,6 @@ CCL_NAMESPACE_BEGIN * The number of elements in the queues is initialized to 0; */ -/* Distributes an amount of work across all threads - * note: work done inside the loop may not show up to all threads till after - * the current kernel has completed - */ -#define parallel_for(kg, iter_name, work_size) \ - for(size_t _size = (work_size), \ - _global_size = ccl_global_size(0) * ccl_global_size(1), \ - _n = _size / _global_size, \ - _thread = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0), \ - iter_name = (_n > 0) ? (_thread * _n) : (_thread) \ - ; \ - (iter_name < (_thread+1) * _n) || (iter_name == _n * _global_size + _thread && _thread < _size % _global_size) \ - ; \ - iter_name = (iter_name != (_thread+1) * _n - 1) ? (iter_name + 1) : (_n * _global_size + _thread) \ - ) - #ifndef __KERNEL_CPU__ ccl_device void kernel_data_init( #else @@ -49,12 +33,9 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, - ccl_global uint *rng_state, #ifdef __KERNEL_OPENCL__ -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, #endif int start_sample, @@ -75,34 +56,32 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( kg->data = data; #endif - kernel_split_params.x = sx; - kernel_split_params.y = sy; - kernel_split_params.w = sw; - kernel_split_params.h = sh; + kernel_split_params.tile.x = sx; + kernel_split_params.tile.y = sy; + kernel_split_params.tile.w = sw; + kernel_split_params.tile.h = sh; - kernel_split_params.offset = offset; - kernel_split_params.stride = stride; + kernel_split_params.tile.start_sample = start_sample; + kernel_split_params.tile.num_samples = num_samples; - kernel_split_params.rng_state = rng_state; + kernel_split_params.tile.offset = offset; + kernel_split_params.tile.stride = stride; - kernel_split_params.start_sample = start_sample; - kernel_split_params.end_sample = end_sample; + kernel_split_params.tile.buffer = buffer; + + kernel_split_params.total_work_size = sw * sh * num_samples; kernel_split_params.work_pools = work_pools; - kernel_split_params.num_samples = num_samples; kernel_split_params.queue_index = Queue_index; kernel_split_params.queue_size = queuesize; kernel_split_params.use_queues_flag = use_queues_flag; - kernel_split_params.buffer = buffer; - split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state); #ifdef __KERNEL_OPENCL__ -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); #endif int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); @@ -124,30 +103,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( */ *use_queues_flag = 0; } - - /* zero the tiles pixels and initialize rng_state if this is the first sample */ - if(start_sample == 0) { - parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) { - int pixel = i / kernel_data.film.pass_stride; - int pass = i % kernel_data.film.pass_stride; - - int x = sx + pixel % sw; - int y = sy + pixel / sw; - - int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass; - - *(buffer + index) = 0.0f; - } - - parallel_for(kg, i, sw * sh) { - int x = sx + i % sw; - int y = sy + i / sw; - - int index = (offset + x + y*stride); - *(rng_state + index) = hash_int_2d(x, y); - } - } - #endif /* KERENL_STUB */ } diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index 3336c968a44..2aac66ecb84 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -62,8 +62,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, /* direct lighting */ #ifdef __EMISSION__ - RNG rng = kernel_split_state.rng[ray_index]; - bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)); @@ -83,23 +81,20 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, if(flag) { /* Sample illumination from lights to find path contribution. */ - float light_t = path_state_rng_1D(kg, &rng, state, PRNG_LIGHT); float light_u, light_v; - path_state_rng_2D(kg, &rng, state, PRNG_LIGHT_U, &light_u, &light_v); - float terminate = path_state_rng_light_termination(kg, &rng, state); + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); + float terminate = path_state_rng_light_termination(kg, state); LightSample ls; if(light_sample(kg, - light_t, light_u, light_v, + light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { Ray light_ray; -# ifdef __OBJECT_MOTION__ light_ray.time = sd->time; -# endif BsdfEval L_light; bool is_lamp; @@ -115,7 +110,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, } } } - kernel_split_state.rng[ray_index] = rng; #endif /* __EMISSION__ */ } diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 9f8dd2392d9..491487f1230 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -30,7 +30,6 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; @@ -58,22 +57,21 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( - kg, ps, sd, &volume_ray, L, tp, &rng, heterogeneous); + kg, ps, sd, &volume_ray, L, tp, heterogeneous); # ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ - kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *tp, &branched_state->path_state, L); + kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L); /* indirect light bounce */ - if(!kernel_path_volume_bounce(kg, &rng, sd, tp, ps, L, pray)) { + if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) { continue; } /* start the indirect path */ branched_state->next_closure = 0; branched_state->next_sample = j+1; - branched_state->num_samples = num_samples; /* Attempting to share too many samples is slow for volumes as it causes us to * loop here more and have many calls to kernel_volume_integrate which evaluates @@ -141,7 +139,6 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; @@ -165,15 +162,15 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) { /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous); + kg, state, sd, &volume_ray, L, throughput, heterogeneous); # ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ - kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *throughput, state, L); + kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray)) { + if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); } else { @@ -194,8 +191,6 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) } # endif /* __BRANCHED_PATH__ */ } - - kernel_split_state.rng[ray_index] = rng; } # ifdef __BRANCHED_PATH__ diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 670a557f084..906bad8ceb6 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -90,163 +90,58 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( if(ray_index != QUEUE_EMPTY_SLOT) { #endif - int stride = kernel_split_params.stride; - - unsigned int work_index; - unsigned int pixel_x; - unsigned int pixel_y; - - unsigned int tile_x; - unsigned int tile_y; - unsigned int sample; - - RNG rng = kernel_split_state.rng[ray_index]; ccl_global PathState *state = 0x0; float3 throughput; ccl_global char *ray_state = kernel_split_state.ray_state; ShaderData *sd = &kernel_split_state.sd[ray_index]; - ccl_global float *buffer = kernel_split_params.buffer; if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; throughput = kernel_split_state.throughput[ray_index]; state = &kernel_split_state.path_state[ray_index]; - work_index = kernel_split_state.work_array[ray_index]; - sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; - get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, - &tile_x, &tile_y, - work_index, - ray_index); - - buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride; - -#ifdef __SHADOW_TRICKS__ - if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) { - if(state->flag & PATH_RAY_CAMERA) { - state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_SHADOW_CATCHER_ONLY | PATH_RAY_STORE_SHADOW_INFO); - state->catcher_object = sd->object; - if(!kernel_data.background.transparent) { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - L->shadow_color = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray); - } - } - } - else { - state->flag &= ~PATH_RAY_SHADOW_CATCHER_ONLY; - } -#endif /* __SHADOW_TRICKS__ */ - - /* holdout */ -#ifdef __HOLDOUT__ - if(((sd->flag & SD_HOLDOUT) || - (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && - (state->flag & PATH_RAY_CAMERA)) + if(!kernel_path_shader_apply(kg, + sd, + state, + ray, + throughput, + emission_sd, + L, + buffer)) { - if(kernel_data.background.transparent) { - float3 holdout_weight; - if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - holdout_weight = make_float3(1.0f, 1.0f, 1.0f); - } - else { - holdout_weight = shader_holdout_eval(kg, sd); - } - /* any throughput is ok, should all be identical here */ - kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput); - } - if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - kernel_split_path_end(kg, ray_index); - } + kernel_split_path_end(kg, ray_index); } -#endif /* __HOLDOUT__ */ } if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - -#ifdef __BRANCHED_PATH__ - if(!IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) -#endif /* __BRANCHED_PATH__ */ - { - /* Holdout mask objects do not write data passes. */ - kernel_write_data_passes(kg, - buffer, - L, - sd, - sample, - state, - throughput); - } - - /* Blurring of bsdf after bounces, for rays that have a small likelihood - * of following this particular path (diffuse, rough glossy. - */ -#ifndef __BRANCHED_PATH__ - if(kernel_data.integrator.filter_glossy != FLT_MAX) -#else - if(kernel_data.integrator.filter_glossy != FLT_MAX && - (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))) -#endif /* __BRANCHED_PATH__ */ - { - float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf; - if(blur_pdf < 1.0f) { - float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f; - shader_bsdf_blur(kg, sd, blur_roughness); - } - } - -#ifdef __EMISSION__ - /* emission */ - if(sd->flag & SD_EMISSION) { - /* TODO(sergey): is isect.t wrong here for transparent surfaces? */ - float3 emission = indirect_primitive_emission( - kg, - sd, - kernel_split_state.isect[ray_index].t, - state->flag, - state->ray_pdf); - path_radiance_accum_emission(L, throughput, emission, state->bounce); - } -#endif /* __EMISSION__ */ - /* Path termination. this is a strange place to put the termination, it's * mainly due to the mixed in MIS that we use. gives too many unneeded * shader evaluations, only need emission if we are going to terminate. */ -#ifndef __BRANCHED_PATH__ - float probability = path_state_terminate_probability(kg, state, throughput); -#else - float probability = 1.0f; - - if(!kernel_data.integrator.branched) { - probability = path_state_terminate_probability(kg, state, throughput); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { - int num_samples = kernel_split_state.branched_state[ray_index].num_samples; - probability = path_state_terminate_probability(kg, state, throughput*num_samples); - } - else if(state->flag & PATH_RAY_TRANSPARENT) { - probability = path_state_terminate_probability(kg, state, throughput); - } -#endif + float probability = path_state_continuation_probability(kg, state, throughput); if(probability == 0.0f) { kernel_split_path_end(kg, ray_index); } - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - if(probability != 1.0f) { - float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE); - if(terminate >= probability) { - kernel_split_path_end(kg, ray_index); - } - else { - kernel_split_state.throughput[ray_index] = throughput/probability; - } + else if(probability < 1.0f) { + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); + if(terminate >= probability) { + kernel_split_path_end(kg, ray_index); } + else { + kernel_split_state.throughput[ray_index] = throughput/probability; + } + } + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; kernel_update_denoising_features(kg, sd, state, L); } } @@ -260,8 +155,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( } #endif /* __AO__ */ - kernel_split_state.rng[ray_index] = rng; - #ifndef __COMPUTE_DEVICE_GPU__ } #endif diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index f0ebb90f60a..437043a5971 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -33,7 +33,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg) if(ray_index != QUEUE_EMPTY_SLOT) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - if(state->bounce > kernel_data.integrator.ao_bounces) { + if(path_state_ao_bounce(kg, state)) { kernel_split_path_end(kg, ray_index); } } @@ -50,33 +50,16 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg) return; } - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - /* eval background shader if nothing hit */ - if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { - *L_transparent = (*L_transparent) + average((*throughput)); -#ifdef __PASSES__ - if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif - kernel_split_path_end(kg, ray_index); - } + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { -#ifdef __BACKGROUND__ - /* sample background shader */ - float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray); - path_radiance_accum_background(L, state, (*throughput), L_background); -#endif - kernel_split_path_end(kg, ray_index); - } + kernel_path_background(kg, state, ray, throughput, emission_sd, L); + kernel_split_path_end(kg, ray_index); } - - } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h index 82bc2f01fd7..e9fe5552e8c 100644 --- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h +++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h @@ -54,7 +54,6 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg) #endif if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - kernel_path_subsurface_accum_indirect(ss_indirect, L); /* Trace indirect subsurface rays by restarting the loop. this uses less * stack memory than invoking kernel_path_indirect. diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index c669d79ddcd..448456d167d 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -57,27 +57,10 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg) float3 throughput = kernel_split_state.throughput[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; -#ifdef __LAMP_MIS__ - if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) { - /* ray starting from previous non-transparent bounce */ - Ray light_ray; - - light_ray.P = ray.P - state->ray_t*ray.D; - state->ray_t += kernel_split_state.isect[ray_index].t; - light_ray.D = ray.D; - light_ray.t = state->ray_t; - light_ray.time = ray.time; - light_ray.dD = ray.dD; - light_ray.dP = ray.dP; - /* intersect with lamp */ - float3 emission; - - if(indirect_lamp_emission(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &light_ray, &emission)) { - path_radiance_accum_emission(L, throughput, emission, state->bounce); - } - } -#endif /* __LAMP_MIS__ */ + kernel_path_lamp_emission(kg, state, &ray, throughput, isect, emission_sd, L); } } diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index 7758e35fd32..c3373174582 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -126,7 +126,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, if(active) { ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; @@ -135,7 +134,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { #endif /* Compute direct lighting and next bounce. */ - if(!kernel_path_surface_bounce(kg, &rng, sd, throughput, state, L, ray)) { + if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) { kernel_split_path_end(kg, ray_index); } #ifdef __BRANCHED_PATH__ @@ -157,8 +156,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, } } #endif /* __BRANCHED_PATH__ */ - - kernel_split_state.rng[ray_index] = rng; } /* Enqueue RAY_UPDATE_BUFFER rays. */ diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index a7ecde7c80d..5ad62b585fe 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -29,77 +29,53 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { */ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; - unsigned int my_sample; - unsigned int pixel_x; - unsigned int pixel_y; - unsigned int tile_x; - unsigned int tile_y; - - unsigned int work_index = 0; /* Get work. */ - if(!get_next_work(kg, &work_index, ray_index)) { + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; + uint work_index; + + if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { /* No more work, mark ray as inactive */ kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; return; } - /* Get the sample associated with the work. */ - my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; - - /* Get pixel and tile position associated with the work. */ - get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, - &tile_x, &tile_y, - work_index, - ray_index); - kernel_split_state.work_array[ray_index] = work_index; - - ccl_global uint *rng_state = kernel_split_params.rng_state; - rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride; + ccl_global WorkTile *tile = &kernel_split_params.tile; + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - ccl_global float *buffer = kernel_split_params.buffer; - buffer += (kernel_split_params.offset + pixel_x + pixel_y * kernel_split_params.stride) * kernel_data.film.pass_stride; - - RNG rng = kernel_split_state.rng[ray_index]; + /* Store buffer offset for writing to passes. */ + uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; + kernel_split_state.buffer_offset[ray_index] = buffer_offset; /* Initialize random numbers and ray. */ + uint rng_hash; kernel_path_trace_setup(kg, - rng_state, - my_sample, - pixel_x, pixel_y, - &rng, + sample, + x, y, + &rng_hash, &kernel_split_state.ray[ray_index]); if(kernel_split_state.ray[ray_index].t != 0.0f) { - /* Initialize throughput, L_transparent, Ray, PathState; + /* Initialize throughput, path radiance, Ray, PathState; * These rays proceed with path-iteration. */ kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); - kernel_split_state.L_transparent[ray_index] = 0.0f; path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], &kernel_split_state.path_state[ray_index], - &rng, - my_sample, + rng_hash, + sample, &kernel_split_state.ray[ray_index]); #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif - -#ifdef __KERNEL_DEBUG__ - debug_data_init(&kernel_split_state.debug_data[ray_index]); -#endif } else { - /* These rays do not participate in path-iteration. */ - float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - /* Accumulate result in output buffer. */ - kernel_write_pass_float4(buffer, my_sample, L_rad); - path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]); ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); } - kernel_split_state.rng[ray_index] = rng; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index e2e841f36d3..66ce2dfb6f1 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg, int queue_number = -1; if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) { + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; } else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index 45984ca509b..f5378bc172b 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -59,52 +59,14 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) return; } -#ifdef __KERNEL_DEBUG__ - DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; -#endif - Intersection isect; - PathState state = kernel_split_state.path_state[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - /* intersect scene */ - uint visibility = path_state_ray_visibility(kg, &state); - - if(state.bounce > kernel_data.integrator.ao_bounces) { - visibility = PATH_RAY_SHADOW; - ray.t = kernel_data.background.ao_distance; - } - -#ifdef __HAIR__ - float difl = 0.0f, extmax = 0.0f; - uint lcg_state = 0; - RNG rng = kernel_split_state.rng[ray_index]; - - if(kernel_data.bvh.have_curves) { - if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) { - float3 pixdiff = ray.dD.dx + ray.dD.dy; - /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ - difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; - } - - extmax = kernel_data.curve.maximum_width; - lcg_state = lcg_state_init(&rng, state.rng_offset, state.sample, 0x51633e2d); - } - - bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax); -#else - bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f); -#endif + Intersection isect; + bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L); kernel_split_state.isect[ray_index] = isect; -#ifdef __KERNEL_DEBUG__ - if(state.flag & PATH_RAY_CAMERA) { - debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes; - debug_data->num_bvh_traversed_instances += isect.num_traversed_instances; - debug_data->num_bvh_intersections += isect.num_intersections; - } - debug_data->num_ray_bounces++; -#endif - if(!hit) { /* Change the state of rays that hit the background; * These rays undergo special processing in the diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 2801b32f285..7032461b04a 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -48,30 +48,18 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) ccl_global char *ray_state = kernel_split_state.ray_state; if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - RNG rng = kernel_split_state.rng[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; -#ifndef __BRANCHED_PATH__ - float rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF); - shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN); -#else - ShaderContext ctx = SHADER_CONTEXT_MAIN; - float rbsdf = 0.0f; - - if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { - rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF); - + shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag); +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) { + shader_merge_closures(&kernel_split_state.sd[ray_index]); } - - if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { - ctx = SHADER_CONTEXT_INDIRECT; + else +#endif + { + shader_prepare_closures(&kernel_split_state.sd[ray_index], state); } - - shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, ctx); - shader_merge_closures(&kernel_split_state.sd[ray_index]); -#endif /* __BRANCHED_PATH__ */ - - kernel_split_state.rng[ray_index] = rng; } } diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h index 297decb0bc2..5a55b680695 100644 --- a/intern/cycles/kernel/split/kernel_shader_sort.h +++ b/intern/cycles/kernel/split/kernel_shader_sort.h @@ -39,7 +39,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local ushort *local_index = &locals->local_index[0]; /* copy to local memory */ - for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { + for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { uint idx = offset + i + lid; uint add = input + idx; uint value = (~0); @@ -59,9 +59,9 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg, # ifdef __KERNEL_OPENCL__ /* bitonic sort */ - for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) { - for (uint inc = length; inc > 0; inc >>= 1) { - for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) { + for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) { + for(uint inc = length; inc > 0; inc >>= 1) { + for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) { uint i = lid + ii; bool direction = ((i & (length << 1)) != 0); uint j = i ^ inc; @@ -81,7 +81,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg, # endif /* __KERNEL_OPENCL__ */ /* copy to destination */ - for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { + for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { uint idx = offset + i + lid; uint lidx = local_index[i + lid]; uint outi = output + idx; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index 474286285a9..79aa2c9435b 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -37,21 +37,18 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; float3 throughput = kernel_split_state.throughput[ray_index]; #ifdef __BRANCHED_PATH__ if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { #endif - kernel_path_ao(kg, sd, emission_sd, L, state, &rng, throughput, shader_bsdf_alpha(kg, sd)); + kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); #ifdef __BRANCHED_PATH__ } else { - kernel_branched_path_ao(kg, sd, emission_sd, L, state, &rng, throughput); + kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); } #endif - - kernel_split_state.rng[ray_index] = rng; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index 78e61709b01..b52f9a5eb81 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -45,7 +45,6 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; float3 throughput = kernel_split_state.throughput[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; @@ -75,7 +74,6 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) if(use_branched) { kernel_branched_path_surface_connect_light(kg, - &rng, sd, emission_sd, state, @@ -91,10 +89,11 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) float3 shadow; if(!shadow_blocked(kg, - emission_sd, - state, - &ray, - &shadow)) + sd, + emission_sd, + state, + &ray, + &shadow)) { /* accumulate */ path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); @@ -103,8 +102,6 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) path_radiance_accum_total_light(L, state, throughput, &L_light); } } - - kernel_split_state.rng[ray_index] = rng; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 08f0124b529..558d327bc76 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -63,7 +63,7 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index) PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; path_radiance_sum_indirect(L); - path_radiance_accum_sample(orig_ray_L, L, 1); + path_radiance_accum_sample(orig_ray_L, L); atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count); diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 4bb2f0d3d80..b0e6e5f5250 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -22,28 +22,15 @@ CCL_NAMESPACE_BEGIN /* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */ typedef struct SplitParams { - int x; - int y; - int w; - int h; - - int offset; - int stride; - - ccl_global uint *rng_state; - - int start_sample; - int end_sample; + WorkTile tile; + uint total_work_size; ccl_global unsigned int *work_pools; - unsigned int num_samples; ccl_global int *queue_index; int queue_size; ccl_global char *use_queues_flag; - ccl_global float *buffer; - /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */ int dummy_sd_flag; } SplitParams; @@ -56,14 +43,6 @@ typedef struct SplitParams { /* SPLIT_DATA_ENTRY(type, name, num) */ -#if defined(WITH_CYCLES_DEBUG) || defined(__KERNEL_DEBUG__) -/* DebugData memory */ -# define SPLIT_DATA_DEBUG_ENTRIES \ - SPLIT_DATA_ENTRY(DebugData, debug_data, 1) -#else -# define SPLIT_DATA_DEBUG_ENTRIES -#endif /* DEBUG */ - #ifdef __BRANCHED_PATH__ typedef ccl_global struct SplitBranchedState { @@ -80,7 +59,6 @@ typedef ccl_global struct SplitBranchedState { /* indirect loop state */ int next_closure; int next_sample; - int num_samples; #ifdef __SUBSURFACE__ int ss_next_closure; @@ -122,9 +100,7 @@ typedef ccl_global struct SplitBranchedState { #endif /* __VOLUME__ */ #define SPLIT_DATA_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ @@ -133,19 +109,16 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \ - SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \ + SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_DEBUG_ENTRIES \ /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ - SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ @@ -158,7 +131,6 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_DEBUG_ENTRIES \ /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index d5083b23f80..3b957856aea 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -38,7 +38,6 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; ShaderData *sd = &branched_state->sd; - RNG rng = kernel_split_state.rng[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; @@ -52,14 +51,12 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) { - branched_state->lcg_state = lcg_state_init(&rng, - branched_state->path_state.rng_offset, - branched_state->path_state.sample, - 0x68bc21eb); + branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state, + 0x68bc21eb); } int num_samples = kernel_data.integrator.subsurface_samples; float num_samples_inv = 1.0f/num_samples; - RNG bssrdf_rng = cmj_hash(rng, i); + uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); /* do subsurface scatter step with copy of shader data, this will * replace the BSSRDF with a diffuse BSDF closure */ @@ -67,7 +64,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect; float bssrdf_u, bssrdf_v; path_branched_rng_2D(kg, - &bssrdf_rng, + bssrdf_rng_hash, &branched_state->path_state, j, num_samples, @@ -77,7 +74,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it /* intersection is expensive so avoid doing multiple times for the same input */ if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) { - RNG lcg_state = branched_state->lcg_state; + uint lcg_state = branched_state->lcg_state; SubsurfaceIntersection ss_isect_private; branched_state->num_hits = subsurface_scatter_multi_intersect(kg, @@ -152,7 +149,6 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it int all = (kernel_data.integrator.sample_all_lights_direct) || (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER); kernel_branched_path_surface_connect_light(kg, - &rng, bssrdf_sd, emission_sd, hit_state, @@ -229,7 +225,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - RNG rng = kernel_split_state.rng[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; @@ -246,7 +241,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) emission_sd, L, state, - &rng, ray, throughput, ss_indirect)) @@ -256,21 +250,17 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) #ifdef __BRANCHED_PATH__ } else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { - float bssrdf_probability; - ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability); + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, + state, + PRNG_BSDF_U, + &bssrdf_u, &bssrdf_v); - /* modify throughput for picking bssrdf or bsdf */ - *throughput *= bssrdf_probability; + const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u); /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { - uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb); - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, - &rng, - state, - PRNG_BSDF_U, - &bssrdf_u, &bssrdf_v); + uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb); subsurface_scatter_step(kg, sd, state, @@ -290,7 +280,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) } #endif } - kernel_split_state.rng[ray_index] = rng; } # ifdef __BRANCHED_PATH__ |