From 049932c4c3b5b833df7e8b6be777d641d73a99f7 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Mon, 21 Aug 2017 22:50:24 +0200 Subject: Fix panorama render crash with split kernel, due to incorrect buffer pointer. Also some refactoring to clarify variable usage scope. --- intern/cycles/kernel/split/kernel_buffer_update.h | 36 +++++++++------------- ..._holdout_emission_blurring_pathtermination_ao.h | 20 +++++------- intern/cycles/kernel/split/kernel_path_init.h | 20 +++++------- 3 files changed, 28 insertions(+), 48 deletions(-) diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index de0c4160ca0..3b61319e349 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -75,7 +75,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(ray_index != QUEUE_EMPTY_SLOT) { #endif - ccl_global uint *rng_state = kernel_split_params.rng_state; int stride = kernel_split_params.stride; ccl_global char *ray_state = kernel_split_state.ray_state; @@ -83,29 +82,17 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global float *buffer = kernel_split_params.buffer; - unsigned int work_index; - ccl_global uint *initial_rng; - - unsigned int sample; - unsigned int tile_x; - unsigned int tile_y; - unsigned int pixel_x; - unsigned int pixel_y; + if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + uint work_index = kernel_split_state.work_array[ray_index]; + uint sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; - work_index = kernel_split_state.work_array[ray_index]; - sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; - get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, - &tile_x, &tile_y, - work_index, - ray_index); - initial_rng = rng_state; + uint tile_x, tile_y, pixel_x, pixel_y; + get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, ray_index); - rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; - buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; + ccl_global float *buffer = kernel_split_params.buffer; + buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; - if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { /* accumulate result in output buffer */ bool is_shadow_catcher = (state->flag & PATH_RAY_SHADOW_CATCHER); kernel_write_result(kg, buffer, sample, L, is_shadow_catcher); @@ -115,6 +102,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { /* We have completed current work; So get next work */ + uint work_index; int valid_work = get_next_work(kg, &work_index, ray_index); if(!valid_work) { /* If work is invalid, this means no more work is available and the thread may exit */ @@ -124,13 +112,17 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { kernel_split_state.work_array[ray_index] = work_index; /* Get the sample associated with the current work */ - sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + uint sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; /* Get pixel and tile position associated with current work */ + uint tile_x, tile_y, pixel_x, pixel_y; get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, ray_index); /* Remap rng_state according to the current work */ - rng_state = initial_rng + kernel_split_params.offset + pixel_x + pixel_y*stride; + ccl_global uint *rng_state = kernel_split_params.rng_state; + rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; + /* Remap buffer according to the current work */ + ccl_global float *buffer = kernel_split_params.buffer; buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; /* Initialize random numbers and ray. */ diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 3fc45afbd92..253b78526e7 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -92,28 +92,19 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( int stride = kernel_split_params.stride; - unsigned int work_index; - unsigned int pixel_x; - unsigned int pixel_y; - - unsigned int tile_x; - unsigned int tile_y; - unsigned int sample; - ccl_global PathState *state = 0x0; float3 throughput; + uint sample; ccl_global char *ray_state = kernel_split_state.ray_state; ShaderData *sd = &kernel_split_state.sd[ray_index]; ccl_global float *buffer = kernel_split_params.buffer; if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - - throughput = kernel_split_state.throughput[ray_index]; - state = &kernel_split_state.path_state[ray_index]; - - work_index = kernel_split_state.work_array[ray_index]; + uint work_index = kernel_split_state.work_array[ray_index]; sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + + uint pixel_x, pixel_y, tile_x, tile_y; get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, @@ -121,6 +112,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride; + throughput = kernel_split_state.throughput[ray_index]; + state = &kernel_split_state.path_state[ray_index]; + #ifdef __SHADOW_TRICKS__ if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) { if(state->flag & PATH_RAY_CAMERA) { diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index 8b70df16d2a..c75931855b2 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -29,13 +29,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { */ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; - unsigned int my_sample; - unsigned int pixel_x; - unsigned int pixel_y; - unsigned int tile_x; - unsigned int tile_y; - - unsigned int work_index = 0; + uint work_index = 0; /* Get work. */ if(!get_next_work(kg, &work_index, ray_index)) { /* No more work, mark ray as inactive */ @@ -45,9 +39,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { } /* Get the sample associated with the work. */ - my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + uint sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; /* Get pixel and tile position associated with the work. */ + uint pixel_x, pixel_y, tile_x, tile_y; get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, @@ -60,12 +55,11 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { ccl_global float *buffer = kernel_split_params.buffer; buffer += (kernel_split_params.offset + pixel_x + pixel_y * kernel_split_params.stride) * kernel_data.film.pass_stride; - uint rng_hash; - /* Initialize random numbers and ray. */ + uint rng_hash; kernel_path_trace_setup(kg, rng_state, - my_sample, + sample, pixel_x, pixel_y, &rng_hash, &kernel_split_state.ray[ray_index]); @@ -80,7 +74,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { &kernel_split_state.sd_DL_shadow[ray_index], &kernel_split_state.path_state[ray_index], rng_hash, - my_sample, + sample, &kernel_split_state.ray[ray_index]); #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); @@ -90,7 +84,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { /* These rays do not participate in path-iteration. */ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Accumulate result in output buffer. */ - kernel_write_pass_float4(buffer, my_sample, L_rad); + kernel_write_pass_float4(buffer, sample, L_rad); ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); } } -- cgit v1.2.3