diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_path_branched.h')
-rw-r--r-- | intern/cycles/kernel/kernel_path_branched.h | 231 |
1 files changed, 63 insertions, 168 deletions
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 10816d3e5d1..2597d684a36 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -23,7 +23,6 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *emission_sd, PathRadiance *L, ccl_addr_space PathState *state, - RNG *rng, float3 throughput) { int num_samples = kernel_data.integrator.ao_samples; @@ -35,7 +34,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, for(int j = 0; j < num_samples; j++) { float bsdf_u, bsdf_v; - path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); float3 ao_D; float ao_pdf; @@ -49,13 +48,11 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, light_ray.P = ray_offset(sd->P, sd->Ng); light_ray.D = ao_D; light_ray.t = kernel_data.background.ao_distance; -#ifdef __OBJECT_MOTION__ light_ray.time = sd->time; -#endif /* __OBJECT_MOTION__ */ light_ray.dP = sd->dP; light_ray.dD = differential3_zero(); - if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) { + if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { path_radiance_accum_ao(L, state, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow); } else { @@ -69,7 +66,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, /* bounce off surface and integrate indirect light */ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, - RNG *rng, ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd, + ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd, float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L) { float sum_sample_weight = 0.0f; @@ -113,35 +110,38 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba num_samples = ceil_to_int(num_samples_adjust*num_samples); float num_samples_inv = num_samples_adjust/num_samples; - RNG bsdf_rng = cmj_hash(*rng, i); for(int j = 0; j < num_samples; j++) { PathState ps = *state; float3 tp = throughput; Ray bsdf_ray; +#ifdef __SHADOW_TRICKS__ + float shadow_transparency = L->shadow_transparency; +#endif + + ps.rng_hash = cmj_hash(state->rng_hash, i); if(!kernel_branched_path_surface_bounce(kg, - &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, - L, + &L->state, &bsdf_ray, sum_sample_weight)) { continue; } + ps.rng_hash = state->rng_hash; + kernel_path_indirect(kg, indirect_sd, emission_sd, - rng, &bsdf_ray, tp*num_samples_inv, - num_samples, &ps, L); @@ -149,6 +149,10 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba * for the next samples */ path_radiance_sum_indirect(L); path_radiance_reset_indirect(L); + +#ifdef __SHADOW_TRICKS__ + L->shadow_transparency = shadow_transparency; +#endif } } } @@ -160,7 +164,6 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, ShaderData *emission_sd, PathRadiance *L, PathState *state, - RNG *rng, Ray *ray, float3 throughput) { @@ -171,17 +174,17 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, continue; /* set up random number generator */ - uint lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0x68bc21eb); + uint lcg_state = lcg_state_init(state, 0x68bc21eb); int num_samples = kernel_data.integrator.subsurface_samples; float num_samples_inv = 1.0f/num_samples; - RNG bssrdf_rng = cmj_hash(*rng, i); + uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i); /* do subsurface scatter step with copy of shader data, this will * replace the BSSRDF with a diffuse BSDF closure */ for(int j = 0; j < num_samples; j++) { SubsurfaceIntersection ss_isect; float bssrdf_u, bssrdf_v; - path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + path_branched_rng_2D(kg, bssrdf_rng_hash, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); int num_hits = subsurface_scatter_multi_intersect(kg, &ss_isect, sd, @@ -234,7 +237,6 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, (state->flag & PATH_RAY_SHADOW_CATCHER); kernel_branched_path_surface_connect_light( kg, - rng, &bssrdf_sd, emission_sd, &hit_state, @@ -248,7 +250,6 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, /* indirect light */ kernel_branched_path_surface_indirect_light( kg, - rng, &bssrdf_sd, indirect_sd, emission_sd, @@ -262,17 +263,15 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, } #endif /* __SUBSURFACE__ */ -ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, - RNG *rng, - int sample, - Ray ray, - ccl_global float *buffer, - PathRadiance *L, - bool *is_shadow_catcher) +ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, + uint rng_hash, + int sample, + Ray ray, + ccl_global float *buffer, + PathRadiance *L) { /* initialize */ float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); @@ -282,48 +281,16 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, ShaderData emission_sd, indirect_sd; PathState state; - path_state_init(kg, &emission_sd, &state, rng, sample, &ray); - -#ifdef __KERNEL_DEBUG__ - DebugData debug_data; - debug_data_init(&debug_data); -#endif /* __KERNEL_DEBUG__ */ + path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray); /* Main Loop * Here we only handle transparency intersections from the camera ray. * Indirect bounces are handled in kernel_branched_path_surface_indirect_light(). */ for(;;) { - /* intersect scene */ + /* Find intersection with objects in scene. */ Intersection isect; - uint visibility = path_state_ray_visibility(kg, &state); - -#ifdef __HAIR__ - float difl = 0.0f, extmax = 0.0f; - uint lcg_state = 0; - - if(kernel_data.bvh.have_curves) { - if(kernel_data.cam.resolution == 1) { - float3 pixdiff = ray.dD.dx + ray.dD.dy; - /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ - difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; - } - - extmax = kernel_data.curve.maximum_width; - lcg_state = lcg_state_init(rng, state.rng_offset, state.sample, 0x51633e2d); - } - - bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax); -#else - bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f); -#endif /* __HAIR__ */ - -#ifdef __KERNEL_DEBUG__ - debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes; - debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; - debug_data.num_bvh_intersections += isect.num_intersections; - debug_data.num_ray_bounces++; -#endif /* __KERNEL_DEBUG__ */ + bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L); #ifdef __VOLUME__ /* Sanitize volume stack. */ @@ -353,7 +320,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, int all = kernel_data.integrator.sample_all_lights_direct; - kernel_branched_path_volume_connect_light(kg, rng, &sd, + kernel_branched_path_volume_connect_light(kg, &sd, &emission_sd, throughput, &state, L, all, &volume_ray, &volume_segment); @@ -372,30 +339,25 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, /* scatter sample. if we use distance sampling and take just one * sample for direct and indirect light, we could share this * computation, but makes code a bit complex */ - float rphase = path_state_rng_1D_for_decision(kg, rng, &ps, PRNG_PHASE); - float rscatter = path_state_rng_1D_for_decision(kg, rng, &ps, PRNG_SCATTER_DISTANCE); + float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL); + float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE); VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false); - (void)result; - kernel_assert(result == VOLUME_PATH_SCATTERED); - - if(kernel_path_volume_bounce(kg, - rng, + if(result == VOLUME_PATH_SCATTERED && + kernel_path_volume_bounce(kg, &sd, &tp, &ps, - L, + &L->state, &pray)) { kernel_path_indirect(kg, &indirect_sd, &emission_sd, - rng, &pray, tp*num_samples_inv, - num_samples, &ps, L); @@ -409,7 +371,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, /* emission and transmittance */ if(volume_segment.closure_flag & SD_EMISSION) - path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state.bounce); + path_radiance_accum_emission(L, &state, throughput, volume_segment.accum_emission); throughput *= volume_segment.accum_transmittance; /* free cached steps */ @@ -431,29 +393,26 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, path_state_branch(&ps, j, num_samples); VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, &sd, &volume_ray, L, &tp, rng, heterogeneous); + kg, &ps, &sd, &volume_ray, L, &tp, heterogeneous); #ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* todo: support equiangular, MIS and all light sampling. * alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, L); + kernel_path_volume_connect_light(kg, &sd, &emission_sd, tp, &state, L); if(kernel_path_volume_bounce(kg, - rng, &sd, &tp, &ps, - L, + &L->state, &pray)) { kernel_path_indirect(kg, &indirect_sd, &emission_sd, - rng, &pray, tp, - num_samples, &ps, L); @@ -472,89 +431,42 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, } #endif /* __VOLUME__ */ + /* Shade background. */ if(!hit) { - /* eval background shader if nothing hit */ - if(kernel_data.background.transparent) { - L_transparent += average(throughput); - -#ifdef __PASSES__ - if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif /* __PASSES__ */ - break; - } - -#ifdef __BACKGROUND__ - /* sample background shader */ - float3 L_background = indirect_background(kg, &emission_sd, &state, &ray); - path_radiance_accum_background(L, &state, throughput, L_background); -#endif /* __BACKGROUND__ */ - + kernel_path_background(kg, &state, &ray, throughput, &emission_sd, L); break; } - /* setup shading */ + /* Setup and evaluate shader. */ shader_setup_from_ray(kg, &sd, &isect, &ray); - shader_eval_surface(kg, &sd, rng, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN); + shader_eval_surface(kg, &sd, &state, state.flag); shader_merge_closures(&sd); -#ifdef __SHADOW_TRICKS__ - if((sd.object_flag & SD_OBJECT_SHADOW_CATCHER)) { - if(state.flag & PATH_RAY_CAMERA) { - state.flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_SHADOW_CATCHER_ONLY | PATH_RAY_STORE_SHADOW_INFO); - state.catcher_object = sd.object; - if(!kernel_data.background.transparent) { - L->shadow_color = indirect_background(kg, &emission_sd, &state, &ray); - } - } - } - else { - state.flag &= ~PATH_RAY_SHADOW_CATCHER_ONLY; - } -#endif /* __SHADOW_TRICKS__ */ - - /* holdout */ -#ifdef __HOLDOUT__ - if((sd.flag & SD_HOLDOUT) || (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) { - if(kernel_data.background.transparent) { - float3 holdout_weight; - if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) { - holdout_weight = make_float3(1.0f, 1.0f, 1.0f); - } - else { - holdout_weight = shader_holdout_eval(kg, &sd); - } - /* any throughput is ok, should all be identical here */ - L_transparent += average(holdout_weight*throughput); - } - if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) { - break; - } - } -#endif /* __HOLDOUT__ */ - - /* holdout mask objects do not write data passes */ - kernel_write_data_passes(kg, buffer, L, &sd, sample, &state, throughput); - -#ifdef __EMISSION__ - /* emission */ - if(sd.flag & SD_EMISSION) { - float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf); - path_radiance_accum_emission(L, throughput, emission, state.bounce); + /* Apply shadow catcher, holdout, emission. */ + if(!kernel_path_shader_apply(kg, + &sd, + &state, + &ray, + throughput, + &emission_sd, + L, + buffer)) + { + break; } -#endif /* __EMISSION__ */ /* transparency termination */ if(state.flag & PATH_RAY_TRANSPARENT) { /* path termination. this is a strange place to put the termination, it's * mainly due to the mixed in MIS that we use. gives too many unneeded * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_terminate_probability(kg, &state, throughput); + float probability = path_state_continuation_probability(kg, &state, throughput); if(probability == 0.0f) { break; } else if(probability != 1.0f) { - float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE); + float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE); if(terminate >= probability) break; @@ -568,7 +480,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, rng, throughput); + kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, throughput); } #endif /* __AO__ */ @@ -576,7 +488,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, /* bssrdf scatter to a different location on the same object */ if(sd.flag & SD_BSSRDF) { kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd, - L, &state, rng, &ray, throughput); + L, &state, &ray, throughput); } #endif /* __SUBSURFACE__ */ @@ -588,13 +500,13 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, if(kernel_data.integrator.use_direct_light) { int all = (kernel_data.integrator.sample_all_lights_direct) || (state.flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light(kg, rng, + kernel_branched_path_surface_connect_light(kg, &sd, &emission_sd, &hit_state, throughput, 1.0f, L, all); } #endif /* __EMISSION__ */ /* indirect light */ - kernel_branched_path_surface_indirect_light(kg, rng, + kernel_branched_path_surface_indirect_light(kg, &sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, L); /* continue in case of transparency */ @@ -623,48 +535,31 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); #endif /* __VOLUME__ */ } - -#ifdef __SHADOW_TRICKS__ - *is_shadow_catcher = (state.flag & PATH_RAY_SHADOW_CATCHER); -#endif /* __SHADOW_TRICKS__ */ - -#ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); -#endif /* __KERNEL_DEBUG__ */ - - return 1.0f - L_transparent; } ccl_device void kernel_branched_path_trace(KernelGlobals *kg, - ccl_global float *buffer, ccl_global uint *rng_state, + ccl_global float *buffer, int sample, int x, int y, int offset, int stride) { /* buffer offset */ int index = offset + x + y*stride; int pass_stride = kernel_data.film.pass_stride; - rng_state += index; buffer += index*pass_stride; /* initialize random numbers and ray */ - RNG rng; + uint rng_hash; Ray ray; - kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); /* integrate */ PathRadiance L; - bool is_shadow_catcher; if(ray.t != 0.0f) { - float alpha = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher); - kernel_write_result(kg, buffer, sample, &L, alpha, is_shadow_catcher); + kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L); + kernel_write_result(kg, buffer, sample, &L); } - else { - kernel_write_result(kg, buffer, sample, NULL, 0.0f, false); - } - - path_rng_end(kg, rng_state, rng); } #endif /* __SPLIT_KERNEL__ */ |