/* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value) { ccl_global float *buf = buffer; #if defined(__SPLIT_KERNEL__) atomic_add_and_fetch_float(buf, value); #else *buf = (sample == 0)? value: *buf + value; #endif /* __SPLIT_KERNEL__ */ } ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value) { #if defined(__SPLIT_KERNEL__) ccl_global float *buf_x = buffer + 0; ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; atomic_add_and_fetch_float(buf_x, value.x); atomic_add_and_fetch_float(buf_y, value.y); atomic_add_and_fetch_float(buf_z, value.z); #else ccl_global float3 *buf = (ccl_global float3*)buffer; *buf = (sample == 0)? value: *buf + value; #endif /* __SPLIT_KERNEL__ */ } ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value) { #if defined(__SPLIT_KERNEL__) ccl_global float *buf_x = buffer + 0; ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; ccl_global float *buf_w = buffer + 3; atomic_add_and_fetch_float(buf_x, value.x); atomic_add_and_fetch_float(buf_y, value.y); atomic_add_and_fetch_float(buf_z, value.z); atomic_add_and_fetch_float(buf_w, value.w); #else ccl_global float4 *buf = (ccl_global float4*)buffer; *buf = (sample == 0)? value: *buf + value; #endif /* __SPLIT_KERNEL__ */ } #ifdef __DENOISING_FEATURES__ ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, int sample, float value) { kernel_write_pass_float(buffer, sample, value); /* The online one-pass variance update that's used for the megakernel can't easily be implemented * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ # ifdef __SPLIT_KERNEL__ kernel_write_pass_float(buffer+1, sample, value*value); # else if(sample == 0) { kernel_write_pass_float(buffer+1, sample, 0.0f); } else { float new_mean = buffer[0] * (1.0f / (sample + 1)); float old_mean = (buffer[0] - value) * (1.0f / sample); kernel_write_pass_float(buffer+1, sample, (value - new_mean) * (value - old_mean)); } # endif } # if defined(__SPLIT_KERNEL__) # define kernel_write_pass_float3_unaligned kernel_write_pass_float3 # else ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, int sample, float3 value) { buffer[0] = (sample == 0)? value.x: buffer[0] + value.x; buffer[1] = (sample == 0)? value.y: buffer[1] + value.y; buffer[2] = (sample == 0)? value.z: buffer[2] + value.z; } # endif ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, int sample, float3 value) { kernel_write_pass_float3_unaligned(buffer, sample, value); # ifdef __SPLIT_KERNEL__ kernel_write_pass_float3_unaligned(buffer+3, sample, value*value); # else if(sample == 0) { kernel_write_pass_float3_unaligned(buffer+3, sample, make_float3(0.0f, 0.0f, 0.0f)); } else { float3 sum = make_float3(buffer[0], buffer[1], buffer[2]); float3 new_mean = sum * (1.0f / (sample + 1)); float3 old_mean = (sum - value) * (1.0f / sample); kernel_write_pass_float3_unaligned(buffer+3, sample, (value - new_mean) * (value - old_mean)); } # endif } ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer, int sample, float path_total, float path_total_shaded) { if(kernel_data.film.pass_denoising_data == 0) return; buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; path_total = ensure_finite(path_total); path_total_shaded = ensure_finite(path_total_shaded); kernel_write_pass_float(buffer, sample/2, path_total); kernel_write_pass_float(buffer+1, sample/2, path_total_shaded); float value = path_total_shaded / max(path_total, 1e-7f); # ifdef __SPLIT_KERNEL__ kernel_write_pass_float(buffer+2, sample/2, value*value); # else if(sample < 2) { kernel_write_pass_float(buffer+2, sample/2, 0.0f); } else { float old_value = (buffer[1] - path_total_shaded) / max(buffer[0] - path_total, 1e-7f); float new_value = buffer[1] / max(buffer[0], 1e-7f); kernel_write_pass_float(buffer+2, sample, (value - new_value) * (value - old_value)); } # endif } #endif /* __DENOISING_FEATURES__ */ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state, PathRadiance *L) { #ifdef __DENOISING_FEATURES__ if(state->denoising_feature_weight == 0.0f) { return; } L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length); /* Skip implicitly transparent surfaces. */ if(sd->flag & SD_HAS_ONLY_VOLUME) { return; } float3 normal = make_float3(0.0f, 0.0f, 0.0f); float3 albedo = make_float3(0.0f, 0.0f, 0.0f); float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; for(int i = 0; i < sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) continue; /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ normal += sc->N * sc->sample_weight; sum_weight += sc->sample_weight; if(!bsdf_is_specular_like(sc)) { albedo += sc->weight; sum_nonspecular_weight += sc->sample_weight; } } /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) { if(sum_weight != 0.0f) { normal /= sum_weight; } L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal); L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo); state->denoising_feature_weight = 0.0f; } #else (void) kg; (void) sd; (void) state; (void) L; #endif /* __DENOISING_FEATURES__ */ } ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput) { #ifdef __PASSES__ int path_flag = state->flag; if(!(path_flag & PATH_RAY_CAMERA)) return; int flag = kernel_data.film.pass_flag; if(!(flag & PASS_ALL)) return; if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { if(!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { if(sample == 0) { if(flag & PASS_DEPTH) { float depth = camera_distance(kg, sd->P); kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth); } if(flag & PASS_OBJECT_ID) { float id = object_pass_id(kg, sd->object); kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id); } if(flag & PASS_MATERIAL_ID) { float id = shader_pass_id(kg, sd); kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, sample, id); } } if(flag & PASS_NORMAL) { float3 normal = sd->N; kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal); } if(flag & PASS_UV) { float3 uv = primitive_uv(kg, sd); kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, sample, uv); } if(flag & PASS_MOTION) { float4 speed = primitive_motion_vector(kg, sd); kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, sample, speed); kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, sample, 1.0f); } state->flag |= PATH_RAY_SINGLE_PASS_DONE; } } if(flag & (PASS_DIFFUSE_INDIRECT|PASS_DIFFUSE_COLOR|PASS_DIFFUSE_DIRECT)) L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput; if(flag & (PASS_GLOSSY_INDIRECT|PASS_GLOSSY_COLOR|PASS_GLOSSY_DIRECT)) L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput; if(flag & (PASS_TRANSMISSION_INDIRECT|PASS_TRANSMISSION_COLOR|PASS_TRANSMISSION_DIRECT)) L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput; if(flag & (PASS_SUBSURFACE_INDIRECT|PASS_SUBSURFACE_COLOR|PASS_SUBSURFACE_DIRECT)) L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput; if(flag & PASS_MIST) { /* bring depth into 0..1 range */ float mist_start = kernel_data.film.mist_start; float mist_inv_depth = kernel_data.film.mist_inv_depth; float depth = camera_distance(kg, sd->P); float mist = saturate((depth - mist_start)*mist_inv_depth); /* falloff */ float mist_falloff = kernel_data.film.mist_falloff; if(mist_falloff == 1.0f) ; else if(mist_falloff == 2.0f) mist = mist*mist; else if(mist_falloff == 0.5f) mist = sqrtf(mist); else mist = powf(mist, mist_falloff); /* modulate by transparency */ float3 alpha = shader_bsdf_alpha(kg, sd); L->mist += (1.0f - mist)*average(throughput*alpha); } #endif } ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, int sample) { #ifdef __PASSES__ int flag = kernel_data.film.pass_flag; if(!kernel_data.film.use_light_pass) return; if(flag & PASS_DIFFUSE_INDIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, sample, L->indirect_diffuse); if(flag & PASS_GLOSSY_INDIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, sample, L->indirect_glossy); if(flag & PASS_TRANSMISSION_INDIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, sample, L->indirect_transmission); if(flag & PASS_SUBSURFACE_INDIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, sample, L->indirect_subsurface); if(flag & PASS_DIFFUSE_DIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, sample, L->direct_diffuse); if(flag & PASS_GLOSSY_DIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, sample, L->direct_glossy); if(flag & PASS_TRANSMISSION_DIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, sample, L->direct_transmission); if(flag & PASS_SUBSURFACE_DIRECT) kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, sample, L->direct_subsurface); if(flag & PASS_EMISSION) kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, sample, L->emission); if(flag & PASS_BACKGROUND) kernel_write_pass_float3(buffer + kernel_data.film.pass_background, sample, L->background); if(flag & PASS_AO) kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, sample, L->ao); if(flag & PASS_DIFFUSE_COLOR) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, sample, L->color_diffuse); if(flag & PASS_GLOSSY_COLOR) kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, sample, L->color_glossy); if(flag & PASS_TRANSMISSION_COLOR) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, sample, L->color_transmission); if(flag & PASS_SUBSURFACE_COLOR) kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, sample, L->color_subsurface); if(flag & PASS_SHADOW) { float4 shadow = L->shadow; shadow.w = kernel_data.film.pass_shadow_scale; kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, sample, shadow); } if(flag & PASS_MIST) kernel_write_pass_float(buffer + kernel_data.film.pass_mist, sample, 1.0f - L->mist); #endif } ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer, int sample, PathRadiance *L, float alpha, bool is_shadow_catcher) { if(L) { float3 L_sum; #ifdef __SHADOW_TRICKS__ if(is_shadow_catcher) { L_sum = path_radiance_sum_shadowcatcher(kg, L, &alpha); } else #endif /* __SHADOW_TRICKS__ */ { L_sum = path_radiance_clamp_and_sum(kg, L); } kernel_write_pass_float4(buffer, sample, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); kernel_write_light_passes(kg, buffer, L, sample); #ifdef __DENOISING_FEATURES__ if(kernel_data.film.pass_denoising_data) { # ifdef __SHADOW_TRICKS__ kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, average(L->path_total), average(L->path_total_shaded)); # else kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); # endif if(kernel_data.film.pass_denoising_clean) { float3 noisy, clean; #ifdef __SHADOW_TRICKS__ if(is_shadow_catcher) { noisy = L_sum; clean = make_float3(0.0f, 0.0f, 0.0f); } else #endif /* __SHADOW_TRICKS__ */ { path_radiance_split_denoising(kg, L, &noisy, &clean); } kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample, noisy); kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, sample, clean); } else { kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample, ensure_finite3(L_sum)); } kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample, L->denoising_normal); kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample, L->denoising_albedo); kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, sample, L->denoising_depth); } #endif /* __DENOISING_FEATURES__ */ } else { kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f)); #ifdef __DENOISING_FEATURES__ if(kernel_data.film.pass_denoising_data) { kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample, make_float3(0.0f, 0.0f, 0.0f)); kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample, make_float3(0.0f, 0.0f, 0.0f)); kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample, make_float3(0.0f, 0.0f, 0.0f)); kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, sample, 0.0f); if(kernel_data.film.pass_denoising_clean) { kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, sample, make_float3(0.0f, 0.0f, 0.0f)); } } #endif /* __DENOISING_FEATURES__ */ } } CCL_NAMESPACE_END