diff options
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 152 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_film.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_light.h | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 19 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel.cu | 11 |
7 files changed, 129 insertions, 97 deletions
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index f1fc697553a..2709a9da734 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -18,38 +18,40 @@ CCL_NAMESPACE_BEGIN #ifdef __BAKING__ -ccl_device_inline void compute_light_pass( +ccl_device_noinline void compute_light_pass( KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample) { kernel_assert(kernel_data.film.use_light_pass); - PathRadiance L_sample; - PathState state; - Ray ray; float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - /* emission and indirect shader data memory used by various functions */ - ShaderData emission_sd, indirect_sd; - - ray.P = sd->P + sd->Ng; - ray.D = -sd->Ng; - ray.t = FLT_MAX; -# ifdef __CAMERA_MOTION__ - ray.time = 0.5f; -# endif + /* Emission and indirect shader data memory used by various functions. */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + ShaderData indirect_sd; - /* init radiance */ - path_radiance_init(kg, &L_sample); + /* Init radiance. */ + path_radiance_init(kg, L); - /* init path state */ - path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL); + /* Init path state. */ + PathState state; + path_state_init(kg, emission_sd, &state, rng_hash, sample, NULL); - /* evaluate surface shader */ + /* Evaluate surface shader. */ shader_eval_surface(kg, sd, &state, NULL, state.flag); /* TODO, disable more closures we don't need besides transparent */ shader_bsdf_disable_transparency(kg, sd); + /* Init ray. */ + Ray ray; + ray.P = sd->P + sd->Ng; + ray.D = -sd->Ng; + ray.t = FLT_MAX; +# ifdef __CAMERA_MOTION__ + ray.time = 0.5f; +# endif + # ifdef __BRANCHED_PATH__ if (!kernel_data.integrator.branched) { /* regular path tracer */ @@ -57,14 +59,13 @@ ccl_device_inline void compute_light_pass( /* sample ambient occlusion */ if (pass_filter & BAKE_FILTER_AO) { - kernel_path_ao( - kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd)); + kernel_path_ao(kg, sd, emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, sd)); } /* sample emission */ if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission); + path_radiance_accum_emission(kg, L, &state, throughput, emission); } bool is_sss_sample = false; @@ -77,12 +78,10 @@ ccl_device_inline void compute_light_pass( SubsurfaceIndirectRays ss_indirect; kernel_path_subsurface_init_indirect(&ss_indirect); if (kernel_path_subsurface_scatter( - kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) { + kg, sd, emission_sd, L, &state, &ray, &throughput, &ss_indirect)) { while (ss_indirect.num_rays) { - kernel_path_subsurface_setup_indirect( - kg, &ss_indirect, &state, &ray, &L_sample, &throughput); - kernel_path_indirect( - kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); + kernel_path_subsurface_setup_indirect(kg, &ss_indirect, &state, &ray, L, &throughput); + kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L); } is_sss_sample = true; } @@ -91,18 +90,18 @@ ccl_device_inline void compute_light_pass( /* sample light and BSDF */ if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) { - kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample); + kernel_path_surface_connect_light(kg, sd, emission_sd, throughput, &state, L); - if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) { + if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L->state, &ray)) { # ifdef __LAMP_MIS__ state.ray_t = 0.0f; # endif /* compute indirect light */ - kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); + kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L); /* sum and reset indirect light pass variables for the next samples */ - path_radiance_sum_indirect(&L_sample); - path_radiance_reset_indirect(&L_sample); + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); } } # ifdef __BRANCHED_PATH__ @@ -112,13 +111,13 @@ ccl_device_inline void compute_light_pass( /* sample ambient occlusion */ if (pass_filter & BAKE_FILTER_AO) { - kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput); + kernel_branched_path_ao(kg, sd, emission_sd, L, &state, throughput); } /* sample emission */ if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission); + path_radiance_accum_emission(kg, L, &state, throughput, emission); } # ifdef __SUBSURFACE__ @@ -127,7 +126,7 @@ ccl_device_inline void compute_light_pass( /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting * if scattering was successful. */ kernel_branched_path_subsurface_scatter( - kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput); + kg, sd, &indirect_sd, emission_sd, L, &state, &ray, throughput); } # endif @@ -138,19 +137,16 @@ ccl_device_inline void compute_light_pass( if (kernel_data.integrator.use_direct_light) { int all = kernel_data.integrator.sample_all_lights_direct; kernel_branched_path_surface_connect_light( - kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all); + kg, sd, emission_sd, &state, throughput, 1.0f, L, all); } # endif /* indirect light */ kernel_branched_path_surface_indirect_light( - kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample); + kg, sd, &indirect_sd, emission_sd, throughput, 1.0f, &state, L); } } # endif - - /* accumulate into master L */ - path_radiance_accum_sample(L, &L_sample); } /* this helps with AA but it's not the real solution as it does not AA the geometry @@ -225,41 +221,28 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg, return out; } -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, - ccl_global uint4 *input, - ccl_global float4 *output, - ShaderEvalType type, - int pass_filter, - int i, - int offset, - int sample) +ccl_device void kernel_bake_evaluate( + KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) { - ShaderData sd; - PathState state = {0}; - uint4 in = input[i * 2]; - uint4 diff = input[i * 2 + 1]; - - float3 out = make_float3(0.0f, 0.0f, 0.0f); + /* Setup render buffers. */ + const int index = offset + x + y * stride; + const int pass_stride = kernel_data.film.pass_stride; + buffer += index * pass_stride; - int object = in.x; - int prim = in.y; + ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive; + ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential; + ccl_global float *output = buffer + kernel_data.film.pass_combined; + int prim = __float_as_uint(primitive[1]); if (prim == -1) return; - float u = __uint_as_float(in.z); - float v = __uint_as_float(in.w); - - float dudx = __uint_as_float(diff.x); - float dudy = __uint_as_float(diff.y); - float dvdx = __uint_as_float(diff.z); - float dvdy = __uint_as_float(diff.w); + prim += kernel_data.bake.tri_offset; + /* Random number generator. */ + uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed; int num_samples = kernel_data.integrator.aa_samples; - /* random number generator */ - uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed); - float filter_x, filter_y; if (sample == 0) { filter_x = filter_y = 0.5f; @@ -268,23 +251,29 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y); } - /* subpixel u/v offset */ + /* Barycentric UV with subpixel offset. */ + float u = primitive[2]; + float v = primitive[3]; + + float dudx = differential[0]; + float dudy = differential[1]; + float dvdx = differential[2]; + float dvdy = differential[3]; + if (sample > 0) { u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), 1.0f - u); } - /* triangle */ + /* Shader data setup. */ + int object = kernel_data.bake.object_index; int shader; float3 P, Ng; triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - /* light passes */ - PathRadiance L; - path_radiance_init(kg, &L); - + ShaderData sd; shader_setup_from_sample( kg, &sd, @@ -302,7 +291,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, LAMP_NONE); sd.I = sd.N; - /* update differentials */ + /* Setup differentials. */ sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx; sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy; sd.du.dx = dudx; @@ -310,17 +299,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, sd.dv.dx = dvdx; sd.dv.dy = dvdy; - /* set RNG state for shaders that use sampling */ + /* Set RNG state for shaders that use sampling. */ + PathState state = {0}; state.rng_hash = rng_hash; state.rng_offset = 0; state.sample = sample; state.num_samples = num_samples; state.min_ray_pdf = FLT_MAX; - /* light passes if we need more than color */ - if (pass_filter & ~BAKE_FILTER_COLOR) + /* Light passes if we need more than color. */ + PathRadiance L; + int pass_filter = kernel_data.bake.pass_filter; + + if (kernel_data.bake.pass_filter & ~BAKE_FILTER_COLOR) compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample); + float3 out = make_float3(0.0f, 0.0f, 0.0f); + + ShaderEvalType type = (ShaderEvalType)kernel_data.bake.type; switch (type) { /* data passes */ case SHADER_EVAL_NORMAL: @@ -441,10 +437,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, } /* write output */ - const float output_fac = 1.0f / num_samples; - const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac; - - output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result; + const float4 result = make_float4(out.x, out.y, out.z, 1.0f); + kernel_write_pass_float4(output, result); } #endif /* __BAKING__ */ diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index 3829426f261..8344f4b4f47 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -28,13 +28,13 @@ ccl_device float4 film_get_pass_result(KernelGlobals *kg, int display_pass_components = kernel_data.film.display_pass_components; if (display_pass_components == 4) { - ccl_global float4 *in = (ccl_global float4 *)(buffer + display_pass_stride + - index * kernel_data.film.pass_stride); + float4 in = *(ccl_global float4 *)(buffer + display_pass_stride + + index * kernel_data.film.pass_stride); float alpha = use_display_sample_scale ? - (kernel_data.film.use_display_pass_alpha ? in->w : 1.0f / sample_scale) : + (kernel_data.film.use_display_pass_alpha ? in.w : 1.0f / sample_scale) : 1.0f; - pass_result = make_float4(in->x, in->y, in->z, alpha); + pass_result = make_float4(in.x, in.y, in.z, alpha); int display_divide_pass_stride = kernel_data.film.display_divide_pass_stride; if (display_divide_pass_stride != -1) { diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index ce908ce0fe2..d918abed381 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -1041,11 +1041,19 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, } } else { - /* compute random point in triangle */ - randu = sqrtf(randu); + /* compute random point in triangle. From Eric Heitz's "A Low-Distortion Map Between Triangle + * and Square" */ + float u = randu; + float v = randv; + if (v > u) { + u *= 0.5f; + v -= u; + } + else { + v *= 0.5f; + u -= v; + } - const float u = 1.0f - randu; - const float v = randv * randu; const float t = 1.0f - u - v; ls->P = u * V[0] + v * V[1] + t * V[2]; /* compute incoming direction, distance and pdf */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a1f8c35348d..304835a1685 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -395,6 +395,10 @@ typedef enum PassType { PASS_VOLUME_INDIRECT, /* No Scatter color since it's tricky to define what it would even mean. */ PASS_CATEGORY_LIGHT_END = 63, + + PASS_BAKE_PRIMITIVE, + PASS_BAKE_DIFFERENTIAL, + PASS_CATEGORY_BAKE_END = 95 } PassType; #define PASS_ANY (~0) @@ -1248,6 +1252,10 @@ typedef struct KernelFilm { float4 xyz_to_b; float4 rgb_to_y; + int pass_bake_primitive; + int pass_bake_differential; + int pad; + #ifdef __KERNEL_DEBUG__ int pass_bvh_traversed_nodes; int pass_bvh_traversed_instances; @@ -1427,6 +1435,14 @@ typedef struct KernelTables { } KernelTables; static_assert_align(KernelTables, 16); +typedef struct KernelBake { + int object_index; + int tri_offset; + int type; + int pass_filter; +} KernelBake; +static_assert_align(KernelBake, 16); + typedef struct KernelData { KernelCamera cam; KernelFilm film; @@ -1435,6 +1451,7 @@ typedef struct KernelData { KernelBVH bvh; KernelCurves curve; KernelTables tables; + KernelBake bake; } KernelData; static_assert_align(KernelData, 16); diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 683f4b88d79..ea3103f12c3 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -46,6 +46,9 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample); +void KERNEL_FUNCTION_FULL_NAME(bake)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); + /* Split kernels */ void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg, diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 091e53cfd83..5aa3fb14318 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -132,6 +132,18 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, # endif /* KERNEL_STUB */ } +/* Bake */ + +void KERNEL_FUNCTION_FULL_NAME(bake)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) +{ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, bake); +# else + kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride); +# endif /* KERNEL_STUB */ +} + /* Shader Evaluate */ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, @@ -146,12 +158,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, # ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, shader); # else - if (type >= SHADER_EVAL_BAKE) { -# ifdef __BAKING__ - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample); -# endif - } - else if (type == SHADER_EVAL_DISPLACE) { + if (type == SHADER_EVAL_DISPLACE) { kernel_displace_evaluate(kg, input, output, i); } else { diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index c4c810c6a82..d4f41132a11 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -214,13 +214,16 @@ kernel_cuda_background(uint4 *input, #ifdef __BAKING__ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample) +kernel_cuda_bake(WorkTile *tile, uint total_work_size) { - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int work_index = ccl_global_id(0); + + if(work_index < total_work_size) { + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - if(x < sx + sw) { KernelGlobals kg; - kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); + kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } } #endif |