diff options
author | Stefan Werner <stefan.werner@intel.com> | 2022-04-01 20:44:02 +0300 |
---|---|---|
committer | Stefan Werner <stefan.werner@intel.com> | 2022-04-01 20:44:02 +0300 |
commit | 9c6dff70c88ddefc5b26f85db3d86ad997409781 (patch) | |
tree | dccb0b1692577a264fc416015576fd733043517a /intern/cycles/kernel | |
parent | 542c03fed5dad18331bb2c40cb883ff220120c13 (diff) |
Cycles: Introduce postfix for kernel body definition
Increases flexibility of code-generation for kernel entry points.
Currently no functional changes, preparing for integration with oneAPI.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/device/cuda/config.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 40 | ||||
-rw-r--r-- | intern/cycles/kernel/device/hip/config.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 1 |
4 files changed, 42 insertions, 1 deletions
diff --git a/intern/cycles/kernel/device/cuda/config.h b/intern/cycles/kernel/device/cuda/config.h index 1f66bb0175a..88149e92ec9 100644 --- a/intern/cycles/kernel/device/cuda/config.h +++ b/intern/cycles/kernel/device/cuda/config.h @@ -88,6 +88,7 @@ extern "C" __global__ void __launch_bounds__(block_num_threads) #define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__) +#define ccl_gpu_kernel_postfix #define ccl_gpu_kernel_call(x) x diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 26ab99766ad..82b51843864 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -58,6 +58,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_init_from_camera, @@ -89,6 +90,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call( integrator_init_from_camera(nullptr, state, tile, render_buffer, x, y, sample)); } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_init_from_bake, @@ -120,6 +122,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call( integrator_init_from_bake(nullptr, state, tile, render_buffer, x, y, sample)); } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_intersect_closest, @@ -134,6 +137,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state, render_buffer)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_intersect_shadow, @@ -147,6 +151,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_intersect_shadow(NULL, state)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_intersect_subsurface, @@ -160,6 +165,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_intersect_subsurface(NULL, state)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_intersect_volume_stack, @@ -173,6 +179,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_intersect_volume_stack(NULL, state)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_background, @@ -187,6 +194,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_shade_background(NULL, state, render_buffer)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_light, @@ -201,6 +209,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_shade_light(NULL, state, render_buffer)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_shadow, @@ -215,6 +224,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_shade_shadow(NULL, state, render_buffer)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_surface, @@ -229,6 +239,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_shade_surface(NULL, state, render_buffer)); } } +ccl_gpu_kernel_postfix #ifdef __KERNEL_METAL__ constant int __dummy_constant [[function_constant(0)]]; @@ -256,6 +267,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) #endif } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_volume, @@ -270,6 +282,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(integrator_shade_volume(NULL, state, render_buffer)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_queued_paths_array, @@ -288,6 +301,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_queued_shadow_paths_array, @@ -306,6 +320,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_active_paths_array, @@ -321,6 +336,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_terminated_paths_array, @@ -337,6 +353,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_terminated_shadow_paths_array, @@ -353,6 +370,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_sorted_paths_array, @@ -380,6 +398,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) key_prefix_sum, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_compact_paths_array, @@ -399,6 +418,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_compact_states, @@ -416,6 +436,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_call(integrator_state_move(NULL, to_state, from_state)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_compact_shadow_paths_array, @@ -435,6 +456,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) num_indices, ccl_gpu_kernel_lambda_pass); } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(integrator_compact_shadow_states, @@ -452,12 +474,14 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_call(integrator_shadow_state_move(NULL, to_state, from_state)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel_threads(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature( prefix_sum, ccl_global int *counter, ccl_global int *prefix_sum, int num_values) { gpu_parallel_prefix_sum(ccl_gpu_global_id_x(), counter, prefix_sum, num_values); } +ccl_gpu_kernel_postfix /* -------------------------------------------------------------------- * Adaptive sampling. @@ -494,6 +518,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) atomic_fetch_and_add_uint32(num_active_pixels, popcount(num_active_pixels_mask)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(adaptive_sampling_filter_x, @@ -512,6 +537,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride)); } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(adaptive_sampling_filter_y, @@ -530,6 +556,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride)); } } +ccl_gpu_kernel_postfix /* -------------------------------------------------------------------- * Cryptomatte. @@ -546,6 +573,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index)); } } +ccl_gpu_kernel_postfix /* -------------------------------------------------------------------- * Film. @@ -627,6 +655,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb \ FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \ } \ + ccl_gpu_kernel_postfix \ \ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \ @@ -666,7 +695,8 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb const half4 half_pixel = float4_to_half4_display( \ make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \ kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \ - } + } \ + ccl_gpu_kernel_postfix /* 1 channel inputs */ KERNEL_FILM_CONVERT_VARIANT(depth, 1) @@ -706,6 +736,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(kernel_displace_evaluate(NULL, input, output, offset + i)); } } +ccl_gpu_kernel_postfix /* Background */ @@ -721,6 +752,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_call(kernel_background_evaluate(NULL, input, output, offset + i)); } } +ccl_gpu_kernel_postfix /* Curve Shadow Transparency */ @@ -737,6 +769,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) kernel_curve_shadow_transparency_evaluate(NULL, input, output, offset + i)); } } +ccl_gpu_kernel_postfix /* -------------------------------------------------------------------- * Denoising. @@ -770,6 +803,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) color_out[1] = clamp(color_out[1], 0.0f, 10000.0f); color_out[2] = clamp(color_out[2], 0.0f, 10000.0f); } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(filter_guiding_preprocess, @@ -849,6 +883,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) flow_out[1] = -motion_in[1] * pixel_scale; } } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(filter_guiding_set_fake_albedo, @@ -877,6 +912,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) albedo_out[1] = 0.5f; albedo_out[2] = 0.5f; } +ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(filter_color_postprocess, @@ -936,6 +972,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) denoised_pixel[3] = 0; } } +ccl_gpu_kernel_postfix /* -------------------------------------------------------------------- * Shadow catcher. @@ -961,3 +998,4 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) atomic_fetch_and_add_uint32(num_possible_splits, popcount(can_split_mask)); } } +ccl_gpu_kernel_postfix diff --git a/intern/cycles/kernel/device/hip/config.h b/intern/cycles/kernel/device/hip/config.h index a5a5924d5e0..c7e7306d628 100644 --- a/intern/cycles/kernel/device/hip/config.h +++ b/intern/cycles/kernel/device/hip/config.h @@ -31,6 +31,7 @@ extern "C" __global__ void __launch_bounds__(block_num_threads) #define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__) +#define ccl_gpu_kernel_postfix #define ccl_gpu_kernel_call(x) x diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index c12987c0a91..4e309f16c08 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -132,6 +132,7 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ uint simd_group_index, \ uint num_simd_groups) ccl_global const +#define ccl_gpu_kernel_postfix #define ccl_gpu_kernel_call(x) context.x /* define a function object where "func" is the lambda body, and additional parameters are used to specify captured state */ |