Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Werner <stefan.werner@intel.com>2022-04-01 20:44:02 +0300
committerStefan Werner <stefan.werner@intel.com>2022-04-01 20:44:02 +0300
commit9c6dff70c88ddefc5b26f85db3d86ad997409781 (patch)
treedccb0b1692577a264fc416015576fd733043517a
parent542c03fed5dad18331bb2c40cb883ff220120c13 (diff)
Cycles: Introduce postfix for kernel body definition
Increases flexibility of code-generation for kernel entry points. Currently no functional changes, preparing for integration with oneAPI.
-rw-r--r--intern/cycles/kernel/device/cuda/config.h1
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h40
-rw-r--r--intern/cycles/kernel/device/hip/config.h1
-rw-r--r--intern/cycles/kernel/device/metal/compat.h1
4 files changed, 42 insertions, 1 deletions
diff --git a/intern/cycles/kernel/device/cuda/config.h b/intern/cycles/kernel/device/cuda/config.h
index 1f66bb0175a..88149e92ec9 100644
--- a/intern/cycles/kernel/device/cuda/config.h
+++ b/intern/cycles/kernel/device/cuda/config.h
@@ -88,6 +88,7 @@
extern "C" __global__ void __launch_bounds__(block_num_threads)
#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
+#define ccl_gpu_kernel_postfix
#define ccl_gpu_kernel_call(x) x
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 26ab99766ad..82b51843864 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -58,6 +58,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_init_from_camera,
@@ -89,6 +90,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(
integrator_init_from_camera(nullptr, state, tile, render_buffer, x, y, sample));
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_init_from_bake,
@@ -120,6 +122,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(
integrator_init_from_bake(nullptr, state, tile, render_buffer, x, y, sample));
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
@@ -134,6 +137,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state, render_buffer));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_shadow,
@@ -147,6 +151,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_intersect_shadow(NULL, state));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
@@ -160,6 +165,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_intersect_subsurface(NULL, state));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
@@ -173,6 +179,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_intersect_volume_stack(NULL, state));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_background,
@@ -187,6 +194,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_shade_background(NULL, state, render_buffer));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_light,
@@ -201,6 +209,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_shade_light(NULL, state, render_buffer));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_shadow,
@@ -215,6 +224,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_shade_shadow(NULL, state, render_buffer));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface,
@@ -229,6 +239,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_shade_surface(NULL, state, render_buffer));
}
}
+ccl_gpu_kernel_postfix
#ifdef __KERNEL_METAL__
constant int __dummy_constant [[function_constant(0)]];
@@ -256,6 +267,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
#endif
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,
@@ -270,6 +282,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(integrator_shade_volume(NULL, state, render_buffer));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_queued_paths_array,
@@ -288,6 +301,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_queued_shadow_paths_array,
@@ -306,6 +320,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_active_paths_array,
@@ -321,6 +336,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_terminated_paths_array,
@@ -337,6 +353,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_terminated_shadow_paths_array,
@@ -353,6 +370,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_sorted_paths_array,
@@ -380,6 +398,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
key_prefix_sum,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_compact_paths_array,
@@ -399,6 +418,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_compact_states,
@@ -416,6 +436,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_call(integrator_state_move(NULL, to_state, from_state));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_compact_shadow_paths_array,
@@ -435,6 +456,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
num_indices,
ccl_gpu_kernel_lambda_pass);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_compact_shadow_states,
@@ -452,12 +474,14 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
ccl_gpu_kernel_call(integrator_shadow_state_move(NULL, to_state, from_state));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(
prefix_sum, ccl_global int *counter, ccl_global int *prefix_sum, int num_values)
{
gpu_parallel_prefix_sum(ccl_gpu_global_id_x(), counter, prefix_sum, num_values);
}
+ccl_gpu_kernel_postfix
/* --------------------------------------------------------------------
* Adaptive sampling.
@@ -494,6 +518,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
atomic_fetch_and_add_uint32(num_active_pixels, popcount(num_active_pixels_mask));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(adaptive_sampling_filter_x,
@@ -512,6 +537,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(adaptive_sampling_filter_y,
@@ -530,6 +556,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
}
}
+ccl_gpu_kernel_postfix
/* --------------------------------------------------------------------
* Cryptomatte.
@@ -546,6 +573,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index));
}
}
+ccl_gpu_kernel_postfix
/* --------------------------------------------------------------------
* Film.
@@ -627,6 +655,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
\
FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \
} \
+ ccl_gpu_kernel_postfix \
\
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \
@@ -666,7 +695,8 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
const half4 half_pixel = float4_to_half4_display( \
make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \
- }
+ } \
+ ccl_gpu_kernel_postfix
/* 1 channel inputs */
KERNEL_FILM_CONVERT_VARIANT(depth, 1)
@@ -706,6 +736,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(kernel_displace_evaluate(NULL, input, output, offset + i));
}
}
+ccl_gpu_kernel_postfix
/* Background */
@@ -721,6 +752,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_call(kernel_background_evaluate(NULL, input, output, offset + i));
}
}
+ccl_gpu_kernel_postfix
/* Curve Shadow Transparency */
@@ -737,6 +769,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
kernel_curve_shadow_transparency_evaluate(NULL, input, output, offset + i));
}
}
+ccl_gpu_kernel_postfix
/* --------------------------------------------------------------------
* Denoising.
@@ -770,6 +803,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
color_out[1] = clamp(color_out[1], 0.0f, 10000.0f);
color_out[2] = clamp(color_out[2], 0.0f, 10000.0f);
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(filter_guiding_preprocess,
@@ -849,6 +883,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
flow_out[1] = -motion_in[1] * pixel_scale;
}
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(filter_guiding_set_fake_albedo,
@@ -877,6 +912,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
albedo_out[1] = 0.5f;
albedo_out[2] = 0.5f;
}
+ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(filter_color_postprocess,
@@ -936,6 +972,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
denoised_pixel[3] = 0;
}
}
+ccl_gpu_kernel_postfix
/* --------------------------------------------------------------------
* Shadow catcher.
@@ -961,3 +998,4 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
atomic_fetch_and_add_uint32(num_possible_splits, popcount(can_split_mask));
}
}
+ccl_gpu_kernel_postfix
diff --git a/intern/cycles/kernel/device/hip/config.h b/intern/cycles/kernel/device/hip/config.h
index a5a5924d5e0..c7e7306d628 100644
--- a/intern/cycles/kernel/device/hip/config.h
+++ b/intern/cycles/kernel/device/hip/config.h
@@ -31,6 +31,7 @@
extern "C" __global__ void __launch_bounds__(block_num_threads)
#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
+#define ccl_gpu_kernel_postfix
#define ccl_gpu_kernel_call(x) x
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index c12987c0a91..4e309f16c08 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -132,6 +132,7 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
uint simd_group_index, \
uint num_simd_groups) ccl_global const
+#define ccl_gpu_kernel_postfix
#define ccl_gpu_kernel_call(x) context.x
/* define a function object where "func" is the lambda body, and additional parameters are used to specify captured state */