Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Xavier Hallade <xavier.hallade@intel.com> 2022-10-06 19:35:51 +0300
committer: Xavier Hallade <xavier.hallade@intel.com> 2022-10-07 10:50:05 +0300
commit: 7eeeaec6da33971ab7805c9a4bfd5f4e186273d1 (patch)
tree: f3090686dab61d3a25d77fdbece49f1bc4c04ca4 /intern/cycles/kernel/device
parent: fc0b1627ebb821b1897cbca7f6ba9be29e52359a (diff)
Cycles: use direct linking for oneAPI backend
This is a minimal set of changes. It enables a lot of cleanup that can happen afterward, as it allows SYCL methods and objects to be used outside of kernel.cpp.

Reviewed By: brecht, sergey

Differential Revision: https://developer.blender.org/D15397
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r-- intern/cycles/kernel/device/oneapi/dll_interface_template.h | 54
-rw-r--r-- intern/cycles/kernel/device/oneapi/kernel.cpp | 467
-rw-r--r-- intern/cycles/kernel/device/oneapi/kernel.h | 19
3 files changed, 14 insertions, 526 deletions
diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h
deleted file mode 100644
index 5dd0d4203a4..00000000000
--- a/intern/cycles/kernel/device/oneapi/dll_interface_template.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2022 Intel Corporation */
-
-/* device_capabilities() returns a C string that must be free'd with oneapi_free(). */
-DLL_INTERFACE_CALL(oneapi_device_capabilities, char *)
-DLL_INTERFACE_CALL(oneapi_free, void, void *)
-DLL_INTERFACE_CALL(oneapi_get_memcapacity, size_t, SyclQueue *queue)
-
-DLL_INTERFACE_CALL(oneapi_get_num_multiprocessors, int, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_get_max_num_threads_per_multiprocessor, int, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_iterate_devices, void, OneAPIDeviceIteratorCallback cb, void *user_ptr)
-DLL_INTERFACE_CALL(oneapi_set_error_cb, void, OneAPIErrorCallback, void *user_ptr)
-
-DLL_INTERFACE_CALL(oneapi_create_queue, bool, SyclQueue *&external_queue, int device_index)
-DLL_INTERFACE_CALL(oneapi_free_queue, void, SyclQueue *queue)
-DLL_INTERFACE_CALL(
- oneapi_usm_aligned_alloc_host, void *, SyclQueue *queue, size_t memory_size, size_t alignment)
-DLL_INTERFACE_CALL(oneapi_usm_alloc_device, void *, SyclQueue *queue, size_t memory_size)
-DLL_INTERFACE_CALL(oneapi_usm_free, void, SyclQueue *queue, void *usm_ptr)
-
-DLL_INTERFACE_CALL(
- oneapi_usm_memcpy, bool, SyclQueue *queue, void *dest, void *src, size_t num_bytes)
-DLL_INTERFACE_CALL(oneapi_queue_synchronize, bool, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_usm_memset,
- bool,
- SyclQueue *queue,
- void *usm_ptr,
- unsigned char value,
- size_t num_bytes)
-
-DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue)
-
-/* Operation with Kernel globals structure - map of global/constant allocation - filled before
- * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as
- * pointer. */
-DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_set_global_memory,
- void,
- SyclQueue *queue,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-
-DLL_INTERFACE_CALL(oneapi_kernel_preferred_local_size,
- size_t,
- SyclQueue *queue,
- const DeviceKernel kernel,
- const size_t kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_enqueue_kernel,
- bool,
- KernelContext *context,
- int kernel,
- size_t global_size,
- void **args)
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 3c7a9960588..1d1700f036d 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -3,7 +3,6 @@
#ifdef WITH_ONEAPI
-/* clang-format off */
# include "kernel.h"
# include <iostream>
# include <map>
@@ -16,163 +15,16 @@
# include "kernel/device/oneapi/kernel_templates.h"
# include "kernel/device/gpu/kernel.h"
-/* clang-format on */
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
-static std::vector<sycl::device> oneapi_available_devices();
-
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
s_error_user_ptr = user_ptr;
}
-void oneapi_check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
-{
-# ifdef _DEBUG
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- sycl::info::device_type device_type =
- queue->get_device().get_info<sycl::info::device::device_type>();
- sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
- (void)usm_type;
- assert(usm_type == sycl::usm::alloc::device ||
- ((device_type == sycl::info::device_type::host ||
- device_type == sycl::info::device_type::is_cpu || allow_host) &&
- usm_type == sycl::usm::alloc::host));
-# endif
-}
-
-bool oneapi_create_queue(SyclQueue *&external_queue, int device_index)
-{
- bool finished_correct = true;
- try {
- std::vector<sycl::device> devices = oneapi_available_devices();
- if (device_index < 0 || device_index >= devices.size()) {
- return false;
- }
- sycl::queue *created_queue = new sycl::queue(devices[device_index],
- sycl::property::queue::in_order());
- external_queue = reinterpret_cast<SyclQueue *>(created_queue);
- }
- catch (sycl::exception const &e) {
- finished_correct = false;
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- }
- return finished_correct;
-}
-
-void oneapi_free_queue(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- delete queue;
-}
-
-void *oneapi_usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::aligned_alloc_host(alignment, memory_size, *queue);
-}
-
-void *oneapi_usm_alloc_device(SyclQueue *queue_, size_t memory_size)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::malloc_device(memory_size, *queue);
-}
-
-void oneapi_usm_free(SyclQueue *queue_, void *usm_ptr)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::free(usm_ptr, *queue);
-}
-
-bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, dest, true);
- oneapi_check_usm(queue_, src, true);
- sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
- try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-# else
- sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
- sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
- bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
- src_type == sycl::usm::alloc::device;
- bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
- src_type == sycl::usm::alloc::unknown;
- /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
- * may not wait until the end of the transfer before using the memory.
- */
- if (from_device_to_host || host_or_device_memop_with_offset)
- mem_event.wait();
- return true;
-# endif
-}
-
-bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
- try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-# else
- (void)mem_event;
- return true;
-# endif
-}
-
-bool oneapi_queue_synchronize(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- try {
- queue->wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-}
-
/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality and
* also trigger runtime compilation of all existing oneAPI kernels */
bool oneapi_run_test_kernel(SyclQueue *queue_)
@@ -216,60 +68,13 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
return true;
}
-bool oneapi_kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size)
-{
- kernel_global_size = sizeof(KernelGlobalsGPU);
-
- return true;
-}
-
-void oneapi_set_global_memory(SyclQueue *queue_,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-{
- assert(queue_);
- assert(kernel_globals);
- assert(memory_name);
- assert(memory_device_pointer);
- KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
- oneapi_check_usm(queue_, memory_device_pointer);
- oneapi_check_usm(queue_, kernel_globals, true);
-
- std::string matched_name(memory_name);
-
-/* This macro will change global ptr of KernelGlobals via name matching. */
-# define KERNEL_DATA_ARRAY(type, name) \
- else if (#name == matched_name) \
- { \
- globals->__##name = (type *)memory_device_pointer; \
- return; \
- }
- if (false) {
- }
- else if ("integrator_state" == matched_name) {
- globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
- return;
- }
- KERNEL_DATA_ARRAY(KernelData, data)
-# include "kernel/data_arrays.h"
- else
- {
- std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!"
- << std::endl;
- assert(false);
- }
-# undef KERNEL_DATA_ARRAY
-}
-
/* TODO: Move device information to OneapiDevice initialized on creation and use it. */
/* TODO: Move below function to oneapi/queue.cpp. */
-size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
+size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
const DeviceKernel kernel,
const size_t kernel_global_size)
{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ assert(queue);
(void)kernel_global_size;
const static size_t preferred_work_group_size_intersect_shading = 32;
const static size_t preferred_work_group_size_technical = 1024;
@@ -311,8 +116,10 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
preferred_work_group_size = 512;
}
- const size_t limit_work_group_size =
- queue->get_device().get_info<sycl::info::device::max_work_group_size>();
+ const size_t limit_work_group_size = reinterpret_cast<sycl::queue *>(queue)
+ ->get_device()
+ .get_info<sycl::info::device::max_work_group_size>();
+
return std::min(limit_work_group_size, preferred_work_group_size);
}
@@ -664,266 +471,4 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
-
-/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
- * since Windows driver 101.3268. */
-/* The same min compute-runtime version is currently required across Windows and Linux.
- * For Windows driver 101.3430, compute-runtime version is 23904. */
-static const int lowest_supported_driver_version_win = 1013430;
-static const int lowest_supported_driver_version_neo = 23904;
-
-static int parse_driver_build_version(const sycl::device &device)
-{
- const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
- int driver_build_version = 0;
-
- size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
- if (second_dot_position == std::string::npos) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- else {
- try {
- size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
- if (third_dot_position != std::string::npos) {
- const std::string &third_number_substr = driver_version.substr(
- second_dot_position + 1, third_dot_position - second_dot_position - 1);
- const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
- if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
- driver_build_version = std::stoi(third_number_substr) * 10000 +
- std::stoi(forth_number_substr);
- }
- else {
- const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
- driver_build_version = std::stoi(third_number_substr);
- }
- }
- catch (std::invalid_argument &e) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- }
-
- return driver_build_version;
-}
-
-static std::vector<sycl::device> oneapi_available_devices()
-{
- bool allow_all_devices = false;
- if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr)
- allow_all_devices = true;
-
- /* Host device is useful only for debugging at the moment
- * so we hide this device with default build settings. */
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- bool allow_host = true;
-# else
- bool allow_host = false;
-# endif
-
- const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();
-
- std::vector<sycl::device> available_devices;
- for (const sycl::platform &platform : oneapi_platforms) {
- /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
- */
- if (platform.get_backend() == sycl::backend::opencl) {
- continue;
- }
-
- const std::vector<sycl::device> &oneapi_devices =
- (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) :
- platform.get_devices(sycl::info::device_type::gpu);
-
- for (const sycl::device &device : oneapi_devices) {
- if (allow_all_devices) {
- /* still filter out host device if build doesn't support it. */
- if (allow_host || !device.is_host()) {
- available_devices.push_back(device);
- }
- }
- else {
- bool filter_out = false;
-
- /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
- * assuming they have either more than 96 Execution Units or not 7 threads per EU.
- * Official support can be broaden to older and smaller GPUs once ready. */
- if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
- /* Filtered-out defaults in-case these values aren't available through too old L0
- * runtime. */
- int number_of_eus = 96;
- int threads_per_eu = 7;
- if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
- number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
- }
- if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
- threads_per_eu =
- device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
- }
- /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
- if (number_of_eus <= 96 && threads_per_eu == 7) {
- filter_out = true;
- }
- /* if not already filtered out, check driver version. */
- if (!filter_out) {
- int driver_build_version = parse_driver_build_version(device);
- if ((driver_build_version > 100000 &&
- driver_build_version < lowest_supported_driver_version_win) ||
- driver_build_version < lowest_supported_driver_version_neo) {
- filter_out = true;
- }
- }
- }
- else if (!allow_host && device.is_host()) {
- filter_out = true;
- }
- else if (!allow_all_devices) {
- filter_out = true;
- }
-
- if (!filter_out) {
- available_devices.push_back(device);
- }
- }
- }
- }
-
- return available_devices;
-}
-
-char *oneapi_device_capabilities()
-{
- std::stringstream capabilities;
-
- const std::vector<sycl::device> &oneapi_devices = oneapi_available_devices();
- for (const sycl::device &device : oneapi_devices) {
- const std::string &name = device.get_info<sycl::info::device::name>();
-
- capabilities << std::string("\t") << name << "\n";
-# define WRITE_ATTR(attribute_name, attribute_variable) \
- capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
- << "\n";
-# define GET_NUM_ATTR(attribute) \
- { \
- size_t attribute = (size_t)device.get_info<sycl::info::device ::attribute>(); \
- capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \
- }
-
- GET_NUM_ATTR(vendor_id)
- GET_NUM_ATTR(max_compute_units)
- GET_NUM_ATTR(max_work_item_dimensions)
-
- sycl::id<3> max_work_item_sizes =
- device.get_info<sycl::info::device::max_work_item_sizes<3>>();
- WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
- WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
- WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
-
- GET_NUM_ATTR(max_work_group_size)
- GET_NUM_ATTR(max_num_sub_groups)
- GET_NUM_ATTR(sub_group_independent_forward_progress)
-
- GET_NUM_ATTR(preferred_vector_width_char)
- GET_NUM_ATTR(preferred_vector_width_short)
- GET_NUM_ATTR(preferred_vector_width_int)
- GET_NUM_ATTR(preferred_vector_width_long)
- GET_NUM_ATTR(preferred_vector_width_float)
- GET_NUM_ATTR(preferred_vector_width_double)
- GET_NUM_ATTR(preferred_vector_width_half)
-
- GET_NUM_ATTR(native_vector_width_char)
- GET_NUM_ATTR(native_vector_width_short)
- GET_NUM_ATTR(native_vector_width_int)
- GET_NUM_ATTR(native_vector_width_long)
- GET_NUM_ATTR(native_vector_width_float)
- GET_NUM_ATTR(native_vector_width_double)
- GET_NUM_ATTR(native_vector_width_half)
-
- size_t max_clock_frequency =
- (size_t)(device.is_host() ? (size_t)0 :
- device.get_info<sycl::info::device::max_clock_frequency>());
- WRITE_ATTR("max_clock_frequency", max_clock_frequency)
-
- GET_NUM_ATTR(address_bits)
- GET_NUM_ATTR(max_mem_alloc_size)
-
- /* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't
- * supported so we always return false, even if device supports HW texture usage acceleration.
- */
- bool image_support = false;
- WRITE_ATTR("image_support", (size_t)image_support)
-
- GET_NUM_ATTR(max_parameter_size)
- GET_NUM_ATTR(mem_base_addr_align)
- GET_NUM_ATTR(global_mem_size)
- GET_NUM_ATTR(local_mem_size)
- GET_NUM_ATTR(error_correction_support)
- GET_NUM_ATTR(profiling_timer_resolution)
- GET_NUM_ATTR(is_available)
-
-# undef GET_NUM_ATTR
-# undef WRITE_ATTR
- capabilities << "\n";
- }
-
- return ::strdup(capabilities.str().c_str());
-}
-
-void oneapi_free(void *p)
-{
- if (p) {
- ::free(p);
- }
-}
-
-void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
-{
- int num = 0;
- std::vector<sycl::device> devices = oneapi_available_devices();
- for (sycl::device &device : devices) {
- const std::string &platform_name =
- device.get_platform().get_info<sycl::info::platform::name>();
- std::string name = device.get_info<sycl::info::device::name>();
- std::string id = "ONEAPI_" + platform_name + "_" + name;
- if (device.has(sycl::aspect::ext_intel_pci_address)) {
- id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>());
- }
- (cb)(id.c_str(), name.c_str(), num, user_ptr);
- num++;
- }
-}
-
-size_t oneapi_get_memcapacity(SyclQueue *queue)
-{
- return reinterpret_cast<sycl::queue *>(queue)
- ->get_device()
- .get_info<sycl::info::device::global_mem_size>();
-}
-
-int oneapi_get_num_multiprocessors(SyclQueue *queue)
-{
- const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
- if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
- return device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
- }
- else
- return 0;
-}
-
-int oneapi_get_max_num_threads_per_multiprocessor(SyclQueue *queue)
-{
- const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
- if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
- device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
- return device.get_info<sycl::info::device::ext_intel_gpu_eu_simd_width>() *
- device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
- }
- else
- return 0;
-}
-
#endif /* WITH_ONEAPI */
diff --git a/intern/cycles/kernel/device/oneapi/kernel.h b/intern/cycles/kernel/device/oneapi/kernel.h
index c5f853742ed..7456d0e4902 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.h
+++ b/intern/cycles/kernel/device/oneapi/kernel.h
@@ -25,11 +25,6 @@ enum DeviceKernel : int;
class SyclQueue;
-typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
- const char *name,
- int num,
- void *user_ptr);
-
typedef void (*OneAPIErrorCallback)(const char *error, void *user_ptr);
struct KernelContext {
@@ -45,13 +40,15 @@ struct KernelContext {
extern "C" {
# endif
-# define DLL_INTERFACE_CALL(function, return_type, ...) \
- CYCLES_KERNEL_ONEAPI_EXPORT return_type function(__VA_ARGS__);
-# include "kernel/device/oneapi/dll_interface_template.h"
-# undef DLL_INTERFACE_CALL
-
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_run_test_kernel(SyclQueue *queue_);
+CYCLES_KERNEL_ONEAPI_EXPORT void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr);
+CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
+ SyclQueue *queue, const DeviceKernel kernel, const size_t kernel_global_size);
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
+ int kernel,
+ size_t global_size,
+ void **args);
# ifdef __cplusplus
}
# endif
-
#endif /* WITH_ONEAPI */