Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build_files/cmake/platform/platform_unix.cmake10
-rw-r--r--build_files/cmake/platform/platform_win32.cmake16
-rw-r--r--intern/cycles/device/CMakeLists.txt16
-rw-r--r--intern/cycles/device/oneapi/device.cpp65
-rw-r--r--intern/cycles/device/oneapi/device_impl.cpp525
-rw-r--r--intern/cycles/device/oneapi/device_impl.h48
-rw-r--r--intern/cycles/device/oneapi/dll_interface.h17
-rw-r--r--intern/cycles/device/oneapi/queue.cpp11
-rw-r--r--intern/cycles/device/oneapi/queue.h4
-rw-r--r--intern/cycles/kernel/CMakeLists.txt38
-rw-r--r--intern/cycles/kernel/device/oneapi/dll_interface_template.h54
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.cpp467
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.h19
-rw-r--r--source/creator/CMakeLists.txt7
14 files changed, 591 insertions, 706 deletions
diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake
index 35103ec441b..0aab46b1250 100644
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -335,10 +335,18 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
set(LEVEL_ZERO_ROOT_DIR ${CYCLES_LEVEL_ZERO})
endif()
- set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to DPC++ and SYCL installation")
+ set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to oneAPI DPC++ compiler")
if(EXISTS ${CYCLES_SYCL} AND NOT SYCL_ROOT_DIR)
set(SYCL_ROOT_DIR ${CYCLES_SYCL})
endif()
+ file(GLOB _sycl_runtime_libraries
+ ${SYCL_ROOT_DIR}/lib/libsycl.so
+ ${SYCL_ROOT_DIR}/lib/libsycl.so.[0-9]
+ ${SYCL_ROOT_DIR}/lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9]
+ ${SYCL_ROOT_DIR}/lib/libpi_level_zero.so
+ )
+ list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
+ unset(_sycl_runtime_libraries)
endif()
if(WITH_OPENVDB)
diff --git a/build_files/cmake/platform/platform_win32.cmake b/build_files/cmake/platform/platform_win32.cmake
index 4778ddebea6..0f7f04203c6 100644
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -952,5 +952,17 @@ endif()
set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
-set(LEVEL_ZERO_ROOT_DIR ${LIBDIR}/level_zero)
-set(SYCL_ROOT_DIR ${LIBDIR}/dpcpp)
+if(WITH_CYCLES_DEVICE_ONEAPI)
+ set(LEVEL_ZERO_ROOT_DIR ${LIBDIR}/level_zero)
+ set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to oneAPI DPC++ compiler")
+ if(EXISTS ${CYCLES_SYCL} AND NOT SYCL_ROOT_DIR)
+ set(SYCL_ROOT_DIR ${CYCLES_SYCL})
+ endif()
+ file(GLOB _sycl_runtime_libraries
+ ${SYCL_ROOT_DIR}/bin/sycl.dll
+ ${SYCL_ROOT_DIR}/bin/sycl[0-9].dll
+ ${SYCL_ROOT_DIR}/bin/pi_level_zero.dll
+ )
+ list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
+ unset(_sycl_runtime_libraries)
+endif()
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 24855d795d1..e5467121497 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -142,7 +142,6 @@ set(SRC
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
- ${SRC_ONEAPI}
${SRC_HEADERS}
)
@@ -188,7 +187,22 @@ if(WITH_CYCLES_DEVICE_METAL)
)
endif()
if (WITH_CYCLES_DEVICE_ONEAPI)
+ if(WIN32)
+ set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/../kernel/cycles_kernel_oneapi.lib)
+ else()
+ set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/../kernel/libcycles_kernel_oneapi.so)
+ endif()
+ list(APPEND LIB
+ ${SYCL_LIBRARY}
+ ${cycles_kernel_oneapi_lib}
+ )
add_definitions(-DWITH_ONEAPI)
+ list(APPEND SRC
+ ${SRC_ONEAPI}
+ )
+ list(APPEND INC_SYS
+ ${SYCL_INCLUDE_DIR}
+ )
endif()
if(WITH_OPENIMAGEDENOISE)
diff --git a/intern/cycles/device/oneapi/device.cpp b/intern/cycles/device/oneapi/device.cpp
index 4aa307e9300..f303ab41627 100644
--- a/intern/cycles/device/oneapi/device.cpp
+++ b/intern/cycles/device/oneapi/device.cpp
@@ -19,62 +19,12 @@
CCL_NAMESPACE_BEGIN
-#ifdef WITH_ONEAPI
-static OneAPIDLLInterface oneapi_dll;
-#endif
-
-#ifdef _WIN32
-# define LOAD_ONEAPI_SHARED_LIBRARY(path) (void *)(LoadLibrary(path))
-# define LOAD_ONEAPI_SHARED_LIBRARY_ERROR() GetLastError()
-# define FREE_SHARED_LIBRARY(handle) FreeLibrary((HMODULE)handle)
-# define GET_SHARED_LIBRARY_SYMBOL(handle, name) GetProcAddress((HMODULE)handle, name)
-#elif __linux__
-# define LOAD_ONEAPI_SHARED_LIBRARY(path) dlopen(path, RTLD_NOW)
-# define LOAD_ONEAPI_SHARED_LIBRARY_ERROR() dlerror()
-# define FREE_SHARED_LIBRARY(handle) dlclose(handle)
-# define GET_SHARED_LIBRARY_SYMBOL(handle, name) dlsym(handle, name)
-#endif
-
bool device_oneapi_init()
{
#if !defined(WITH_ONEAPI)
return false;
#else
- string lib_path = path_get("lib");
-# ifdef _WIN32
- lib_path = path_join(lib_path, "cycles_kernel_oneapi.dll");
-# else
- lib_path = path_join(lib_path, "cycles_kernel_oneapi.so");
-# endif
- void *lib_handle = LOAD_ONEAPI_SHARED_LIBRARY(lib_path.c_str());
-
- /* This shouldn't happen, but it still makes sense to have a branch for this. */
- if (lib_handle == NULL) {
- LOG(ERROR) << "oneAPI kernel shared library cannot be loaded: "
- << LOAD_ONEAPI_SHARED_LIBRARY_ERROR();
- return false;
- }
-
-# define DLL_INTERFACE_CALL(function, return_type, ...) \
- (oneapi_dll.function) = reinterpret_cast<decltype(oneapi_dll.function)>( \
- GET_SHARED_LIBRARY_SYMBOL(lib_handle, #function)); \
- if (oneapi_dll.function == NULL) { \
- LOG(ERROR) << "oneAPI shared library function \"" << #function \
- << "\" has not been loaded from kernel shared - disable oneAPI " \
- "library disable oneAPI implementation due to this"; \
- FREE_SHARED_LIBRARY(lib_handle); \
- return false; \
- }
-# include "kernel/device/oneapi/dll_interface_template.h"
-# undef DLL_INTERFACE_CALL
-
- VLOG_INFO << "oneAPI kernel shared library has been loaded successfully";
-
- /* We need to have this oneapi kernel shared library during all life-span of the Blender.
- * So it is not unloaded because of this.
- * FREE_SHARED_LIBRARY(lib_handle); */
-
/* NOTE(@nsirgien): we need to enable JIT cache from here and
* right now this cache policy is controlled by env. variables. */
/* NOTE(hallade) we also disable use of copy engine as it
@@ -109,17 +59,10 @@ bool device_oneapi_init()
#endif
}
-#if defined(_WIN32) || defined(__linux__)
-# undef LOAD_SYCL_SHARED_LIBRARY
-# undef LOAD_ONEAPI_SHARED_LIBRARY
-# undef FREE_SHARED_LIBRARY
-# undef GET_SHARED_LIBRARY_SYMBOL
-#endif
-
Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_ONEAPI
- return new OneapiDevice(info, oneapi_dll, stats, profiler);
+ return new OneapiDevice(info, stats, profiler);
#else
(void)info;
(void)stats;
@@ -165,7 +108,7 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
void device_oneapi_info(vector<DeviceInfo> &devices)
{
#ifdef WITH_ONEAPI
- (oneapi_dll.oneapi_iterate_devices)(device_iterator_cb, &devices);
+ OneapiDevice::iterate_devices(device_iterator_cb, &devices);
#else /* WITH_ONEAPI */
(void)devices;
#endif /* WITH_ONEAPI */
@@ -175,10 +118,10 @@ string device_oneapi_capabilities()
{
string capabilities;
#ifdef WITH_ONEAPI
- char *c_capabilities = (oneapi_dll.oneapi_device_capabilities)();
+ char *c_capabilities = OneapiDevice::device_capabilities();
if (c_capabilities) {
capabilities = c_capabilities;
- (oneapi_dll.oneapi_free)(c_capabilities);
+ free(c_capabilities);
}
#endif
return capabilities;
diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp
index dd0622a5bd5..2df605fa047 100644
--- a/intern/cycles/device/oneapi/device_impl.cpp
+++ b/intern/cycles/device/oneapi/device_impl.cpp
@@ -8,7 +8,7 @@
# include "util/debug.h"
# include "util/log.h"
-# include "kernel/device/oneapi/kernel.h"
+# include "kernel/device/oneapi/globals.h"
CCL_NAMESPACE_BEGIN
@@ -19,26 +19,19 @@ static void queue_error_cb(const char *message, void *user_ptr)
}
}
-OneapiDevice::OneapiDevice(const DeviceInfo &info,
- OneAPIDLLInterface &oneapi_dll_object,
- Stats &stats,
- Profiler &profiler)
+OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler),
device_queue_(nullptr),
texture_info_(this, "texture_info", MEM_GLOBAL),
kg_memory_(nullptr),
kg_memory_device_(nullptr),
- kg_memory_size_(0),
- oneapi_dll_(oneapi_dll_object)
+ kg_memory_size_(0)
{
need_texture_info_ = false;
- oneapi_dll_.oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
+ oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
- /* OneAPI calls should be initialized on this moment. */
- assert(oneapi_dll_.oneapi_create_queue != nullptr);
-
- bool is_finished_ok = oneapi_dll_.oneapi_create_queue(device_queue_, info.num);
+ bool is_finished_ok = create_queue(device_queue_, info.num);
if (is_finished_ok == false) {
set_error("oneAPI queue initialization error: got runtime exception \"" +
oneapi_error_string_ + "\"");
@@ -50,7 +43,7 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info,
}
size_t globals_segment_size;
- is_finished_ok = oneapi_dll_.oneapi_kernel_globals_size(device_queue_, globals_segment_size);
+ is_finished_ok = kernel_globals_size(device_queue_, globals_segment_size);
if (is_finished_ok == false) {
set_error("oneAPI constant memory initialization got runtime exception \"" +
oneapi_error_string_ + "\"");
@@ -59,27 +52,27 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info,
VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";
}
- kg_memory_ = oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
- oneapi_dll_.oneapi_usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
+ kg_memory_ = usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
+ usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
- kg_memory_device_ = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, globals_segment_size);
+ kg_memory_device_ = usm_alloc_device(device_queue_, globals_segment_size);
kg_memory_size_ = globals_segment_size;
- max_memory_on_device_ = oneapi_dll_.oneapi_get_memcapacity(device_queue_);
+ max_memory_on_device_ = get_memcapacity();
}
OneapiDevice::~OneapiDevice()
{
texture_info_.free();
- oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_);
- oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_device_);
+ usm_free(device_queue_, kg_memory_);
+ usm_free(device_queue_, kg_memory_device_);
for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
delete mt->second;
if (device_queue_)
- oneapi_dll_.oneapi_free_queue(device_queue_);
+ free_queue(device_queue_);
}
bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
@@ -99,7 +92,7 @@ bool OneapiDevice::load_kernels(const uint requested_features)
* with specialization constants, but it hasn't been implemented yet. */
(void)requested_features;
- bool is_finished_ok = oneapi_dll_.oneapi_run_test_kernel(device_queue_);
+ bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
if (is_finished_ok == false) {
set_error("oneAPI kernel load: got runtime exception \"" + oneapi_error_string_ + "\"");
}
@@ -138,7 +131,7 @@ void OneapiDevice::generic_alloc(device_memory &mem)
* type has been used for oneAPI device in order to better fit in Cycles architecture. */
void *device_pointer = nullptr;
if (mem.memory_size() + stats.mem_used < max_memory_on_device_)
- device_pointer = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, memory_size);
+ device_pointer = usm_alloc_device(device_queue_, memory_size);
if (device_pointer == nullptr) {
set_error("oneAPI kernel - device memory allocation error for " +
string_human_readable_size(mem.memory_size()) +
@@ -163,8 +156,7 @@ void OneapiDevice::generic_copy_to(device_memory &mem)
/* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
assert(mem.host_pointer);
assert(device_queue_);
- oneapi_dll_.oneapi_usm_memcpy(
- device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
+ usm_memcpy(device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
}
/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
@@ -178,11 +170,6 @@ string OneapiDevice::oneapi_error_message()
return string(oneapi_error_string_);
}
-OneAPIDLLInterface OneapiDevice::oneapi_dll_object()
-{
- return oneapi_dll_;
-}
-
void *OneapiDevice::kernel_globals_device_pointer()
{
return kg_memory_device_;
@@ -198,7 +185,7 @@ void OneapiDevice::generic_free(device_memory &mem)
mem.device_size = 0;
assert(device_queue_);
- oneapi_dll_.oneapi_usm_free(device_queue_, (void *)mem.device_pointer);
+ usm_free(device_queue_, (void *)mem.device_pointer);
mem.device_pointer = 0;
}
@@ -266,8 +253,7 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t
if (mem.device_pointer) {
char *shifted_host = reinterpret_cast<char *>(mem.host_pointer) + offset;
char *shifted_device = reinterpret_cast<char *>(mem.device_pointer) + offset;
- bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy(
- device_queue_, shifted_host, shifted_device, size);
+ bool is_finished_ok = usm_memcpy(device_queue_, shifted_host, shifted_device, size);
if (is_finished_ok == false) {
set_error("oneAPI memory operation error: got runtime exception \"" +
oneapi_error_string_ + "\"");
@@ -292,7 +278,7 @@ void OneapiDevice::mem_zero(device_memory &mem)
}
assert(device_queue_);
- bool is_finished_ok = oneapi_dll_.oneapi_usm_memset(
+ bool is_finished_ok = usm_memset(
device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
if (is_finished_ok == false) {
set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
@@ -349,10 +335,9 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
memcpy(data->data(), host, size);
data->copy_to_device();
- oneapi_dll_.oneapi_set_global_memory(
- device_queue_, kg_memory_, name, (void *)data->device_pointer);
+ set_global_memory(device_queue_, kg_memory_, name, (void *)data->device_pointer);
- oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
+ usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}
void OneapiDevice::global_alloc(device_memory &mem)
@@ -367,10 +352,9 @@ void OneapiDevice::global_alloc(device_memory &mem)
generic_alloc(mem);
generic_copy_to(mem);
- oneapi_dll_.oneapi_set_global_memory(
- device_queue_, kg_memory_, mem.name, (void *)mem.device_pointer);
+ set_global_memory(device_queue_, kg_memory_, mem.name, (void *)mem.device_pointer);
- oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
+ usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}
void OneapiDevice::global_free(device_memory &mem)
@@ -410,18 +394,6 @@ unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
return make_unique<OneapiDeviceQueue>(this);
}
-int OneapiDevice::get_num_multiprocessors()
-{
- assert(device_queue_);
- return oneapi_dll_.oneapi_get_num_multiprocessors(device_queue_);
-}
-
-int OneapiDevice::get_max_num_threads_per_multiprocessor()
-{
- assert(device_queue_);
- return oneapi_dll_.oneapi_get_max_num_threads_per_multiprocessor(device_queue_);
-}
-
bool OneapiDevice::should_use_graphics_interop()
{
/* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
@@ -432,13 +404,460 @@ bool OneapiDevice::should_use_graphics_interop()
void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
{
assert(device_queue_);
- return oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, memory_size, alignment);
+ return usm_aligned_alloc_host(device_queue_, memory_size, alignment);
}
void OneapiDevice::usm_free(void *usm_ptr)
{
assert(device_queue_);
- return oneapi_dll_.oneapi_usm_free(device_queue_, usm_ptr);
+ return usm_free(device_queue_, usm_ptr);
+}
+
+void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
+{
+# ifdef _DEBUG
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ sycl::info::device_type device_type =
+ queue->get_device().get_info<sycl::info::device::device_type>();
+ sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
+ (void)usm_type;
+ assert(usm_type == sycl::usm::alloc::device ||
+ ((device_type == sycl::info::device_type::host ||
+ device_type == sycl::info::device_type::cpu || allow_host) &&
+ usm_type == sycl::usm::alloc::host));
+# endif
+}
+
+bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
+{
+ bool finished_correct = true;
+ try {
+ std::vector<sycl::device> devices = OneapiDevice::available_devices();
+ if (device_index < 0 || device_index >= devices.size()) {
+ return false;
+ }
+ sycl::queue *created_queue = new sycl::queue(devices[device_index],
+ sycl::property::queue::in_order());
+ external_queue = reinterpret_cast<SyclQueue *>(created_queue);
+ }
+ catch (sycl::exception const &e) {
+ finished_correct = false;
+ oneapi_error_string_ = e.what();
+ }
+ return finished_correct;
+}
+
+void OneapiDevice::free_queue(SyclQueue *queue_)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ delete queue;
+}
+
+void *OneapiDevice::usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ return sycl::aligned_alloc_host(alignment, memory_size, *queue);
+}
+
+void *OneapiDevice::usm_alloc_device(SyclQueue *queue_, size_t memory_size)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ return sycl::malloc_device(memory_size, *queue);
+}
+
+void OneapiDevice::usm_free(SyclQueue *queue_, void *usm_ptr)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ OneapiDevice::check_usm(queue_, usm_ptr, true);
+ sycl::free(usm_ptr, *queue);
+}
+
+bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ OneapiDevice::check_usm(queue_, dest, true);
+ OneapiDevice::check_usm(queue_, src, true);
+ sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
+# ifdef WITH_CYCLES_DEBUG
+ try {
+ /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
+ * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
+ */
+ mem_event.wait_and_throw();
+ return true;
+ }
+ catch (sycl::exception const &e) {
+ oneapi_error_string_ = e.what();
+ return false;
+ }
+# else
+ sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
+ sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
+ bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
+ src_type == sycl::usm::alloc::device;
+ bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
+ src_type == sycl::usm::alloc::unknown;
+ /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
+ * may not wait until the end of the transfer before using the memory.
+ */
+ if (from_device_to_host || host_or_device_memop_with_offset)
+ mem_event.wait();
+ return true;
+# endif
+}
+
+bool OneapiDevice::usm_memset(SyclQueue *queue_,
+ void *usm_ptr,
+ unsigned char value,
+ size_t num_bytes)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ OneapiDevice::check_usm(queue_, usm_ptr, true);
+ sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
+# ifdef WITH_CYCLES_DEBUG
+ try {
+ /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
+ * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
+ */
+ mem_event.wait_and_throw();
+ return true;
+ }
+ catch (sycl::exception const &e) {
+ oneapi_error_string_ = e.what();
+ return false;
+ }
+# else
+ (void)mem_event;
+ return true;
+# endif
+}
+
+bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
+{
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ try {
+ queue->wait_and_throw();
+ return true;
+ }
+ catch (sycl::exception const &e) {
+ oneapi_error_string_ = e.what();
+ return false;
+ }
+}
+
+bool OneapiDevice::kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size)
+{
+ kernel_global_size = sizeof(KernelGlobalsGPU);
+
+ return true;
+}
+
+void OneapiDevice::set_global_memory(SyclQueue *queue_,
+ void *kernel_globals,
+ const char *memory_name,
+ void *memory_device_pointer)
+{
+ assert(queue_);
+ assert(kernel_globals);
+ assert(memory_name);
+ assert(memory_device_pointer);
+ KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
+ OneapiDevice::check_usm(queue_, memory_device_pointer);
+ OneapiDevice::check_usm(queue_, kernel_globals, true);
+
+ std::string matched_name(memory_name);
+
+/* This macro will change global ptr of KernelGlobals via name matching. */
+# define KERNEL_DATA_ARRAY(type, name) \
+ else if (#name == matched_name) \
+ { \
+ globals->__##name = (type *)memory_device_pointer; \
+ return; \
+ }
+ if (false) {
+ }
+ else if ("integrator_state" == matched_name) {
+ globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
+ return;
+ }
+ KERNEL_DATA_ARRAY(KernelData, data)
+# include "kernel/data_arrays.h"
+ else
+ {
+ std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!"
+ << std::endl;
+ assert(false);
+ }
+# undef KERNEL_DATA_ARRAY
+}
+
+bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
+ int kernel,
+ size_t global_size,
+ void **args)
+{
+ return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
+}
+
+/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
+ * since Windows driver 101.3268. */
+/* The same min compute-runtime version is currently required across Windows and Linux.
+ * For Windows driver 101.3430, compute-runtime version is 23904. */
+static const int lowest_supported_driver_version_win = 1013430;
+static const int lowest_supported_driver_version_neo = 23904;
+
+int OneapiDevice::parse_driver_build_version(const sycl::device &device)
+{
+ const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
+ int driver_build_version = 0;
+
+ size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
+ if (second_dot_position == std::string::npos) {
+ std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
+ << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
+ << " xx.xx.xxx.xxxx (Windows) for device \""
+ << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
+ }
+ else {
+ try {
+ size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
+ if (third_dot_position != std::string::npos) {
+ const std::string &third_number_substr = driver_version.substr(
+ second_dot_position + 1, third_dot_position - second_dot_position - 1);
+ const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
+ if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
+ driver_build_version = std::stoi(third_number_substr) * 10000 +
+ std::stoi(forth_number_substr);
+ }
+ else {
+ const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
+ driver_build_version = std::stoi(third_number_substr);
+ }
+ }
+ catch (std::invalid_argument &) {
+ std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
+ << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
+ << " xx.xx.xxx.xxxx (Windows) for device \""
+ << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
+ }
+ }
+
+ return driver_build_version;
+}
+
+std::vector<sycl::device> OneapiDevice::available_devices()
+{
+ bool allow_all_devices = false;
+ if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr)
+ allow_all_devices = true;
+
+ /* Host device is useful only for debugging at the moment
+ * so we hide this device with default build settings. */
+# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
+ bool allow_host = true;
+# else
+ bool allow_host = false;
+# endif
+
+ const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();
+
+ std::vector<sycl::device> available_devices;
+ for (const sycl::platform &platform : oneapi_platforms) {
+ /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
+ */
+ if (platform.get_backend() == sycl::backend::opencl) {
+ continue;
+ }
+
+ const std::vector<sycl::device> &oneapi_devices =
+ (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) :
+ platform.get_devices(sycl::info::device_type::gpu);
+
+ for (const sycl::device &device : oneapi_devices) {
+ if (allow_all_devices) {
+ /* still filter out host device if build doesn't support it. */
+ if (allow_host || !device.is_host()) {
+ available_devices.push_back(device);
+ }
+ }
+ else {
+ bool filter_out = false;
+
+ /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
+ * assuming they have either more than 96 Execution Units or not 7 threads per EU.
+ * Official support can be broaden to older and smaller GPUs once ready. */
+ if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
+ /* Filtered-out defaults in-case these values aren't available through too old L0
+ * runtime. */
+ int number_of_eus = 96;
+ int threads_per_eu = 7;
+ if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
+ number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
+ }
+ if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
+ threads_per_eu =
+ device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
+ }
+ /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
+ if (number_of_eus <= 96 && threads_per_eu == 7) {
+ filter_out = true;
+ }
+ /* if not already filtered out, check driver version. */
+ if (!filter_out) {
+ int driver_build_version = parse_driver_build_version(device);
+ if ((driver_build_version > 100000 &&
+ driver_build_version < lowest_supported_driver_version_win) ||
+ driver_build_version < lowest_supported_driver_version_neo) {
+ filter_out = true;
+ }
+ }
+ }
+ else if (!allow_host && device.is_host()) {
+ filter_out = true;
+ }
+ else if (!allow_all_devices) {
+ filter_out = true;
+ }
+
+ if (!filter_out) {
+ available_devices.push_back(device);
+ }
+ }
+ }
+ }
+
+ return available_devices;
+}
+
+char *OneapiDevice::device_capabilities()
+{
+ std::stringstream capabilities;
+
+ const std::vector<sycl::device> &oneapi_devices = available_devices();
+ for (const sycl::device &device : oneapi_devices) {
+ const std::string &name = device.get_info<sycl::info::device::name>();
+
+ capabilities << std::string("\t") << name << "\n";
+# define WRITE_ATTR(attribute_name, attribute_variable) \
+ capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
+ << "\n";
+# define GET_NUM_ATTR(attribute) \
+ { \
+ size_t attribute = (size_t)device.get_info<sycl::info::device ::attribute>(); \
+ capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \
+ }
+
+ GET_NUM_ATTR(vendor_id)
+ GET_NUM_ATTR(max_compute_units)
+ GET_NUM_ATTR(max_work_item_dimensions)
+
+ sycl::id<3> max_work_item_sizes =
+ device.get_info<sycl::info::device::max_work_item_sizes<3>>();
+ WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
+ WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
+ WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
+
+ GET_NUM_ATTR(max_work_group_size)
+ GET_NUM_ATTR(max_num_sub_groups)
+ GET_NUM_ATTR(sub_group_independent_forward_progress)
+
+ GET_NUM_ATTR(preferred_vector_width_char)
+ GET_NUM_ATTR(preferred_vector_width_short)
+ GET_NUM_ATTR(preferred_vector_width_int)
+ GET_NUM_ATTR(preferred_vector_width_long)
+ GET_NUM_ATTR(preferred_vector_width_float)
+ GET_NUM_ATTR(preferred_vector_width_double)
+ GET_NUM_ATTR(preferred_vector_width_half)
+
+ GET_NUM_ATTR(native_vector_width_char)
+ GET_NUM_ATTR(native_vector_width_short)
+ GET_NUM_ATTR(native_vector_width_int)
+ GET_NUM_ATTR(native_vector_width_long)
+ GET_NUM_ATTR(native_vector_width_float)
+ GET_NUM_ATTR(native_vector_width_double)
+ GET_NUM_ATTR(native_vector_width_half)
+
+ size_t max_clock_frequency =
+ (size_t)(device.is_host() ? (size_t)0 :
+ device.get_info<sycl::info::device::max_clock_frequency>());
+ WRITE_ATTR("max_clock_frequency", max_clock_frequency)
+
+ GET_NUM_ATTR(address_bits)
+ GET_NUM_ATTR(max_mem_alloc_size)
+
+ /* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't
+ * supported so we always return false, even if device supports HW texture usage acceleration.
+ */
+ bool image_support = false;
+ WRITE_ATTR("image_support", (size_t)image_support)
+
+ GET_NUM_ATTR(max_parameter_size)
+ GET_NUM_ATTR(mem_base_addr_align)
+ GET_NUM_ATTR(global_mem_size)
+ GET_NUM_ATTR(local_mem_size)
+ GET_NUM_ATTR(error_correction_support)
+ GET_NUM_ATTR(profiling_timer_resolution)
+ GET_NUM_ATTR(is_available)
+
+# undef GET_NUM_ATTR
+# undef WRITE_ATTR
+ capabilities << "\n";
+ }
+
+ return ::strdup(capabilities.str().c_str());
+}
+
+void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
+{
+ int num = 0;
+ std::vector<sycl::device> devices = OneapiDevice::available_devices();
+ for (sycl::device &device : devices) {
+ const std::string &platform_name =
+ device.get_platform().get_info<sycl::info::platform::name>();
+ std::string name = device.get_info<sycl::info::device::name>();
+ std::string id = "ONEAPI_" + platform_name + "_" + name;
+ if (device.has(sycl::aspect::ext_intel_pci_address)) {
+ id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>());
+ }
+ (cb)(id.c_str(), name.c_str(), num, user_ptr);
+ num++;
+ }
+}
+
+size_t OneapiDevice::get_memcapacity()
+{
+ return reinterpret_cast<sycl::queue *>(device_queue_)
+ ->get_device()
+ .get_info<sycl::info::device::global_mem_size>();
+}
+
+int OneapiDevice::get_num_multiprocessors()
+{
+ const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
+ if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
+ return device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
+ }
+ else
+ return 0;
+}
+
+int OneapiDevice::get_max_num_threads_per_multiprocessor()
+{
+ const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
+ if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
+ device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
+ return device.get_info<sycl::info::device::ext_intel_gpu_eu_simd_width>() *
+ device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
+ }
+ else
+ return 0;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/oneapi/device_impl.h b/intern/cycles/device/oneapi/device_impl.h
index 6abebf98684..3589e881a6e 100644
--- a/intern/cycles/device/oneapi/device_impl.h
+++ b/intern/cycles/device/oneapi/device_impl.h
@@ -3,9 +3,12 @@
#ifdef WITH_ONEAPI
+# include <CL/sycl.hpp>
+
# include "device/device.h"
# include "device/oneapi/device.h"
# include "device/oneapi/queue.h"
+# include "kernel/device/oneapi/kernel.h"
# include "util/map.h"
@@ -13,6 +16,11 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
+typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
+ const char *name,
+ int num,
+ void *user_ptr);
+
class OneapiDevice : public Device {
private:
SyclQueue *device_queue_;
@@ -25,16 +33,12 @@ class OneapiDevice : public Device {
void *kg_memory_device_;
size_t kg_memory_size_ = (size_t)0;
size_t max_memory_on_device_ = (size_t)0;
- OneAPIDLLInterface oneapi_dll_;
std::string oneapi_error_string_;
public:
virtual BVHLayoutMask get_bvh_layout_mask() const override;
- OneapiDevice(const DeviceInfo &info,
- OneAPIDLLInterface &oneapi_dll_object,
- Stats &stats,
- Profiler &profiler);
+ OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~OneapiDevice();
@@ -50,12 +54,8 @@ class OneapiDevice : public Device {
void generic_free(device_memory &mem);
- SyclQueue *sycl_queue();
-
string oneapi_error_message();
- OneAPIDLLInterface oneapi_dll_object();
-
void *kernel_globals_device_pointer();
void mem_alloc(device_memory &mem) override;
@@ -90,13 +90,37 @@ class OneapiDevice : public Device {
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
- int get_num_multiprocessors();
- int get_max_num_threads_per_multiprocessor();
-
/* NOTE(@nsirgien): Create this methods to avoid some compilation problems on Windows with host
* side compilation (MSVC). */
void *usm_aligned_alloc_host(size_t memory_size, size_t alignment);
void usm_free(void *usm_ptr);
+
+ static std::vector<sycl::device> available_devices();
+ static char *device_capabilities();
+ static int parse_driver_build_version(const sycl::device &device);
+ static void iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr);
+
+ size_t get_memcapacity();
+ int get_num_multiprocessors();
+ int get_max_num_threads_per_multiprocessor();
+ bool queue_synchronize(SyclQueue *queue);
+ bool kernel_globals_size(SyclQueue *queue, size_t &kernel_global_size);
+ void set_global_memory(SyclQueue *queue,
+ void *kernel_globals,
+ const char *memory_name,
+ void *memory_device_pointer);
+ bool enqueue_kernel(KernelContext *kernel_context, int kernel, size_t global_size, void **args);
+ SyclQueue *sycl_queue();
+
+ protected:
+ void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
+ bool create_queue(SyclQueue *&external_queue, int device_index);
+ void free_queue(SyclQueue *queue);
+ void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
+ void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
+ void usm_free(SyclQueue *queue, void *usm_ptr);
+ bool usm_memcpy(SyclQueue *queue, void *dest, void *src, size_t num_bytes);
+ bool usm_memset(SyclQueue *queue, void *usm_ptr, unsigned char value, size_t num_bytes);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/oneapi/dll_interface.h b/intern/cycles/device/oneapi/dll_interface.h
deleted file mode 100644
index 0a888194e98..00000000000
--- a/intern/cycles/device/oneapi/dll_interface.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-/* Include kernel header to get access to SYCL-specific types, like SyclQueue and
- * OneAPIDeviceIteratorCallback. */
-#include "kernel/device/oneapi/kernel.h"
-
-#ifdef WITH_ONEAPI
-struct OneAPIDLLInterface {
-# define DLL_INTERFACE_CALL(function, return_type, ...) \
- return_type (*function)(__VA_ARGS__) = nullptr;
-# include "kernel/device/oneapi/dll_interface_template.h"
-# undef DLL_INTERFACE_CALL
-};
-#endif
diff --git a/intern/cycles/device/oneapi/queue.cpp b/intern/cycles/device/oneapi/queue.cpp
index f865b27f976..9632b14d485 100644
--- a/intern/cycles/device/oneapi/queue.cpp
+++ b/intern/cycles/device/oneapi/queue.cpp
@@ -22,10 +22,7 @@ struct KernelExecutionInfo {
/* OneapiDeviceQueue */
OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
- : DeviceQueue(device),
- oneapi_device_(device),
- oneapi_dll_(device->oneapi_dll_object()),
- kernel_context_(nullptr)
+ : DeviceQueue(device), oneapi_device_(device), kernel_context_(nullptr)
{
}
@@ -81,14 +78,14 @@ bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
assert(signed_kernel_work_size >= 0);
size_t kernel_work_size = (size_t)signed_kernel_work_size;
- size_t kernel_local_size = oneapi_dll_.oneapi_kernel_preferred_local_size(
+ size_t kernel_local_size = oneapi_kernel_preferred_local_size(
kernel_context_->queue, (::DeviceKernel)kernel, kernel_work_size);
size_t uniformed_kernel_work_size = round_up(kernel_work_size, kernel_local_size);
assert(kernel_context_);
/* Call the oneAPI kernel DLL to launch the requested kernel. */
- bool is_finished_ok = oneapi_dll_.oneapi_enqueue_kernel(
+ bool is_finished_ok = oneapi_device_->enqueue_kernel(
kernel_context_, kernel, uniformed_kernel_work_size, args);
if (is_finished_ok == false) {
@@ -108,7 +105,7 @@ bool OneapiDeviceQueue::synchronize()
return false;
}
- bool is_finished_ok = oneapi_dll_.oneapi_queue_synchronize(oneapi_device_->sycl_queue());
+ bool is_finished_ok = oneapi_device_->queue_synchronize(oneapi_device_->sycl_queue());
if (is_finished_ok == false)
oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
oneapi_device_->oneapi_error_message() + "\"");
diff --git a/intern/cycles/device/oneapi/queue.h b/intern/cycles/device/oneapi/queue.h
index 716cbfdc88c..32363bf2a6e 100644
--- a/intern/cycles/device/oneapi/queue.h
+++ b/intern/cycles/device/oneapi/queue.h
@@ -10,7 +10,7 @@
# include "device/queue.h"
# include "device/oneapi/device.h"
-# include "device/oneapi/dll_interface.h"
+# include "kernel/device/oneapi/kernel.h"
CCL_NAMESPACE_BEGIN
@@ -41,9 +41,7 @@ class OneapiDeviceQueue : public DeviceQueue {
protected:
OneapiDevice *oneapi_device_;
- OneAPIDLLInterface oneapi_dll_;
KernelContext *kernel_context_;
- bool with_kernel_statistics_;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 9cf9b761651..bbf8fb8682b 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -716,7 +716,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
if(WIN32)
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
else()
- set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.so)
+ set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi.so)
endif()
set(cycles_oneapi_kernel_sources
@@ -815,6 +815,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
if(WIN32)
list(APPEND sycl_compiler_flags
+ -fuse-ld=link
-fms-extensions
-fms-compatibility
-D_WINDLL
@@ -888,33 +889,24 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
endif()
endif()
+ if(NOT WITH_BLENDER)
+ # For the Cycles standalone put libraries next to the Cycles application.
+ set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH})
+ else()
+ # For Blender put the libraries next to the Blender executable.
+ #
+ # Note that the installation path in the delayed_install is relative to the versioned folder,
+ # which means we need to go one level up.
+ set(cycles_oneapi_target_path "../")
+ endif()
+
# install dynamic libraries required at runtime
if(WIN32)
- set(SYCL_RUNTIME_DEPENDENCIES
- sycl.dll
- pi_level_zero.dll
- )
- if(NOT WITH_BLENDER)
- # For the Cycles standalone put libraries next to the Cycles application.
- delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH})
- else()
- # For Blender put the libraries next to the Blender executable.
- #
- # Note that the installation path in the delayed_install is relative to the versioned folder,
- # which means we need to go one level up.
- delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" "../")
- endif()
+ delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path})
elseif(UNIX AND NOT APPLE)
- file(GLOB SYCL_RUNTIME_DEPENDENCIES
- ${sycl_compiler_root}/../lib/libsycl.so
- ${sycl_compiler_root}/../lib/libsycl.so.[0-9]
- ${sycl_compiler_root}/../lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9]
- )
- list(APPEND SYCL_RUNTIME_DEPENDENCIES ${sycl_compiler_root}/../lib/libpi_level_zero.so)
- delayed_install("" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}/lib)
+ delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib)
endif()
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
endif()
diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h
deleted file mode 100644
index 5dd0d4203a4..00000000000
--- a/intern/cycles/kernel/device/oneapi/dll_interface_template.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2022 Intel Corporation */
-
-/* device_capabilities() returns a C string that must be free'd with oneapi_free(). */
-DLL_INTERFACE_CALL(oneapi_device_capabilities, char *)
-DLL_INTERFACE_CALL(oneapi_free, void, void *)
-DLL_INTERFACE_CALL(oneapi_get_memcapacity, size_t, SyclQueue *queue)
-
-DLL_INTERFACE_CALL(oneapi_get_num_multiprocessors, int, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_get_max_num_threads_per_multiprocessor, int, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_iterate_devices, void, OneAPIDeviceIteratorCallback cb, void *user_ptr)
-DLL_INTERFACE_CALL(oneapi_set_error_cb, void, OneAPIErrorCallback, void *user_ptr)
-
-DLL_INTERFACE_CALL(oneapi_create_queue, bool, SyclQueue *&external_queue, int device_index)
-DLL_INTERFACE_CALL(oneapi_free_queue, void, SyclQueue *queue)
-DLL_INTERFACE_CALL(
- oneapi_usm_aligned_alloc_host, void *, SyclQueue *queue, size_t memory_size, size_t alignment)
-DLL_INTERFACE_CALL(oneapi_usm_alloc_device, void *, SyclQueue *queue, size_t memory_size)
-DLL_INTERFACE_CALL(oneapi_usm_free, void, SyclQueue *queue, void *usm_ptr)
-
-DLL_INTERFACE_CALL(
- oneapi_usm_memcpy, bool, SyclQueue *queue, void *dest, void *src, size_t num_bytes)
-DLL_INTERFACE_CALL(oneapi_queue_synchronize, bool, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_usm_memset,
- bool,
- SyclQueue *queue,
- void *usm_ptr,
- unsigned char value,
- size_t num_bytes)
-
-DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue)
-
-/* Operation with Kernel globals structure - map of global/constant allocation - filled before
- * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as
- * pointer. */
-DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_set_global_memory,
- void,
- SyclQueue *queue,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-
-DLL_INTERFACE_CALL(oneapi_kernel_preferred_local_size,
- size_t,
- SyclQueue *queue,
- const DeviceKernel kernel,
- const size_t kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_enqueue_kernel,
- bool,
- KernelContext *context,
- int kernel,
- size_t global_size,
- void **args)
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 3c7a9960588..1d1700f036d 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -3,7 +3,6 @@
#ifdef WITH_ONEAPI
-/* clang-format off */
# include "kernel.h"
# include <iostream>
# include <map>
@@ -16,163 +15,16 @@
# include "kernel/device/oneapi/kernel_templates.h"
# include "kernel/device/gpu/kernel.h"
-/* clang-format on */
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
-static std::vector<sycl::device> oneapi_available_devices();
-
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
s_error_user_ptr = user_ptr;
}
-void oneapi_check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
-{
-# ifdef _DEBUG
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- sycl::info::device_type device_type =
- queue->get_device().get_info<sycl::info::device::device_type>();
- sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
- (void)usm_type;
- assert(usm_type == sycl::usm::alloc::device ||
- ((device_type == sycl::info::device_type::host ||
- device_type == sycl::info::device_type::is_cpu || allow_host) &&
- usm_type == sycl::usm::alloc::host));
-# endif
-}
-
-bool oneapi_create_queue(SyclQueue *&external_queue, int device_index)
-{
- bool finished_correct = true;
- try {
- std::vector<sycl::device> devices = oneapi_available_devices();
- if (device_index < 0 || device_index >= devices.size()) {
- return false;
- }
- sycl::queue *created_queue = new sycl::queue(devices[device_index],
- sycl::property::queue::in_order());
- external_queue = reinterpret_cast<SyclQueue *>(created_queue);
- }
- catch (sycl::exception const &e) {
- finished_correct = false;
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- }
- return finished_correct;
-}
-
-void oneapi_free_queue(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- delete queue;
-}
-
-void *oneapi_usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::aligned_alloc_host(alignment, memory_size, *queue);
-}
-
-void *oneapi_usm_alloc_device(SyclQueue *queue_, size_t memory_size)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::malloc_device(memory_size, *queue);
-}
-
-void oneapi_usm_free(SyclQueue *queue_, void *usm_ptr)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::free(usm_ptr, *queue);
-}
-
-bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, dest, true);
- oneapi_check_usm(queue_, src, true);
- sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
- try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-# else
- sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
- sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
- bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
- src_type == sycl::usm::alloc::device;
- bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
- src_type == sycl::usm::alloc::unknown;
- /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
- * may not wait until the end of the transfer before using the memory.
- */
- if (from_device_to_host || host_or_device_memop_with_offset)
- mem_event.wait();
- return true;
-# endif
-}
-
-bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
- try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-# else
- (void)mem_event;
- return true;
-# endif
-}
-
-bool oneapi_queue_synchronize(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- try {
- queue->wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-}
-
/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality and
* also trigger runtime compilation of all existing oneAPI kernels */
bool oneapi_run_test_kernel(SyclQueue *queue_)
@@ -216,60 +68,13 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
return true;
}
-bool oneapi_kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size)
-{
- kernel_global_size = sizeof(KernelGlobalsGPU);
-
- return true;
-}
-
-void oneapi_set_global_memory(SyclQueue *queue_,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-{
- assert(queue_);
- assert(kernel_globals);
- assert(memory_name);
- assert(memory_device_pointer);
- KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
- oneapi_check_usm(queue_, memory_device_pointer);
- oneapi_check_usm(queue_, kernel_globals, true);
-
- std::string matched_name(memory_name);
-
-/* This macro will change global ptr of KernelGlobals via name matching. */
-# define KERNEL_DATA_ARRAY(type, name) \
- else if (#name == matched_name) \
- { \
- globals->__##name = (type *)memory_device_pointer; \
- return; \
- }
- if (false) {
- }
- else if ("integrator_state" == matched_name) {
- globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
- return;
- }
- KERNEL_DATA_ARRAY(KernelData, data)
-# include "kernel/data_arrays.h"
- else
- {
- std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!"
- << std::endl;
- assert(false);
- }
-# undef KERNEL_DATA_ARRAY
-}
-
/* TODO: Move device information to OneapiDevice initialized on creation and use it. */
/* TODO: Move below function to oneapi/queue.cpp. */
-size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
+size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
const DeviceKernel kernel,
const size_t kernel_global_size)
{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ assert(queue);
(void)kernel_global_size;
const static size_t preferred_work_group_size_intersect_shading = 32;
const static size_t preferred_work_group_size_technical = 1024;
@@ -311,8 +116,10 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
preferred_work_group_size = 512;
}
- const size_t limit_work_group_size =
- queue->get_device().get_info<sycl::info::device::max_work_group_size>();
+ const size_t limit_work_group_size = reinterpret_cast<sycl::queue *>(queue)
+ ->get_device()
+ .get_info<sycl::info::device::max_work_group_size>();
+
return std::min(limit_work_group_size, preferred_work_group_size);
}
@@ -664,266 +471,4 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
-
-/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
- * since Windows driver 101.3268. */
-/* The same min compute-runtime version is currently required across Windows and Linux.
- * For Windows driver 101.3430, compute-runtime version is 23904. */
-static const int lowest_supported_driver_version_win = 1013430;
-static const int lowest_supported_driver_version_neo = 23904;
-
-static int parse_driver_build_version(const sycl::device &device)
-{
- const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
- int driver_build_version = 0;
-
- size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
- if (second_dot_position == std::string::npos) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- else {
- try {
- size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
- if (third_dot_position != std::string::npos) {
- const std::string &third_number_substr = driver_version.substr(
- second_dot_position + 1, third_dot_position - second_dot_position - 1);
- const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
- if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
- driver_build_version = std::stoi(third_number_substr) * 10000 +
- std::stoi(forth_number_substr);
- }
- else {
- const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
- driver_build_version = std::stoi(third_number_substr);
- }
- }
- catch (std::invalid_argument &e) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- }
-
- return driver_build_version;
-}
-
-static std::vector<sycl::device> oneapi_available_devices()
-{
- bool allow_all_devices = false;
- if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr)
- allow_all_devices = true;
-
- /* Host device is useful only for debugging at the moment
- * so we hide this device with default build settings. */
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- bool allow_host = true;
-# else
- bool allow_host = false;
-# endif
-
- const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();
-
- std::vector<sycl::device> available_devices;
- for (const sycl::platform &platform : oneapi_platforms) {
- /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
- */
- if (platform.get_backend() == sycl::backend::opencl) {
- continue;
- }
-
- const std::vector<sycl::device> &oneapi_devices =
- (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) :
- platform.get_devices(sycl::info::device_type::gpu);
-
- for (const sycl::device &device : oneapi_devices) {
- if (allow_all_devices) {
- /* still filter out host device if build doesn't support it. */
- if (allow_host || !device.is_host()) {
- available_devices.push_back(device);
- }
- }
- else {
- bool filter_out = false;
-
- /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
- * assuming they have either more than 96 Execution Units or not 7 threads per EU.
- * Official support can be broaden to older and smaller GPUs once ready. */
- if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
- /* Filtered-out defaults in-case these values aren't available through too old L0
- * runtime. */
- int number_of_eus = 96;
- int threads_per_eu = 7;
- if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
- number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
- }
- if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
- threads_per_eu =
- device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
- }
- /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
- if (number_of_eus <= 96 && threads_per_eu == 7) {
- filter_out = true;
- }
- /* if not already filtered out, check driver version. */
- if (!filter_out) {
- int driver_build_version = parse_driver_build_version(device);
- if ((driver_build_version > 100000 &&
- driver_build_version < lowest_supported_driver_version_win) ||
- driver_build_version < lowest_supported_driver_version_neo) {
- filter_out = true;
- }
- }
- }
- else if (!allow_host && device.is_host()) {
- filter_out = true;
- }
- else if (!allow_all_devices) {
- filter_out = true;
- }
-
- if (!filter_out) {
- available_devices.push_back(device);
- }
- }
- }
- }
-
- return available_devices;
-}
-
-char *oneapi_device_capabilities()
-{
- std::stringstream capabilities;
-
- const std::vector<sycl::device> &oneapi_devices = oneapi_available_devices();
- for (const sycl::device &device : oneapi_devices) {
- const std::string &name = device.get_info<sycl::info::device::name>();
-
- capabilities << std::string("\t") << name << "\n";
-# define WRITE_ATTR(attribute_name, attribute_variable) \
- capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
- << "\n";
-# define GET_NUM_ATTR(attribute) \
- { \
- size_t attribute = (size_t)device.get_info<sycl::info::device ::attribute>(); \
- capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \
- }
-
- GET_NUM_ATTR(vendor_id)
- GET_NUM_ATTR(max_compute_units)
- GET_NUM_ATTR(max_work_item_dimensions)
-
- sycl::id<3> max_work_item_sizes =
- device.get_info<sycl::info::device::max_work_item_sizes<3>>();
- WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
- WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
- WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
-
- GET_NUM_ATTR(max_work_group_size)
- GET_NUM_ATTR(max_num_sub_groups)
- GET_NUM_ATTR(sub_group_independent_forward_progress)
-
- GET_NUM_ATTR(preferred_vector_width_char)
- GET_NUM_ATTR(preferred_vector_width_short)
- GET_NUM_ATTR(preferred_vector_width_int)
- GET_NUM_ATTR(preferred_vector_width_long)
- GET_NUM_ATTR(preferred_vector_width_float)
- GET_NUM_ATTR(preferred_vector_width_double)
- GET_NUM_ATTR(preferred_vector_width_half)
-
- GET_NUM_ATTR(native_vector_width_char)
- GET_NUM_ATTR(native_vector_width_short)
- GET_NUM_ATTR(native_vector_width_int)
- GET_NUM_ATTR(native_vector_width_long)
- GET_NUM_ATTR(native_vector_width_float)
- GET_NUM_ATTR(native_vector_width_double)
- GET_NUM_ATTR(native_vector_width_half)
-
- size_t max_clock_frequency =
- (size_t)(device.is_host() ? (size_t)0 :
- device.get_info<sycl::info::device::max_clock_frequency>());
- WRITE_ATTR("max_clock_frequency", max_clock_frequency)
-
- GET_NUM_ATTR(address_bits)
- GET_NUM_ATTR(max_mem_alloc_size)
-
- /* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't
- * supported so we always return false, even if device supports HW texture usage acceleration.
- */
- bool image_support = false;
- WRITE_ATTR("image_support", (size_t)image_support)
-
- GET_NUM_ATTR(max_parameter_size)
- GET_NUM_ATTR(mem_base_addr_align)
- GET_NUM_ATTR(global_mem_size)
- GET_NUM_ATTR(local_mem_size)
- GET_NUM_ATTR(error_correction_support)
- GET_NUM_ATTR(profiling_timer_resolution)
- GET_NUM_ATTR(is_available)
-
-# undef GET_NUM_ATTR
-# undef WRITE_ATTR
- capabilities << "\n";
- }
-
- return ::strdup(capabilities.str().c_str());
-}
-
-void oneapi_free(void *p)
-{
- if (p) {
- ::free(p);
- }
-}
-
-void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
-{
- int num = 0;
- std::vector<sycl::device> devices = oneapi_available_devices();
- for (sycl::device &device : devices) {
- const std::string &platform_name =
- device.get_platform().get_info<sycl::info::platform::name>();
- std::string name = device.get_info<sycl::info::device::name>();
- std::string id = "ONEAPI_" + platform_name + "_" + name;
- if (device.has(sycl::aspect::ext_intel_pci_address)) {
- id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>());
- }
- (cb)(id.c_str(), name.c_str(), num, user_ptr);
- num++;
- }
-}
-
-size_t oneapi_get_memcapacity(SyclQueue *queue)
-{
- return reinterpret_cast<sycl::queue *>(queue)
- ->get_device()
- .get_info<sycl::info::device::global_mem_size>();
-}
-
-int oneapi_get_num_multiprocessors(SyclQueue *queue)
-{
- const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
- if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
- return device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
- }
- else
- return 0;
-}
-
-int oneapi_get_max_num_threads_per_multiprocessor(SyclQueue *queue)
-{
- const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
- if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
- device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
- return device.get_info<sycl::info::device::ext_intel_gpu_eu_simd_width>() *
- device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
- }
- else
- return 0;
-}
-
#endif /* WITH_ONEAPI */
diff --git a/intern/cycles/kernel/device/oneapi/kernel.h b/intern/cycles/kernel/device/oneapi/kernel.h
index c5f853742ed..7456d0e4902 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.h
+++ b/intern/cycles/kernel/device/oneapi/kernel.h
@@ -25,11 +25,6 @@ enum DeviceKernel : int;
class SyclQueue;
-typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
- const char *name,
- int num,
- void *user_ptr);
-
typedef void (*OneAPIErrorCallback)(const char *error, void *user_ptr);
struct KernelContext {
@@ -45,13 +40,15 @@ struct KernelContext {
extern "C" {
# endif
-# define DLL_INTERFACE_CALL(function, return_type, ...) \
- CYCLES_KERNEL_ONEAPI_EXPORT return_type function(__VA_ARGS__);
-# include "kernel/device/oneapi/dll_interface_template.h"
-# undef DLL_INTERFACE_CALL
-
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_run_test_kernel(SyclQueue *queue_);
+CYCLES_KERNEL_ONEAPI_EXPORT void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr);
+CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
+ SyclQueue *queue, const DeviceKernel kernel, const size_t kernel_global_size);
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
+ int kernel,
+ size_t global_size,
+ void **args);
# ifdef __cplusplus
}
# endif
-
#endif /* WITH_ONEAPI */
diff --git a/source/creator/CMakeLists.txt b/source/creator/CMakeLists.txt
index 13a311cb55c..caaffa283ba 100644
--- a/source/creator/CMakeLists.txt
+++ b/source/creator/CMakeLists.txt
@@ -1076,6 +1076,13 @@ elseif(WIN32)
DESTINATION ${TARGETDIR_VER}/python/lib/site-packages
)
endif()
+
+ if(PLATFORM_BUNDLED_LIBRARIES)
+ install(
+ FILES ${PLATFORM_BUNDLED_LIBRARIES}
+ DESTINATION ${TARGETDIR_LIB}
+ )
+ endif()
elseif(APPLE)
if(NOT WITH_PYTHON_MODULE)
# Uppercase name for app bundle.