diff options
Diffstat (limited to 'intern/cycles/device/oneapi')
-rw-r--r-- | intern/cycles/device/oneapi/device.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/oneapi/device_impl.cpp | 72 | ||||
-rw-r--r-- | intern/cycles/device/oneapi/device_impl.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/oneapi/queue.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/oneapi/queue.h | 2 |
5 files changed, 39 insertions, 45 deletions
diff --git a/intern/cycles/device/oneapi/device.cpp b/intern/cycles/device/oneapi/device.cpp index f303ab41627..66d6f749e30 100644 --- a/intern/cycles/device/oneapi/device.cpp +++ b/intern/cycles/device/oneapi/device.cpp @@ -39,7 +39,7 @@ bool device_oneapi_init() _putenv_s("SYCL_CACHE_THRESHOLD", "0"); } if (getenv("SYCL_DEVICE_FILTER") == nullptr) { - _putenv_s("SYCL_DEVICE_FILTER", "host,level_zero"); + _putenv_s("SYCL_DEVICE_FILTER", "level_zero"); } if (getenv("SYCL_ENABLE_PCI") == nullptr) { _putenv_s("SYCL_ENABLE_PCI", "1"); @@ -50,7 +50,7 @@ bool device_oneapi_init() # elif __linux__ setenv("SYCL_CACHE_PERSISTENT", "1", false); setenv("SYCL_CACHE_THRESHOLD", "0", false); - setenv("SYCL_DEVICE_FILTER", "host,level_zero", false); + setenv("SYCL_DEVICE_FILTER", "level_zero", false); setenv("SYCL_ENABLE_PCI", "1", false); setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false); # endif diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp index 2df605fa047..3588b75713b 100644 --- a/intern/cycles/device/oneapi/device_impl.cpp +++ b/intern/cycles/device/oneapi/device_impl.cpp @@ -43,7 +43,7 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi } size_t globals_segment_size; - is_finished_ok = kernel_globals_size(device_queue_, globals_segment_size); + is_finished_ok = kernel_globals_size(globals_segment_size); if (is_finished_ok == false) { set_error("oneAPI constant memory initialization got runtime exception \"" + oneapi_error_string_ + "\""); @@ -88,18 +88,26 @@ BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const bool OneapiDevice::load_kernels(const uint requested_features) { assert(device_queue_); - /* NOTE(@nsirgien): oneAPI can support compilation of kernel code with certain feature set - * with specialization constants, but it hasn't been implemented yet. */ - (void)requested_features; bool is_finished_ok = oneapi_run_test_kernel(device_queue_); if (is_finished_ok == false) { - set_error("oneAPI kernel load: got runtime exception \"" + oneapi_error_string_ + "\""); + set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ + + "\""); + return false; } else { - VLOG_INFO << "Runtime compilation done for \"" << info.description << "\""; + VLOG_INFO << "Test kernel has been executed successfully for \"" << info.description << "\""; assert(device_queue_); } + + is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features); + if (is_finished_ok == false) { + set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\""); + } + else { + VLOG_INFO << "Kernels loading (compilation) has been done for \"" << info.description << "\""; + } + return is_finished_ok; } @@ -422,9 +430,14 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_ sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context()); (void)usm_type; assert(usm_type == sycl::usm::alloc::device || - ((device_type == sycl::info::device_type::host || - device_type == sycl::info::device_type::cpu || allow_host) && - usm_type == sycl::usm::alloc::host)); + ((device_type == sycl::info::device_type::cpu || allow_host) && + usm_type == sycl::usm::alloc::host || + usm_type == sycl::usm::alloc::unknown)); +# else + /* Silence warning about unused arguments. */ + (void)queue_; + (void)usm_ptr; + (void)allow_host; # endif } @@ -552,7 +565,7 @@ bool OneapiDevice::queue_synchronize(SyclQueue *queue_) } } -bool OneapiDevice::kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size) +bool OneapiDevice::kernel_globals_size(size_t &kernel_global_size) { kernel_global_size = sizeof(KernelGlobalsGPU); @@ -658,14 +671,6 @@ std::vector<sycl::device> OneapiDevice::available_devices() if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr) allow_all_devices = true; - /* Host device is useful only for debugging at the moment - * so we hide this device with default build settings. */ -# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED - bool allow_host = true; -# else - bool allow_host = false; -# endif - const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms(); std::vector<sycl::device> available_devices; @@ -677,17 +682,11 @@ std::vector<sycl::device> OneapiDevice::available_devices() } const std::vector<sycl::device> &oneapi_devices = - (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) : - platform.get_devices(sycl::info::device_type::gpu); + (allow_all_devices) ? platform.get_devices(sycl::info::device_type::all) : + platform.get_devices(sycl::info::device_type::gpu); for (const sycl::device &device : oneapi_devices) { - if (allow_all_devices) { - /* still filter out host device if build doesn't support it. */ - if (allow_host || !device.is_host()) { - available_devices.push_back(device); - } - } - else { + if (!allow_all_devices) { bool filter_out = false; /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU, @@ -699,11 +698,11 @@ std::vector<sycl::device> OneapiDevice::available_devices() int number_of_eus = 96; int threads_per_eu = 7; if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) { - number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>(); + number_of_eus = device.get_info<sycl::ext::intel::info::device::gpu_eu_count>(); } if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) { threads_per_eu = - device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); + device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>(); } /* This filters out all Level-Zero supported GPUs from older generation than Arc. */ if (number_of_eus <= 96 && threads_per_eu == 7) { @@ -719,9 +718,6 @@ std::vector<sycl::device> OneapiDevice::available_devices() } } } - else if (!allow_host && device.is_host()) { - filter_out = true; - } else if (!allow_all_devices) { filter_out = true; } @@ -784,9 +780,7 @@ char *OneapiDevice::device_capabilities() GET_NUM_ATTR(native_vector_width_double) GET_NUM_ATTR(native_vector_width_half) - size_t max_clock_frequency = - (size_t)(device.is_host() ? (size_t)0 : - device.get_info<sycl::info::device::max_clock_frequency>()); + size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>(); WRITE_ATTR("max_clock_frequency", max_clock_frequency) GET_NUM_ATTR(address_bits) @@ -824,7 +818,7 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p std::string name = device.get_info<sycl::info::device::name>(); std::string id = "ONEAPI_" + platform_name + "_" + name; if (device.has(sycl::aspect::ext_intel_pci_address)) { - id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>()); + id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>()); } (cb)(id.c_str(), name.c_str(), num, user_ptr); num++; @@ -842,7 +836,7 @@ int OneapiDevice::get_num_multiprocessors() { const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device(); if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) { - return device.get_info<sycl::info::device::ext_intel_gpu_eu_count>(); + return device.get_info<sycl::ext::intel::info::device::gpu_eu_count>(); } else return 0; @@ -853,8 +847,8 @@ int OneapiDevice::get_max_num_threads_per_multiprocessor() const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device(); if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) && device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) { - return device.get_info<sycl::info::device::ext_intel_gpu_eu_simd_width>() * - device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); + return device.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>() * + device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>(); } else return 0; diff --git a/intern/cycles/device/oneapi/device_impl.h b/intern/cycles/device/oneapi/device_impl.h index 3589e881a6e..197cf03d60d 100644 --- a/intern/cycles/device/oneapi/device_impl.h +++ b/intern/cycles/device/oneapi/device_impl.h @@ -3,7 +3,7 @@ #ifdef WITH_ONEAPI -# include <CL/sycl.hpp> +# include <sycl/sycl.hpp> # include "device/device.h" # include "device/oneapi/device.h" @@ -104,7 +104,7 @@ class OneapiDevice : public Device { int get_num_multiprocessors(); int get_max_num_threads_per_multiprocessor(); bool queue_synchronize(SyclQueue *queue); - bool kernel_globals_size(SyclQueue *queue, size_t &kernel_global_size); + bool kernel_globals_size(size_t &kernel_global_size); void set_global_memory(SyclQueue *queue, void *kernel_globals, const char *memory_name, diff --git a/intern/cycles/device/oneapi/queue.cpp b/intern/cycles/device/oneapi/queue.cpp index 9632b14d485..3d019661aa8 100644 --- a/intern/cycles/device/oneapi/queue.cpp +++ b/intern/cycles/device/oneapi/queue.cpp @@ -43,7 +43,7 @@ int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const return num_states; } -int OneapiDeviceQueue::num_concurrent_busy_states() const +int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) const { const int max_num_threads = oneapi_device_->get_num_multiprocessors() * oneapi_device_->get_max_num_threads_per_multiprocessor(); diff --git a/intern/cycles/device/oneapi/queue.h b/intern/cycles/device/oneapi/queue.h index 32363bf2a6e..bbd947b49cb 100644 --- a/intern/cycles/device/oneapi/queue.h +++ b/intern/cycles/device/oneapi/queue.h @@ -25,7 +25,7 @@ class OneapiDeviceQueue : public DeviceQueue { virtual int num_concurrent_states(const size_t state_size) const override; - virtual int num_concurrent_busy_states() const override; + virtual int num_concurrent_busy_states(const size_t state_size) const override; virtual void init_execution() override; |