/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2021-2022 Intel Corporation */

#ifdef WITH_ONEAPI

#  include "device/oneapi/device_impl.h"

#  include "util/debug.h"
#  include "util/log.h"

#  include "kernel/device/oneapi/globals.h"

CCL_NAMESPACE_BEGIN

static void queue_error_cb(const char *message, void *user_ptr)
{
  if (user_ptr) {
    *reinterpret_cast<std::string *>(user_ptr) = message;
  }
}

OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
    : Device(info, stats, profiler),
      device_queue_(nullptr),
      texture_info_(this, "texture_info", MEM_GLOBAL),
      kg_memory_(nullptr),
      kg_memory_device_(nullptr),
      kg_memory_size_(0)
{
  need_texture_info_ = false;

  oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);

  bool is_finished_ok = create_queue(device_queue_, info.num);
  if (is_finished_ok == false) {
    set_error("oneAPI queue initialization error: got runtime exception \"" +
              oneapi_error_string_ + "\"");
  }
  else {
    VLOG_DEBUG << "oneAPI queue has been successfully created for the device \""
               << info.description << "\"";
    assert(device_queue_);
  }

  size_t globals_segment_size;
  is_finished_ok = kernel_globals_size(globals_segment_size);
  if (is_finished_ok == false) {
    set_error("oneAPI constant memory initialization got runtime exception \"" +
              oneapi_error_string_ + "\"");
  }
  else {
    VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";
  }

  kg_memory_ = usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
  usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);

  kg_memory_device_ = usm_alloc_device(device_queue_, globals_segment_size);

  kg_memory_size_ = globals_segment_size;

  max_memory_on_device_ = get_memcapacity();
}

OneapiDevice::~OneapiDevice()
{
  texture_info_.free();
  usm_free(device_queue_, kg_memory_);
  usm_free(device_queue_, kg_memory_device_);

  for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
    delete mt->second;

  if (device_queue_)
    free_queue(device_queue_);
}

bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
{
  return false;
}

BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
{
  return BVH_LAYOUT_BVH2;
}

bool OneapiDevice::load_kernels(const uint requested_features)
{
  assert(device_queue_);

  bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
  if (is_finished_ok == false) {
    set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
              "\"");
    return false;
  }
  else {
    VLOG_INFO << "Test kernel has been executed successfully for \"" << info.description << "\"";
    assert(device_queue_);
  }

  is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
  if (is_finished_ok == false) {
    set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
  }
  else {
    VLOG_INFO << "Kernels loading (compilation) has been done for \"" << info.description << "\"";
  }

  return is_finished_ok;
}

void OneapiDevice::load_texture_info()
{
  if (need_texture_info_) {
    need_texture_info_ = false;
    texture_info_.copy_to_device();
  }
}

void OneapiDevice::generic_alloc(device_memory &mem)
{
  size_t memory_size = mem.memory_size();

  /* TODO(@nsirgien): In future, if the scene doesn't fit into device memory, then
   * we can use USM host memory.
   * Because of the expected performance impact, implementation of this has had a low priority
   * and is not implemented yet. */

  assert(device_queue_);
  /* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host,
   * device and shared. For a new project it may be more beneficial to use USM shared memory,
   * because its automatic migration mechanism allows the same pointer to be used on both host
   * and device without explicit memory transfer operations. For Blender/Cycles, however, this
   * type of memory is not a good fit for the current application architecture: Cycles already
   * uses two different pointers for host and device activity, and already performs all needed
   * memory transfer operations itself. So the USM device memory type is used for the oneAPI
   * device, as it fits the Cycles architecture best. */
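  /* For illustration only -- a minimal sketch of the three USM flavors mentioned above,
   * assuming an in-order sycl::queue `q`; this is not code used by Cycles itself:
   *
   *   void *h = sycl::malloc_host(size, q);    // Host-resident, accessible from the device.
   *   void *d = sycl::malloc_device(size, q);  // Device-resident, needs explicit memcpy.
   *   void *s = sycl::malloc_shared(size, q);  // Migrated automatically between host/device.
   *   sycl::free(h, q); sycl::free(d, q); sycl::free(s, q);
   */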
  void *device_pointer = nullptr;
  if (mem.memory_size() + stats.mem_used < max_memory_on_device_)
    device_pointer = usm_alloc_device(device_queue_, memory_size);
  if (device_pointer == nullptr) {
    set_error("oneAPI kernel - device memory allocation error for " +
              string_human_readable_size(mem.memory_size()) +
              ", possibly caused by lack of available memory space on the device: " +
              string_human_readable_size(stats.mem_used) + " of " +
              string_human_readable_size(max_memory_on_device_) + " is already allocated");
  }

  mem.device_pointer = reinterpret_cast<device_ptr>(device_pointer);
  mem.device_size = memory_size;

  stats.mem_alloc(memory_size);
}

void OneapiDevice::generic_copy_to(device_memory &mem)
{
  if (!mem.device_pointer) {
    return;
  }
  size_t memory_size = mem.memory_size();

  /* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
  assert(mem.host_pointer);
  assert(device_queue_);
  usm_memcpy(device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
}

/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
SyclQueue *OneapiDevice::sycl_queue()
{
  return device_queue_;
}

string OneapiDevice::oneapi_error_message()
{
  return string(oneapi_error_string_);
}

void *OneapiDevice::kernel_globals_device_pointer()
{
  return kg_memory_device_;
}

void OneapiDevice::generic_free(device_memory &mem)
{
  if (!mem.device_pointer) {
    return;
  }

  stats.mem_free(mem.device_size);
  mem.device_size = 0;

  assert(device_queue_);
  usm_free(device_queue_, (void *)mem.device_pointer);
  mem.device_pointer = 0;
}

void OneapiDevice::mem_alloc(device_memory &mem)
{
  if (mem.type == MEM_TEXTURE) {
    assert(!"mem_alloc not supported for textures.");
  }
  else if (mem.type == MEM_GLOBAL) {
    assert(!"mem_alloc not supported for global memory.");
  }
  else {
    if (mem.name) {
      VLOG_DEBUG << "OneapiDevice::mem_alloc: \"" << mem.name << "\", "
                 << string_human_readable_number(mem.memory_size()) << " bytes. ("
                 << string_human_readable_size(mem.memory_size()) << ")";
    }
    generic_alloc(mem);
  }
}

void OneapiDevice::mem_copy_to(device_memory &mem)
{
  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
               << string_human_readable_size(mem.memory_size()) << ")";
  }

  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
    global_alloc(mem);
  }
  else if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
    tex_alloc((device_texture &)mem);
  }
  else {
    if (!mem.device_pointer)
      mem_alloc(mem);

    generic_copy_to(mem);
  }
}
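/* The sub-range copy below addresses `mem` as `h` rows of `w` elements of `elem` bytes each.
 * As a worked example (illustrative numbers only): for w = 1024, elem = 16 and y = 2, the
 * copy starts at byte offset elem * y * w = 32768 and transfers size = elem * w * h bytes;
 * when w, h and elem are all zero, the whole buffer is copied instead. */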
void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
    assert(!"mem_copy_from not supported for textures.");
  }
  else if (mem.host_pointer) {
    const size_t size = (w > 0 || h > 0 || elem > 0) ? (elem * w * h) : mem.memory_size();
    const size_t offset = elem * y * w;

    if (mem.name) {
      VLOG_DEBUG << "OneapiDevice::mem_copy_from: \"" << mem.name << "\" object of "
                 << string_human_readable_number(mem.memory_size()) << " bytes. ("
                 << string_human_readable_size(mem.memory_size()) << ") from offset " << offset
                 << " data " << size << " bytes";
    }

    assert(device_queue_);

    assert(size != 0);
    if (mem.device_pointer) {
      char *shifted_host = reinterpret_cast<char *>(mem.host_pointer) + offset;
      char *shifted_device = reinterpret_cast<char *>(mem.device_pointer) + offset;
      bool is_finished_ok = usm_memcpy(device_queue_, shifted_host, shifted_device, size);
      if (is_finished_ok == false) {
        set_error("oneAPI memory operation error: got runtime exception \"" +
                  oneapi_error_string_ + "\"");
      }
    }
  }
}

void OneapiDevice::mem_zero(device_memory &mem)
{
  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_zero: \"" << mem.name << "\", "
               << string_human_readable_number(mem.memory_size()) << " bytes. ("
               << string_human_readable_size(mem.memory_size()) << ")\n";
  }

  if (!mem.device_pointer) {
    mem_alloc(mem);
  }
  if (!mem.device_pointer) {
    return;
  }

  assert(device_queue_);
  bool is_finished_ok = usm_memset(
      device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
  if (is_finished_ok == false) {
    set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
              "\"");
  }
}

void OneapiDevice::mem_free(device_memory &mem)
{
  if (mem.name) {
    VLOG_DEBUG << "OneapiDevice::mem_free: \"" << mem.name << "\", "
               << string_human_readable_number(mem.device_size) << " bytes. ("
               << string_human_readable_size(mem.device_size) << ")\n";
  }

  if (mem.type == MEM_GLOBAL) {
    global_free(mem);
  }
  else if (mem.type == MEM_TEXTURE) {
    tex_free((device_texture &)mem);
  }
  else {
    generic_free(mem);
  }
}

device_ptr OneapiDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
  return reinterpret_cast<device_ptr>(reinterpret_cast<char *>(mem.device_pointer) +
                                      mem.memory_elements_size(offset));
}

void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
{
  assert(name);

  VLOG_DEBUG << "OneapiDevice::const_copy_to \"" << name << "\" object "
             << string_human_readable_number(size) << " bytes. ("
             << string_human_readable_size(size) << ")";

  ConstMemMap::iterator i = const_mem_map_.find(name);
  device_vector<uchar> *data;

  if (i == const_mem_map_.end()) {
    data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
    data->alloc(size);
    const_mem_map_.insert(ConstMemMap::value_type(name, data));
  }
  else {
    data = i->second;
  }

  assert(data->memory_size() <= size);
  memcpy(data->data(), host, size);
  data->copy_to_device();

  set_global_memory(device_queue_, kg_memory_, name, (void *)data->device_pointer);

  usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}

void OneapiDevice::global_alloc(device_memory &mem)
{
  assert(mem.name);

  size_t size = mem.memory_size();
  VLOG_DEBUG << "OneapiDevice::global_alloc \"" << mem.name << "\" object "
             << string_human_readable_number(size) << " bytes. ("
             << string_human_readable_size(size) << ")";

  generic_alloc(mem);
  generic_copy_to(mem);

  set_global_memory(device_queue_, kg_memory_, mem.name, (void *)mem.device_pointer);

  usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
}

void OneapiDevice::global_free(device_memory &mem)
{
  if (mem.device_pointer) {
    generic_free(mem);
  }
}
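/* To recap the constant-memory flow above (a summary of the behavior, not extra code):
 * const_copy_to() and global_alloc() first upload the payload itself, then patch the matching
 * pointer inside the host-side KernelGlobalsGPU copy (kg_memory_) via set_global_memory(), and
 * finally mirror the whole patched struct to the device-side copy (kg_memory_device_) with one
 * usm_memcpy(), so kernels always see a consistent kernel-globals object. */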
void OneapiDevice::tex_alloc(device_texture &mem)
{
  generic_alloc(mem);
  generic_copy_to(mem);

  /* Resize if needed. Also, in case of a resize, allocate in advance for future allocations. */
  const uint slot = mem.slot;
  if (slot >= texture_info_.size()) {
    texture_info_.resize(slot + 128);
  }

  texture_info_[slot] = mem.info;
  texture_info_[slot].data = (uint64_t)mem.device_pointer;
  need_texture_info_ = true;
}

void OneapiDevice::tex_free(device_texture &mem)
{
  /* There is no texture memory in SYCL. */
  if (mem.device_pointer) {
    generic_free(mem);
  }
}

unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
{
  return make_unique<OneapiDeviceQueue>(this);
}

bool OneapiDevice::should_use_graphics_interop()
{
  /* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
   * return false. */
  return false;
}

void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
{
  assert(device_queue_);
  return usm_aligned_alloc_host(device_queue_, memory_size, alignment);
}

void OneapiDevice::usm_free(void *usm_ptr)
{
  assert(device_queue_);
  return usm_free(device_queue_, usm_ptr);
}

void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
{
#  ifdef _DEBUG
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  sycl::info::device_type device_type =
      queue->get_device().get_info<sycl::info::device::device_type>();
  sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
  (void)usm_type;
  /* Device USM is always allowed; host USM is allowed on CPU devices or when the caller
   * explicitly permits it. Parentheses make the "&&" before "||" grouping explicit. */
  assert(usm_type == sycl::usm::alloc::device ||
         (((device_type == sycl::info::device_type::cpu || allow_host) &&
           usm_type == sycl::usm::alloc::host) ||
          usm_type == sycl::usm::alloc::unknown));
#  else
  /* Silence warning about unused arguments. */
  (void)queue_;
  (void)usm_ptr;
  (void)allow_host;
#  endif
}

bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
{
  bool finished_correct = true;
  try {
    std::vector<sycl::device> devices = OneapiDevice::available_devices();
    if (device_index < 0 || device_index >= (int)devices.size()) {
      return false;
    }
    sycl::queue *created_queue = new sycl::queue(devices[device_index],
                                                 sycl::property::queue::in_order());
    external_queue = reinterpret_cast<SyclQueue *>(created_queue);
  }
  catch (sycl::exception const &e) {
    finished_correct = false;
    oneapi_error_string_ = e.what();
  }
  return finished_correct;
}
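/* Context note (general SYCL semantics, added here for orientation): a queue created with
 * sycl::property::queue::in_order() executes submissions in the order they are enqueued, so
 * kernels and memory operations need no explicit sycl::event dependency chains; the host only
 * synchronizes at explicit wait points such as queue_synchronize() below. */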
void OneapiDevice::free_queue(SyclQueue *queue_)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  delete queue;
}

void *OneapiDevice::usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  return sycl::aligned_alloc_host(alignment, memory_size, *queue);
}

void *OneapiDevice::usm_alloc_device(SyclQueue *queue_, size_t memory_size)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  return sycl::malloc_device(memory_size, *queue);
}

void OneapiDevice::usm_free(SyclQueue *queue_, void *usm_ptr)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  OneapiDevice::check_usm(queue_, usm_ptr, true);
  sycl::free(usm_ptr, *queue);
}

bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  OneapiDevice::check_usm(queue_, dest, true);
  OneapiDevice::check_usm(queue_, src, true);
  sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
#  ifdef WITH_CYCLES_DEBUG
  try {
    /* NOTE(@nsirgien): Waiting on a memory operation may give more precise error messages.
     * Due to the impact on occupancy, it makes sense to enable this only during Cycles debug. */
    mem_event.wait_and_throw();
    return true;
  }
  catch (sycl::exception const &e) {
    oneapi_error_string_ = e.what();
    return false;
  }
#  else
  sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
  sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
  bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
                             src_type == sycl::usm::alloc::device;
  bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
                                          src_type == sycl::usm::alloc::unknown;
  /* NOTE(@sirgienko): A host-side blocking wait on this operation is mandatory, otherwise the
   * host may touch the memory before the transfer has finished. */
  if (from_device_to_host || host_or_device_memop_with_offset)
    mem_event.wait();
  return true;
#  endif
}

bool OneapiDevice::usm_memset(SyclQueue *queue_,
                              void *usm_ptr,
                              unsigned char value,
                              size_t num_bytes)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  OneapiDevice::check_usm(queue_, usm_ptr, true);
  sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
#  ifdef WITH_CYCLES_DEBUG
  try {
    /* NOTE(@nsirgien): Waiting on a memory operation may give more precise error messages.
     * Due to the impact on occupancy, it makes sense to enable this only during Cycles debug. */
    mem_event.wait_and_throw();
    return true;
  }
  catch (sycl::exception const &e) {
    oneapi_error_string_ = e.what();
    return false;
  }
#  else
  (void)mem_event;
  return true;
#  endif
}

bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
{
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
  try {
    queue->wait_and_throw();
    return true;
  }
  catch (sycl::exception const &e) {
    oneapi_error_string_ = e.what();
    return false;
  }
}

bool OneapiDevice::kernel_globals_size(size_t &kernel_global_size)
{
  kernel_global_size = sizeof(KernelGlobalsGPU);

  return true;
}

void OneapiDevice::set_global_memory(SyclQueue *queue_,
                                     void *kernel_globals,
                                     const char *memory_name,
                                     void *memory_device_pointer)
{
  assert(queue_);
  assert(kernel_globals);
  assert(memory_name);
  assert(memory_device_pointer);
  KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
  OneapiDevice::check_usm(queue_, memory_device_pointer);
  OneapiDevice::check_usm(queue_, kernel_globals, true);

  std::string matched_name(memory_name);

/* This macro will change the global pointer of KernelGlobals via name matching. */
#  define KERNEL_DATA_ARRAY(type, name) \
    else if (#name == matched_name) \
    { \
      globals->__##name = (type *)memory_device_pointer; \
      return; \
    }
  if (false) {
  }
  else if ("integrator_state" == matched_name) {
    globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
    return;
  }
  KERNEL_DATA_ARRAY(KernelData, data)
#  include "kernel/data_arrays.h"
  else {
    std::cerr << "Cannot find global/constant memory with name \"" << matched_name << "\"!"
              << std::endl;
    assert(false);
  }
#  undef KERNEL_DATA_ARRAY
}
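/* For illustration, one expansion of the KERNEL_DATA_ARRAY macro above (assuming a
 * hypothetical entry KERNEL_DATA_ARRAY(float, example_array) appeared in
 * kernel/data_arrays.h) would produce:
 *
 *   else if ("example_array" == matched_name) {
 *     globals->__example_array = (float *)memory_device_pointer;
 *     return;
 *   }
 *
 * so each include of kernel/data_arrays.h contributes one name-matching branch. */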
bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
                                  int kernel,
                                  size_t global_size,
                                  void **args)
{
  return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
}

/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
 * since Windows driver 101.3268. */
/* The same minimum compute-runtime version is currently required across Windows and Linux.
 * For Windows driver 101.3430, the compute-runtime version is 23904. */
static const int lowest_supported_driver_version_win = 1013430;
static const int lowest_supported_driver_version_neo = 23904;

int OneapiDevice::parse_driver_build_version(const sycl::device &device)
{
  const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
  int driver_build_version = 0;

  size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
  if (second_dot_position == std::string::npos) {
    std::cerr << "Unable to parse Intel GPU driver version \"" << driver_version
              << "\": it does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
              << " xx.xx.xxx.xxxx (Windows) for device \""
              << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
  }
  else {
    try {
      size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
      if (third_dot_position != std::string::npos) {
        const std::string &third_number_substr = driver_version.substr(
            second_dot_position + 1, third_dot_position - second_dot_position - 1);
        const std::string &fourth_number_substr = driver_version.substr(third_dot_position + 1);
        if (third_number_substr.length() == 3 && fourth_number_substr.length() == 4)
          driver_build_version = std::stoi(third_number_substr) * 10000 +
                                 std::stoi(fourth_number_substr);
      }
      else {
        const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
        driver_build_version = std::stoi(third_number_substr);
      }
    }
    catch (std::invalid_argument &) {
      std::cerr << "Unable to parse Intel GPU driver version \"" << driver_version
                << "\": it does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
                << " xx.xx.xxx.xxxx (Windows) for device \""
                << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
    }
  }

  return driver_build_version;
}
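/* Worked examples for the parser above (version strings are illustrative): a Windows-style
 * "31.0.101.3430" has a third dot, a 3-digit third group and a 4-digit fourth group, so it
 * parses to 101 * 10000 + 3430 = 1013430 (compared against
 * lowest_supported_driver_version_win); a Linux/NEO-style "1.3.23904" has no third dot, so
 * everything after the second dot parses to 23904 (compared against
 * lowest_supported_driver_version_neo). */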
std::vector<sycl::device> OneapiDevice::available_devices()
{
  bool allow_all_devices = false;
  if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr)
    allow_all_devices = true;

  const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();

  std::vector<sycl::device> available_devices;
  for (const sycl::platform &platform : oneapi_platforms) {
    /* Ignore OpenCL platforms to avoid using the same devices through both Level-Zero and
     * OpenCL. */
    if (platform.get_backend() == sycl::backend::opencl) {
      continue;
    }

    const std::vector<sycl::device> &oneapi_devices =
        (allow_all_devices) ? platform.get_devices(sycl::info::device_type::all) :
                              platform.get_devices(sycl::info::device_type::gpu);
    for (const sycl::device &device : oneapi_devices) {
      bool filter_out = false;
      if (!allow_all_devices) {
        /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
         * assuming they have either more than 96 Execution Units or not 7 threads per EU.
         * Official support can be broadened to older and smaller GPUs once ready. */
        if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
          /* Filtered-out defaults, in case these values aren't available through a too old
           * Level-Zero runtime. */
          int number_of_eus = 96;
          int threads_per_eu = 7;
          if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
            number_of_eus = device.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
          }
          if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
            threads_per_eu =
                device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
          }
          /* This filters out all Level-Zero supported GPUs from older generations than Arc. */
          if (number_of_eus <= 96 && threads_per_eu == 7) {
            filter_out = true;
          }
          /* If not already filtered out, check the driver version. */
          if (!filter_out) {
            int driver_build_version = parse_driver_build_version(device);
            if ((driver_build_version > 100000 &&
                 driver_build_version < lowest_supported_driver_version_win) ||
                driver_build_version < lowest_supported_driver_version_neo)
            {
              filter_out = true;
            }
          }
        }
        else {
          filter_out = true;
        }
      }
      if (!filter_out) {
        available_devices.push_back(device);
      }
    }
  }

  return available_devices;
}
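/* As a concrete illustration of the filter above (hardware numbers are approximate and given
 * for orientation only): an Intel Arc A770 reports 512 EUs with 8 hardware threads per EU and
 * passes, while a 96-EU / 7-threads-per-EU Xe-LP integrated GPU is filtered out. */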
char *OneapiDevice::device_capabilities()
{
  std::stringstream capabilities;

  const std::vector<sycl::device> &oneapi_devices = available_devices();
  for (const sycl::device &device : oneapi_devices) {
    const std::string &name = device.get_info<sycl::info::device::name>();

    capabilities << std::string("\t") << name << "\n";
#  define WRITE_ATTR(attribute_name, attribute_variable) \
    capabilities << "\t\tsycl::info::device::" attribute_name "\t\t\t" << attribute_variable \
                 << "\n";
#  define GET_NUM_ATTR(attribute) \
    { \
      size_t attribute = (size_t)device.get_info<sycl::info::device::attribute>(); \
      capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \
    }

    GET_NUM_ATTR(vendor_id)
    GET_NUM_ATTR(max_compute_units)
    GET_NUM_ATTR(max_work_item_dimensions)

    sycl::id<3> max_work_item_sizes =
        device.get_info<sycl::info::device::max_work_item_sizes<3>>();
    WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
    WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
    WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))

    GET_NUM_ATTR(max_work_group_size)
    GET_NUM_ATTR(max_num_sub_groups)
    GET_NUM_ATTR(sub_group_independent_forward_progress)

    GET_NUM_ATTR(preferred_vector_width_char)
    GET_NUM_ATTR(preferred_vector_width_short)
    GET_NUM_ATTR(preferred_vector_width_int)
    GET_NUM_ATTR(preferred_vector_width_long)
    GET_NUM_ATTR(preferred_vector_width_float)
    GET_NUM_ATTR(preferred_vector_width_double)
    GET_NUM_ATTR(preferred_vector_width_half)
    GET_NUM_ATTR(native_vector_width_char)
    GET_NUM_ATTR(native_vector_width_short)
    GET_NUM_ATTR(native_vector_width_int)
    GET_NUM_ATTR(native_vector_width_long)
    GET_NUM_ATTR(native_vector_width_float)
    GET_NUM_ATTR(native_vector_width_double)
    GET_NUM_ATTR(native_vector_width_half)

    size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>();
    WRITE_ATTR("max_clock_frequency", max_clock_frequency)

    GET_NUM_ATTR(address_bits)
    GET_NUM_ATTR(max_mem_alloc_size)

    /* NOTE(@nsirgien): The implementation doesn't use image support, as bindless images aren't
     * supported, so always report false here, even if the device supports HW texture usage
     * acceleration. */
    bool image_support = false;
    WRITE_ATTR("image_support", (size_t)image_support)

    GET_NUM_ATTR(max_parameter_size)
    GET_NUM_ATTR(mem_base_addr_align)
    GET_NUM_ATTR(global_mem_size)
    GET_NUM_ATTR(local_mem_size)
    GET_NUM_ATTR(error_correction_support)
    GET_NUM_ATTR(profiling_timer_resolution)
    GET_NUM_ATTR(is_available)

#  undef GET_NUM_ATTR
#  undef WRITE_ATTR
    capabilities << "\n";
  }

  return ::strdup(capabilities.str().c_str());
}

void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
{
  int num = 0;
  std::vector<sycl::device> devices = OneapiDevice::available_devices();
  for (sycl::device &device : devices) {
    const std::string &platform_name =
        device.get_platform().get_info<sycl::info::platform::name>();
    std::string name = device.get_info<sycl::info::device::name>();
    std::string id = "ONEAPI_" + platform_name + "_" + name;
    if (device.has(sycl::aspect::ext_intel_pci_address)) {
      id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
    }
    (cb)(id.c_str(), name.c_str(), num, user_ptr);
    num++;
  }
}

size_t OneapiDevice::get_memcapacity()
{
  return reinterpret_cast<sycl::queue *>(device_queue_)
      ->get_device()
      .get_info<sycl::info::device::global_mem_size>();
}

int OneapiDevice::get_num_multiprocessors()
{
  const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
  if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
    return device.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
  }
  else
    return 0;
}

int OneapiDevice::get_max_num_threads_per_multiprocessor()
{
  const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
  if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) &&
      device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu))
  {
    return device.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>() *
           device.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
  }
  else
    return 0;
}

CCL_NAMESPACE_END

#endif