From 8b11ed392c10f813c33d61cfdb5efebd9a679255 Mon Sep 17 00:00:00 2001 From: Nikita Sirgienko Date: Mon, 5 Sep 2022 23:04:43 +0200 Subject: Cycles: Fix crashes in oneAPI backend for scenes not fitting in dGPU memory Differential Revision: https://developer.blender.org/D15889 --- intern/cycles/device/oneapi/device_impl.cpp | 36 ++++++++++++++++++----------- intern/cycles/device/oneapi/device_impl.h | 1 + 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp index bdcc15bba56..dd0622a5bd5 100644 --- a/intern/cycles/device/oneapi/device_impl.cpp +++ b/intern/cycles/device/oneapi/device_impl.cpp @@ -65,6 +65,8 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, kg_memory_device_ = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, globals_segment_size); kg_memory_size_ = globals_segment_size; + + max_memory_on_device_ = oneapi_dll_.oneapi_get_memcapacity(device_queue_); } OneapiDevice::~OneapiDevice() @@ -134,17 +136,16 @@ void OneapiDevice::generic_alloc(device_memory &mem) * because Cycles already uses two different pointer for host activity and device activity, and * also has to perform all needed memory transfer operations. So, USM device memory * type has been used for oneAPI device in order to better fit in Cycles architecture. */ - void *device_pointer = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, memory_size); + void *device_pointer = nullptr; + if (mem.memory_size() + stats.mem_used < max_memory_on_device_) + device_pointer = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, memory_size); if (device_pointer == nullptr) { - size_t max_memory_on_device = oneapi_dll_.oneapi_get_memcapacity(device_queue_); set_error("oneAPI kernel - device memory allocation error for " + string_human_readable_size(mem.memory_size()) + ", possibly caused by lack of available memory space on the device: " + string_human_readable_size(stats.mem_used) + " of " + - string_human_readable_size(max_memory_on_device) + " is already allocated"); - return; + string_human_readable_size(max_memory_on_device_) + " is already allocated"); } - assert(device_pointer); mem.device_pointer = reinterpret_cast(device_pointer); mem.device_size = memory_size; @@ -154,6 +155,9 @@ void OneapiDevice::generic_alloc(device_memory &mem) void OneapiDevice::generic_copy_to(device_memory &mem) { + if (!mem.device_pointer) { + return; + } size_t memory_size = mem.memory_size(); /* Copy operation from host shouldn't be requested if there is no memory allocated on host. */ @@ -186,7 +190,10 @@ void *OneapiDevice::kernel_globals_device_pointer() void OneapiDevice::generic_free(device_memory &mem) { - assert(mem.device_pointer); + if (!mem.device_pointer) { + return; + } + stats.mem_free(mem.device_size); mem.device_size = 0; @@ -256,14 +263,15 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t assert(device_queue_); assert(size != 0); - assert(mem.device_pointer); - char *shifted_host = reinterpret_cast(mem.host_pointer) + offset; - char *shifted_device = reinterpret_cast(mem.device_pointer) + offset; - bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy( - device_queue_, shifted_host, shifted_device, size); - if (is_finished_ok == false) { - set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ + - "\""); + if (mem.device_pointer) { + char *shifted_host = reinterpret_cast(mem.host_pointer) + offset; + char *shifted_device = reinterpret_cast(mem.device_pointer) + offset; + bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy( + device_queue_, shifted_host, shifted_device, size); + if (is_finished_ok == false) { + set_error("oneAPI memory operation error: got runtime exception \"" + + oneapi_error_string_ + "\""); + } } } } diff --git a/intern/cycles/device/oneapi/device_impl.h b/intern/cycles/device/oneapi/device_impl.h index a0a747a3cf2..6abebf98684 100644 --- a/intern/cycles/device/oneapi/device_impl.h +++ b/intern/cycles/device/oneapi/device_impl.h @@ -24,6 +24,7 @@ class OneapiDevice : public Device { void *kg_memory_; void *kg_memory_device_; size_t kg_memory_size_ = (size_t)0; + size_t max_memory_on_device_ = (size_t)0; OneAPIDLLInterface oneapi_dll_; std::string oneapi_error_string_; -- cgit v1.2.3