git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/device/cuda/device_cuda_impl.cpp')
-rw-r--r--  intern/cycles/device/cuda/device_cuda_impl.cpp | 127
1 file changed, 97 insertions(+), 30 deletions(-)
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index acf53c3eb1b..64c7f5e7d34 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -207,6 +207,7 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
+ pitch_alignment = 0;
functions.loaded = false;
@@ -224,6 +225,9 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
cuda_assert(
cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+ cuda_assert(cuDeviceGetAttribute(
+ &pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+
unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
if (can_map_host) {
ctx_flags |= CU_CTX_MAP_HOST;
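
The hunk above caches the texture pitch alignment once at device construction, rather than querying it on every 2D texture allocation (see the tex_alloc hunk further down). A minimal standalone sketch of the underlying driver call, assuming device index 0 and omitting error handling:

#include <cuda.h>
#include <cstdio>

int main()
{
  cuInit(0);

  CUdevice dev;
  cuDeviceGet(&dev, 0); /* Device index 0 is an assumption for illustration. */

  int pitch_alignment = 0;
  cuDeviceGetAttribute(&pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, dev);
  printf("texture pitch alignment: %d bytes\n", pitch_alignment);
  return 0;
}
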
@@ -286,6 +290,49 @@ bool CUDADevice::support_device(const DeviceRequestedFeatures & /*requested_feat
return true;
}
+bool CUDADevice::check_peer_access(Device *peer_device)
+{
+ if (peer_device == this) {
+ return false;
+ }
+ if (peer_device->info.type != DEVICE_CUDA && peer_device->info.type != DEVICE_OPTIX) {
+ return false;
+ }
+
+ CUDADevice *const peer_device_cuda = static_cast<CUDADevice *>(peer_device);
+
+ int can_access = 0;
+ cuda_assert(cuDeviceCanAccessPeer(&can_access, cuDevice, peer_device_cuda->cuDevice));
+ if (can_access == 0) {
+ return false;
+ }
+
+ /* Ensure array access over the link is possible as well (for 3D textures). */
+ cuda_assert(cuDeviceGetP2PAttribute(&can_access,
+ CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED,
+ cuDevice,
+ peer_device_cuda->cuDevice));
+ if (can_access == 0) {
+ return false;
+ }
+
+ /* Enable peer access in both directions. */
+ {
+ const CUDAContextScope scope(this);
+ if (cuda_error(cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0))) {
+ return false;
+ }
+ }
+ {
+ const CUDAContextScope scope(peer_device_cuda);
+ if (cuda_error(cuCtxEnablePeerAccess(cuContext, 0))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool CUDADevice::use_adaptive_compilation()
{
return DebugFlags().cuda.adaptive_compile;
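
check_peer_access() above makes three driver-level queries/actions: a plain peer-link check, an array access check (required because 3D textures live in CUDA arrays), and enabling access in both directions. A standalone sketch of the same sequence for devices 0 and 1, with error handling reduced to early exits instead of the patch's cuda_assert()/cuda_error() wrappers:

#include <cuda.h>
#include <cstdio>

int main()
{
  cuInit(0);

  CUdevice dev_a, dev_b;
  cuDeviceGet(&dev_a, 0);
  cuDeviceGet(&dev_b, 1); /* Assumes two CUDA devices are present. */

  /* Plain link check, as in the first half of check_peer_access(). */
  int can_access = 0;
  cuDeviceCanAccessPeer(&can_access, dev_a, dev_b);
  if (!can_access) {
    return 1;
  }

  /* Ensure CUDA array access works over the link (needed for 3D textures). */
  cuDeviceGetP2PAttribute(
      &can_access, CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED, dev_a, dev_b);
  if (!can_access) {
    return 1;
  }

  /* Enable access in both directions. cuCtxEnablePeerAccess() takes the
   * other context while the accessing context is current. */
  CUcontext ctx_a, ctx_b;
  cuCtxCreate(&ctx_a, 0, dev_a);
  cuCtxCreate(&ctx_b, 0, dev_b); /* ctx_b becomes current. */
  cuCtxEnablePeerAccess(ctx_a, 0); /* b -> a */
  cuCtxSetCurrent(ctx_a);
  cuCtxEnablePeerAccess(ctx_b, 0); /* a -> b */

  printf("peer access enabled in both directions\n");
  return 0;
}
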
@@ -674,6 +721,12 @@ void CUDADevice::load_texture_info()
void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
{
+ /* Break out of recursive calls, which can happen when moving memory on a multi device. */
+ static bool any_device_moving_textures_to_host = false;
+ if (any_device_moving_textures_to_host) {
+ return;
+ }
+
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
@@ -687,6 +740,12 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
device_memory &mem = *pair.first;
CUDAMem *cmem = &pair.second;
+ /* Only textures allocated on this device can be moved (not those from peer
+ * devices), and memory that is already on the host must be skipped. */
+ if (!mem.is_resident(this) || cmem->use_mapped_host) {
+ continue;
+ }
+
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
@@ -696,11 +755,6 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
continue;
}
- /* Already in host memory. */
- if (cmem->use_mapped_host) {
- continue;
- }
-
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
@@ -723,26 +777,30 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
- /* Preserve the original device pointer, in case of multi device
- * we can't change it because the pointer mapping would break. */
- device_ptr prev_pointer = max_mem->device_pointer;
- size_t prev_size = max_mem->device_size;
+ any_device_moving_textures_to_host = true;
- mem_copy_to(*max_mem);
+ /* Potentially need to call back into the multi device, so that the pointer
+ * mapping and peer devices are updated. This is also necessary because the
+ * device pointer may only be a key here, so it cannot be accessed and freed
+ * directly. Unfortunately it does mean that memory is reallocated on all
+ * other devices as well, which is potentially dangerous when still in use
+ * (a thread rendering on another device is only caught by this mutex if it
+ * happens to do an allocation at the same time as well). */
+ max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
- max_mem->device_pointer = prev_pointer;
- max_mem->device_size = prev_size;
+ any_device_moving_textures_to_host = false;
}
else {
break;
}
}
+ /* Unset flag before texture info is reloaded, since it should stay in device memory. */
+ move_texture_to_host = false;
+
/* Update texture info array with new pointers. */
load_texture_info();
-
- move_texture_to_host = false;
}
CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
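
The static any_device_moving_textures_to_host flag in the hunks above guards against re-entry: max_mem->device_copy_to() can call back into the multi device, which fans out to the other devices, whose allocations may in turn hit the memory limit and land back in move_textures_to_host(). A minimal sketch of that guard pattern in isolation (all names hypothetical):

#include <cstdio>

void move_to_host(int depth);

/* Stands in for device_copy_to(), which may re-enter the mover via the
 * multi device. */
void reallocate(int depth)
{
  move_to_host(depth + 1);
}

void move_to_host(int depth)
{
  static bool moving = false; /* Shared across all devices, like the patch's flag. */
  if (moving) {
    printf("re-entry at depth %d suppressed\n", depth);
    return;
  }
  moving = true;
  reallocate(depth);
  moving = false;
}

int main()
{
  move_to_host(0); /* Prints: re-entry at depth 1 suppressed */
  return 0;
}
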
@@ -808,9 +866,6 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
map_host_used += size;
status = " in host memory";
}
- else {
- status = " failed, out of host memory";
- }
}
if (mem_alloc_result != CUDA_SUCCESS) {
@@ -906,7 +961,7 @@ void CUDADevice::generic_free(device_memory &mem)
}
else {
/* Free device memory. */
- cuMemFree(mem.device_pointer);
+ cuda_assert(cuMemFree(mem.device_pointer));
}
stats.mem_free(mem.device_size);
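
The one-line change above routes cuMemFree() through cuda_assert(), so driver errors during frees are reported instead of silently dropped. A sketch of what such a wrapper can look like (hypothetical; not Cycles' actual macro, which also integrates with its own error reporting):

#include <cuda.h>
#include <cstdio>

#define CUDA_ASSERT(stmt) \
  do { \
    CUresult result_ = (stmt); \
    if (result_ != CUDA_SUCCESS) { \
      const char *name_ = nullptr; \
      cuGetErrorName(result_, &name_); \
      fprintf(stderr, "CUDA error %s in %s\n", name_ ? name_ : "unknown", #stmt); \
    } \
  } while (0)

/* Usage: CUDA_ASSERT(cuMemFree(device_pointer)); */
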
@@ -1032,18 +1087,17 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
void CUDADevice::global_alloc(device_memory &mem)
{
- CUDAContextScope scope(this);
-
- generic_alloc(mem);
- generic_copy_to(mem);
+ if (mem.is_resident(this)) {
+ generic_alloc(mem);
+ generic_copy_to(mem);
+ }
const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
}
void CUDADevice::global_free(device_memory &mem)
{
- if (mem.device_pointer) {
- CUDAContextScope scope(this);
+ if (mem.is_resident(this) && mem.device_pointer) {
generic_free(mem);
}
}
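
With the residency checks, only the owning device allocates and frees global memory, while const_copy_to() still publishes the pointer to this device's constant memory so peer devices can dereference it over the link. A self-contained toy of that ownership rule (all names hypothetical, no CUDA involved):

#include <cstdio>
#include <cstdlib>
#include <vector>

struct Mem {
  int owner;           /* Stands in for mem.is_resident(this). */
  void *ptr = nullptr;
};

struct Dev {
  int id;

  void global_alloc(Mem &mem)
  {
    if (mem.owner == id) {
      mem.ptr = std::malloc(16); /* Only the resident device allocates. */
    }
    /* const_copy_to(): every device records the pointer, resident or not. */
    printf("device %d sees pointer %p\n", id, mem.ptr);
  }

  void global_free(Mem &mem)
  {
    if (mem.owner == id && mem.ptr) { /* Only the owner frees. */
      std::free(mem.ptr);
      mem.ptr = nullptr;
    }
  }
};

int main()
{
  std::vector<Dev> devices = {{0}, {1}};
  Mem mem{0}; /* Resident on device 0. */
  for (Dev &d : devices) {
    d.global_alloc(mem);
  }
  for (Dev &d : devices) {
    d.global_free(mem);
  }
  return 0;
}
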
@@ -1112,7 +1166,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
- if (mem.data_depth > 1) {
+ if (!mem.is_resident(this)) {
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+
+ if (mem.data_depth > 1) {
+ array_3d = (CUarray)mem.device_pointer;
+ cmem->array = array_3d;
+ }
+ else if (mem.data_height > 0) {
+ dst_pitch = align_up(src_pitch, pitch_alignment);
+ }
+ }
+ else if (mem.data_depth > 1) {
/* 3D texture using array, there is no API for linear memory. */
CUDA_ARRAY3D_DESCRIPTOR desc;
@@ -1156,10 +1222,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
- int alignment = 0;
- cuda_assert(
- cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
- dst_pitch = align_up(src_pitch, alignment);
+ dst_pitch = align_up(src_pitch, pitch_alignment);
size_t dst_size = dst_pitch * mem.data_height;
cmem = generic_alloc(mem, dst_size - mem.memory_size());
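
Both the resident and the non-resident paths now reuse the cached pitch_alignment from device construction. align_up() rounds the source pitch up to the next multiple of the alignment; a sketch assuming the usual power-of-two implementation, with a worked example:

#include <cstddef>

/* Round offset up to the next multiple of a power-of-two alignment. */
static size_t align_up(size_t offset, size_t alignment)
{
  return (offset + alignment - 1) & ~(alignment - 1);
}

/* Example: a 250-texel-wide half4 row has src_pitch = 250 * 8 = 2000 bytes.
 * With a 32-byte texture pitch alignment:
 *   dst_pitch = align_up(2000, 32) = (2000 + 31) & ~31 = 2016. */
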
@@ -1251,7 +1314,11 @@ void CUDADevice::tex_free(device_texture &mem)
cuTexObjectDestroy(cmem.texobject);
}
- if (cmem.array) {
+ if (!mem.is_resident(this)) {
+ /* Do not free memory here, since it was allocated on a different device. */
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ else if (cmem.array) {
/* Free array. */
cuArrayDestroy(cmem.array);
stats.mem_free(mem.device_size);