
git.blender.org/blender.git
author     James Horsley <mmdanggg2>             2021-01-27 16:04:37 +0300
committer  Patrick Mours <pmours@nvidia.com>     2021-01-27 17:27:57 +0300
commit     4fbeb3e6be3ed03e55ebd8a9d41c80c2a4f08815 (patch)
tree       fb15dbd75178806d3684f30690fd9a7a5bd1e300 /intern/cycles/device
parent     cd24712c2c51ab753369f7cae8c43c38115bd59f (diff)
Fix T85089: Crash when rendering scene that does not fit into GPU memory with CUDA/OptiX
The "cuda_mem_map_mutex" was potentially being locked recursively during the call to "CUDADevice::move_textures_to_host", which crashed. This moves around the locking and unlocking of "cuda_mem_map_mutex", so that it doesn't call a function that locks it while still holding the lock. Reviewed By: pmoursnv Maniphest Tasks: T85089, T84734 Differential Revision: https://developer.blender.org/D10219
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--  intern/cycles/device/cuda/device_cuda_impl.cpp | 22
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 307d1469ba5..cff30eb9b48 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -742,6 +742,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
size_t max_size = 0;
bool max_is_image = false;
+ thread_scoped_lock lock(cuda_mem_map_mutex);
foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
device_memory &mem = *pair.first;
CUDAMem *cmem = &pair.second;
@@ -773,6 +774,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
max_mem = &mem;
}
}
+ lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple CUDA devices could be moving the memory. The
@@ -894,6 +896,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
}
/* Insert into map of allocations. */
+ thread_scoped_lock lock(cuda_mem_map_mutex);
CUDAMem *cmem = &cuda_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
@@ -935,6 +938,7 @@ void CUDADevice::generic_copy_to(device_memory &mem)
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
+ thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(
@@ -946,6 +950,7 @@ void CUDADevice::generic_free(device_memory &mem)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
+ thread_scoped_lock lock(cuda_mem_map_mutex);
const CUDAMem &cmem = cuda_mem_map[&mem];
/* If cmem.use_mapped_host is true, reference counting is used
@@ -990,7 +995,6 @@ void CUDADevice::mem_alloc(device_memory &mem)
assert(!"mem_alloc not supported for global memory.");
}
else {
- thread_scoped_lock lock(cuda_mem_map_mutex);
generic_alloc(mem);
}
}
@@ -1009,7 +1013,6 @@ void CUDADevice::mem_copy_to(device_memory &mem)
tex_alloc((device_texture &)mem);
}
else {
- thread_scoped_lock lock(cuda_mem_map_mutex);
if (!mem.device_pointer) {
generic_alloc(mem);
}
@@ -1073,7 +1076,6 @@ void CUDADevice::mem_free(device_memory &mem)
tex_free((device_texture &)mem);
}
else {
- thread_scoped_lock lock(cuda_mem_map_mutex);
generic_free(mem);
}
}
@@ -1097,7 +1099,6 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
void CUDADevice::global_alloc(device_memory &mem)
{
if (mem.is_resident(this)) {
- thread_scoped_lock lock(cuda_mem_map_mutex);
generic_alloc(mem);
generic_copy_to(mem);
}
@@ -1108,7 +1109,6 @@ void CUDADevice::global_alloc(device_memory &mem)
void CUDADevice::global_free(device_memory &mem)
{
if (mem.is_resident(this) && mem.device_pointer) {
- thread_scoped_lock lock(cuda_mem_map_mutex);
generic_free(mem);
}
}
@@ -1177,9 +1177,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
- thread_scoped_lock lock(cuda_mem_map_mutex);
-
if (!mem.is_resident(this)) {
+ thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
@@ -1229,6 +1228,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
+ thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
cmem->array = array_3d;
@@ -1266,9 +1266,6 @@ void CUDADevice::tex_alloc(device_texture &mem)
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
}
- /* Unlock mutex before resizing texture info, since that may attempt to lock it again. */
- lock.unlock();
-
/* Resize once */
const uint slot = mem.slot;
if (slot >= texture_info.size()) {
@@ -1317,9 +1314,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
- /* Lock again and refresh the data pointer (in case another thread modified the map in the
- * meantime). */
- lock.lock();
+ thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -1357,6 +1352,7 @@ void CUDADevice::tex_free(device_texture &mem)
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
else {
+ lock.unlock();
generic_free(mem);
}
}