diff options
Diffstat (limited to 'intern/cycles/device/cuda')
-rw-r--r-- | intern/cycles/device/cuda/device_cuda.h | 1 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_cuda_impl.cpp | 22 |
2 files changed, 21 insertions, 2 deletions
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h index e5e3e24165d..c3271c3cfcf 100644 --- a/intern/cycles/device/cuda/device_cuda.h +++ b/intern/cycles/device/cuda/device_cuda.h @@ -71,6 +71,7 @@ class CUDADevice : public Device { }; typedef map<device_memory *, CUDAMem> CUDAMemMap; CUDAMemMap cuda_mem_map; + thread_mutex cuda_mem_map_mutex; struct PixelMem { GLuint cuPBO; diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 01adf10f252..48151365e5d 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -718,8 +718,10 @@ void CUDADevice::init_host_memory() void CUDADevice::load_texture_info() { if (need_texture_info) { - texture_info.copy_to_device(); + /* Unset flag before copying, so this does not loop indefinitely if the copy below calls + * into 'move_textures_to_host' (which calls 'load_texture_info' again). */ need_texture_info = false; + texture_info.copy_to_device(); } } @@ -988,6 +990,7 @@ void CUDADevice::mem_alloc(device_memory &mem) assert(!"mem_alloc not supported for global memory."); } else { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_alloc(mem); } } @@ -1006,10 +1009,10 @@ void CUDADevice::mem_copy_to(device_memory &mem) tex_alloc((device_texture &)mem); } else { + thread_scoped_lock lock(cuda_mem_map_mutex); if (!mem.device_pointer) { generic_alloc(mem); } - generic_copy_to(mem); } } @@ -1048,6 +1051,7 @@ void CUDADevice::mem_zero(device_memory &mem) /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory * regardless of mem.host_pointer and mem.shared_pointer. */ + thread_scoped_lock lock(cuda_mem_map_mutex); if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { const CUDAContextScope scope(this); cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); @@ -1069,6 +1073,7 @@ void CUDADevice::mem_free(device_memory &mem) tex_free((device_texture &)mem); } else { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_free(mem); } } @@ -1092,6 +1097,7 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size) void CUDADevice::global_alloc(device_memory &mem) { if (mem.is_resident(this)) { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_alloc(mem); generic_copy_to(mem); } @@ -1102,6 +1108,7 @@ void CUDADevice::global_alloc(device_memory &mem) void CUDADevice::global_free(device_memory &mem) { if (mem.is_resident(this) && mem.device_pointer) { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_free(mem); } } @@ -1170,6 +1177,8 @@ void CUDADevice::tex_alloc(device_texture &mem) size_t src_pitch = mem.data_width * dsize * mem.data_elements; size_t dst_pitch = src_pitch; + thread_scoped_lock lock(cuda_mem_map_mutex); + if (!mem.is_resident(this)) { cmem = &cuda_mem_map[&mem]; cmem->texobject = 0; @@ -1257,6 +1266,9 @@ void CUDADevice::tex_alloc(device_texture &mem) cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); } + /* Unlock mutex before resizing texture info, since that may attempt to lock it again. */ + lock.unlock(); + /* Resize once */ const uint slot = mem.slot; if (slot >= texture_info.size()) { @@ -1305,6 +1317,11 @@ void CUDADevice::tex_alloc(device_texture &mem) texDesc.filterMode = filter_mode; texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + /* Lock again and refresh the data pointer (in case another thread modified the map in the + * meantime). */ + lock.lock(); + cmem = &cuda_mem_map[&mem]; + cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); texture_info[slot].data = (uint64_t)cmem->texobject; @@ -1318,6 +1335,7 @@ void CUDADevice::tex_free(device_texture &mem) { if (mem.device_pointer) { CUDAContextScope scope(this); + thread_scoped_lock lock(cuda_mem_map_mutex); const CUDAMem &cmem = cuda_mem_map[&mem]; if (cmem.texobject) { |