diff options
author | Patrick Mours <pmours@nvidia.com> | 2020-12-10 16:18:25 +0300 |
---|---|---|
committer | Patrick Mours <pmours@nvidia.com> | 2020-12-11 15:24:29 +0300 |
commit | bfb6fce6594e9cf133bd18aee311c1e5e32dc799 (patch) | |
tree | 7c813e17ea87e9aae64221b3ac7a8d42ab894c85 /intern/cycles/device/cuda/device_cuda_impl.cpp | |
parent | d72ec16e70721408c875040325c984941687b4a2 (diff) |
Cycles: Add CPU+GPU rendering support with OptiX
Adds support for building multiple BVH types in order to support using both CPU and OptiX
devices for rendering simultaneously. Primitive packing for Embree and OptiX is now
standalone, so it only needs to be run once and can be shared between the two. Additionally,
BVH building was made a device call, so that each device backend can decide how to
perform the building. The multi-device for instance creates a special multi-BVH that holds
references to several sub-BVHs, one for each sub-device.
Reviewed By: brecht, kevindietrich
Differential Revision: https://developer.blender.org/D9718
Diffstat (limited to 'intern/cycles/device/cuda/device_cuda_impl.cpp')
-rw-r--r-- | intern/cycles/device/cuda/device_cuda_impl.cpp | 22 |
1 file changed, 20 insertions, 2 deletions
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 01adf10f252..6c26b247743 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -718,8 +718,10 @@ void CUDADevice::init_host_memory() void CUDADevice::load_texture_info() { if (need_texture_info) { - texture_info.copy_to_device(); + /* Unset flag before copying, so this does not loop indefinitely if the copy below calls + * into 'move_textures_to_host' (which calls 'load_texture_info' again). */ need_texture_info = false; + texture_info.copy_to_device(); } } @@ -988,6 +990,7 @@ void CUDADevice::mem_alloc(device_memory &mem) assert(!"mem_alloc not supported for global memory."); } else { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_alloc(mem); } } @@ -1006,10 +1009,10 @@ void CUDADevice::mem_copy_to(device_memory &mem) tex_alloc((device_texture &)mem); } else { + thread_scoped_lock lock(cuda_mem_map_mutex); if (!mem.device_pointer) { generic_alloc(mem); } - generic_copy_to(mem); } } @@ -1048,6 +1051,7 @@ void CUDADevice::mem_zero(device_memory &mem) /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory * regardless of mem.host_pointer and mem.shared_pointer. 
*/ + thread_scoped_lock lock(cuda_mem_map_mutex); if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { const CUDAContextScope scope(this); cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); @@ -1069,6 +1073,7 @@ void CUDADevice::mem_free(device_memory &mem) tex_free((device_texture &)mem); } else { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_free(mem); } } @@ -1092,6 +1097,7 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size) void CUDADevice::global_alloc(device_memory &mem) { if (mem.is_resident(this)) { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_alloc(mem); generic_copy_to(mem); } @@ -1102,6 +1108,7 @@ void CUDADevice::global_alloc(device_memory &mem) void CUDADevice::global_free(device_memory &mem) { if (mem.is_resident(this) && mem.device_pointer) { + thread_scoped_lock lock(cuda_mem_map_mutex); generic_free(mem); } } @@ -1170,6 +1177,8 @@ void CUDADevice::tex_alloc(device_texture &mem) size_t src_pitch = mem.data_width * dsize * mem.data_elements; size_t dst_pitch = src_pitch; + thread_scoped_lock lock(cuda_mem_map_mutex); + if (!mem.is_resident(this)) { cmem = &cuda_mem_map[&mem]; cmem->texobject = 0; @@ -1257,6 +1266,9 @@ void CUDADevice::tex_alloc(device_texture &mem) cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); } + /* Unlock mutex before resizing texture info, since that may attempt to lock it again. */ + lock.unlock(); + /* Resize once */ const uint slot = mem.slot; if (slot >= texture_info.size()) { @@ -1305,6 +1317,11 @@ void CUDADevice::tex_alloc(device_texture &mem) texDesc.filterMode = filter_mode; texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + /* Lock again and refresh the data pointer (in case another thread modified the map in the + * meantime). 
*/ + lock.lock(); + cmem = &cuda_mem_map[&mem]; + cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); texture_info[slot].data = (uint64_t)cmem->texobject; @@ -1318,6 +1335,7 @@ void CUDADevice::tex_free(device_texture &mem) { if (mem.device_pointer) { CUDAContextScope scope(this); + thread_scoped_lock lock(cuda_mem_map_mutex); const CUDAMem &cmem = cuda_mem_map[&mem]; if (cmem.texobject) { |