git.blender.org/blender.git
author    Patrick Mours <pmours@nvidia.com>  2020-12-10 16:18:25 +0300
committer Patrick Mours <pmours@nvidia.com>  2020-12-11 15:24:29 +0300
commit    bfb6fce6594e9cf133bd18aee311c1e5e32dc799 (patch)
tree      7c813e17ea87e9aae64221b3ac7a8d42ab894c85 /intern/cycles/device/cuda/device_cuda_impl.cpp
parent    d72ec16e70721408c875040325c984941687b4a2 (diff)
Cycles: Add CPU+GPU rendering support with OptiX
Adds support for building multiple BVH types in order to support using both CPU and OptiX devices for rendering simultaneously. Primitive packing for Embree and OptiX is now standalone, so it only needs to be run once and can be shared between the two. Additionally, BVH building was made a device call, so that each device backend can decide how to perform the building. The multi-device for instance creates a special multi-BVH that holds references to several sub-BVHs, one for each sub-device.

Reviewed By: brecht, kevindietrich

Differential Revision: https://developer.blender.org/D9718
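As an illustration of the multi-BVH layering described above, here is a minimal C++ sketch. The class names (Device, BVH, MultiBVH) and the build_bvh hook are assumptions for illustration only, not the actual Cycles API:

#include <memory>
#include <vector>

class BVH {
 public:
  virtual ~BVH() = default;
};

/* BVH building is a device call: each backend (CPU/Embree, OptiX, ...)
 * decides how to build its own acceleration structure. */
class Device {
 public:
  virtual ~Device() = default;
  virtual void build_bvh(BVH &bvh) = 0;
};

/* The multi-device creates a multi-BVH that holds one sub-BVH per
 * sub-device, so CPU and OptiX devices can render simultaneously. */
class MultiBVH : public BVH {
 public:
  std::vector<std::unique_ptr<BVH>> sub_bvhs;
};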
Diffstat (limited to 'intern/cycles/device/cuda/device_cuda_impl.cpp')
-rw-r--r--  intern/cycles/device/cuda/device_cuda_impl.cpp | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 01adf10f252..6c26b247743 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -718,8 +718,10 @@ void CUDADevice::init_host_memory()
void CUDADevice::load_texture_info()
{
if (need_texture_info) {
- texture_info.copy_to_device();
+ /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
+ * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
+ texture_info.copy_to_device();
}
}
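The reordering in the hunk above is a re-entrancy guard: the flag is cleared before the copy so that, if copy_to_device() ends up calling back into load_texture_info() (via move_textures_to_host), the nested call sees the flag already unset and returns instead of recursing forever. A minimal standalone sketch of the pattern, with illustrative names:

struct TextureLoader {
  bool need_texture_info = false;

  void load_texture_info()
  {
    if (need_texture_info) {
      /* Clear the flag first: copy_to_device() may call back into
       * load_texture_info(), and the nested call must observe 'false'
       * or the recursion would never terminate. */
      need_texture_info = false;
      copy_to_device();
    }
  }

  void copy_to_device()
  {
    /* May re-enter load_texture_info(), e.g. when moving textures to host. */
    load_texture_info();
  }
};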
@@ -988,6 +990,7 @@ void CUDADevice::mem_alloc(device_memory &mem)
assert(!"mem_alloc not supported for global memory.");
}
else {
+ thread_scoped_lock lock(cuda_mem_map_mutex);
generic_alloc(mem);
}
}
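This hunk, and most of the hunks that follow, apply one pattern: every code path that touches the shared cuda_mem_map now takes a scoped lock on cuda_mem_map_mutex first, so concurrent render threads (e.g. CPU and OptiX devices running simultaneously) cannot race on the map. A condensed sketch of the pattern using standard library types; the Cycles thread_scoped_lock behaves like a scoped lock, and the names below are illustrative:

#include <mutex>
#include <unordered_map>

struct CUDAMemEntry {
  void *device_pointer = nullptr;
};

class CUDADeviceSketch {
  std::mutex mem_map_mutex;
  std::unordered_map<const void *, CUDAMemEntry> mem_map;

 public:
  void mem_alloc(const void *mem)
  {
    /* Scoped lock: held for the duration of the map access,
     * automatically released on return. */
    std::lock_guard<std::mutex> lock(mem_map_mutex);
    mem_map[mem] = CUDAMemEntry{};
  }

  void mem_free(const void *mem)
  {
    std::lock_guard<std::mutex> lock(mem_map_mutex);
    mem_map.erase(mem);
  }
};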
@@ -1006,10 +1009,10 @@ void CUDADevice::mem_copy_to(device_memory &mem)
tex_alloc((device_texture &)mem);
}
else {
+ thread_scoped_lock lock(cuda_mem_map_mutex);
if (!mem.device_pointer) {
generic_alloc(mem);
}
-
generic_copy_to(mem);
}
}
@@ -1048,6 +1051,7 @@ void CUDADevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
+ thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
@@ -1069,6 +1073,7 @@ void CUDADevice::mem_free(device_memory &mem)
tex_free((device_texture &)mem);
}
else {
+ thread_scoped_lock lock(cuda_mem_map_mutex);
generic_free(mem);
}
}
@@ -1092,6 +1097,7 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
void CUDADevice::global_alloc(device_memory &mem)
{
if (mem.is_resident(this)) {
+ thread_scoped_lock lock(cuda_mem_map_mutex);
generic_alloc(mem);
generic_copy_to(mem);
}
@@ -1102,6 +1108,7 @@ void CUDADevice::global_alloc(device_memory &mem)
void CUDADevice::global_free(device_memory &mem)
{
if (mem.is_resident(this) && mem.device_pointer) {
+ thread_scoped_lock lock(cuda_mem_map_mutex);
generic_free(mem);
}
}
@@ -1170,6 +1177,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
+ thread_scoped_lock lock(cuda_mem_map_mutex);
+
if (!mem.is_resident(this)) {
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
@@ -1257,6 +1266,9 @@ void CUDADevice::tex_alloc(device_texture &mem)
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
}
+ /* Unlock mutex before resizing texture info, since that may attempt to lock it again. */
+ lock.unlock();
+
/* Resize once */
const uint slot = mem.slot;
if (slot >= texture_info.size()) {
@@ -1305,6 +1317,11 @@ void CUDADevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+ /* Lock again and refresh the data pointer (in case another thread modified the map in the
+ * meantime). */
+ lock.lock();
+ cmem = &cuda_mem_map[&mem];
+
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
texture_info[slot].data = (uint64_t)cmem->texobject;
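The unlock/relock sequence in the two hunks above deserves a note: resizing texture_info can call back into code that takes the same mutex, so the lock is dropped first; after re-acquiring it, the cached element pointer is looked up again, since another thread may have modified the map while it was unlocked. A distilled sketch of the hazard, with illustrative names:

#include <map>
#include <mutex>

struct Entry {
  unsigned long long texobject = 0;
};

std::mutex map_mutex;
std::map<const void *, Entry> mem_map;

void tex_alloc_sketch(const void *mem)
{
  std::unique_lock<std::mutex> lock(map_mutex);
  Entry *cmem = &mem_map[mem];

  /* ... fill in *cmem while holding the lock ... */

  /* Release the lock before calling code that may lock the same mutex;
   * on a non-recursive mutex this would otherwise deadlock. */
  lock.unlock();
  /* resize_texture_info();  // may call back into code that locks map_mutex */

  /* Re-acquire and refresh the pointer: another thread may have modified
   * the map while it was unlocked, so the old element may be stale. */
  lock.lock();
  cmem = &mem_map[mem];
  cmem->texobject = 1;
}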
@@ -1318,6 +1335,7 @@ void CUDADevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
+ thread_scoped_lock lock(cuda_mem_map_mutex);
const CUDAMem &cmem = cuda_mem_map[&mem];
if (cmem.texobject) {