
git.blender.org/blender.git
author     Brecht Van Lommel <brechtvanlommel@gmail.com>    2019-11-05 19:35:27 +0300
committer  Brecht Van Lommel <brechtvanlommel@gmail.com>    2019-11-05 19:35:27 +0300
commit     af9a50bfe6c6dbf6a8e77e1adaf4899be0cdf34d (patch)
tree       c463d6d4e447d8ac4c7c74891999ae2b4cb1e2e9 /intern/cycles
parent     f5e98a0fa7dbd68956fd6fcfc81ee6127112f61e (diff)
parent     9a9e93e8043b515360eacd71c20b193c9e15204d (diff)
Merge branch 'blender-v2.81-release'
Diffstat (limited to 'intern/cycles')
-rw-r--r--  intern/cycles/device/device_cuda.cpp    | 111
-rw-r--r--  intern/cycles/device/device_memory.cpp  |   9
-rw-r--r--  intern/cycles/device/device_memory.h    |   2
-rw-r--r--  intern/cycles/device/device_optix.cpp   | 107
4 files changed, 147 insertions(+), 82 deletions(-)
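The functional change merged here replaces the per-device map_host_pointer/free_map_host pair with a single reference-counted pinned host allocation: every device that maps the memory bumps shared_counter, and only the device that drops the count to zero frees the buffer. The fields shared_pointer and shared_counter are real (see the device_memory diff below); SharedAlloc, acquire() and release() are illustrative names in this minimal standalone sketch, with std::malloc/std::free standing in for the CUDA pinned-memory calls.

#include <cassert>
#include <cstddef>
#include <cstdlib>

/* Simplified stand-in for device_memory: only the two fields that
 * matter for the reference-counting scheme this patch introduces. */
struct SharedAlloc {
  void *shared_pointer = nullptr; /* pinned host memory shared by devices */
  int shared_counter = 0;         /* one reference per device using it */
};

/* Each device that wants the mapped host allocation calls this. The
 * first caller allocates; later callers just take a reference. */
void *acquire(SharedAlloc &a, size_t size)
{
  if (a.shared_pointer == nullptr) {
    a.shared_pointer = std::malloc(size); /* stands in for cuMemHostAlloc() */
  }
  a.shared_counter++;
  return a.shared_pointer;
}

/* Each device calls this on free; only the last reference releases. */
void release(SharedAlloc &a)
{
  assert(a.shared_counter > 0);
  if (--a.shared_counter == 0) {
    std::free(a.shared_pointer); /* stands in for cuMemFreeHost() */
    a.shared_pointer = nullptr;
  }
}

int main()
{
  SharedAlloc a;
  acquire(a, 1024); /* e.g. the CUDA device allocates */
  acquire(a, 1024); /* e.g. the OptiX device reuses the same allocation */
  release(a);       /* counter drops to 1: memory stays alive */
  release(a);       /* last reference: memory is freed */
  return 0;
}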
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index b5e10b0c2cb..dfd80d678fd 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -142,14 +142,15 @@ class CUDADevice : public Device {
CUDASplitKernel *split_kernel;
struct CUDAMem {
- CUDAMem() : texobject(0), array(0), map_host_pointer(0), free_map_host(false)
+ CUDAMem() : texobject(0), array(0), use_mapped_host(false)
{
}
CUtexObject texobject;
CUarray array;
- void *map_host_pointer;
- bool free_map_host;
+
+ /* If true, mapped host memory in shared_pointer is being used. */
+ bool use_mapped_host;
};
typedef map<device_memory *, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;
@@ -732,7 +733,7 @@ class CUDADevice : public Device {
}
/* Already in host memory. */
- if (cmem->map_host_pointer) {
+ if (cmem->use_mapped_host) {
continue;
}
@@ -807,7 +808,7 @@ class CUDADevice : public Device {
cuMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
- if (!move_texture_to_host && !is_image && (size + headroom) >= free) {
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
cuMemGetInfo(&free, &total);
}
@@ -821,49 +822,36 @@ class CUDADevice : public Device {
}
/* Fall back to mapped host memory if needed and possible. */
- void *map_host_pointer = 0;
- bool free_map_host = false;
- if (mem_alloc_result != CUDA_SUCCESS && can_map_host &&
- map_host_used + size < map_host_limit) {
+ void *shared_pointer = 0;
+
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
- map_host_pointer = mem.shared_pointer;
+ shared_pointer = mem.shared_pointer;
}
- else {
+ else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = cuMemHostAlloc(
- &map_host_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
- mem.shared_pointer = map_host_pointer;
- free_map_host = true;
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
}
if (mem_alloc_result == CUDA_SUCCESS) {
- cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0));
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
map_host_used += size;
status = " in host memory";
-
- /* Replace host pointer with our host allocation. Only works if
- * CUDA memory layout is the same and has no pitch padding. Also
- * does not work if we move textures to host during a render,
- * since other devices might be using the memory. */
- if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
- mem.host_pointer != mem.shared_pointer) {
- memcpy(mem.shared_pointer, mem.host_pointer, size);
- mem.host_free();
- mem.host_pointer = mem.shared_pointer;
- }
}
else {
status = " failed, out of host memory";
}
}
- else if (mem_alloc_result != CUDA_SUCCESS) {
- status = " failed, out of device and host memory";
- }
if (mem_alloc_result != CUDA_SUCCESS) {
+ status = " failed, out of device and host memory";
cuda_assert(mem_alloc_result);
}
@@ -883,8 +871,34 @@ class CUDADevice : public Device {
/* Insert into map of allocations. */
CUDAMem *cmem = &cuda_mem_map[&mem];
- cmem->map_host_pointer = map_host_pointer;
- cmem->free_map_host = free_map_host;
+ if (shared_pointer != 0) {
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. Also
+ * does not work if we move textures to host during a render,
+ * since other devices might be using the memory. */
+
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+ mem.host_pointer != shared_pointer) {
+ memcpy(shared_pointer, mem.host_pointer, size);
+
+ /* A call to device_memory::host_free() should be preceded by
+ * a call to device_memory::device_free() for host memory
+ * allocated by a device to be handled properly. Two exceptions
+ * are here and a call in OptiXDevice::generic_alloc(), where
+ * the current host memory can be assumed to be allocated by
+ * device_memory::host_alloc(), not by a device. */
+
+ mem.host_free();
+ mem.host_pointer = shared_pointer;
+ }
+ mem.shared_pointer = shared_pointer;
+ mem.shared_counter++;
+ cmem->use_mapped_host = true;
+ }
+ else {
+ cmem->use_mapped_host = false;
+ }
+
return cmem;
}
@@ -893,7 +907,12 @@ class CUDADevice : public Device {
if (mem.host_pointer && mem.device_pointer) {
CUDAContextScope scope(this);
- if (mem.host_pointer != mem.shared_pointer) {
+ /* If use_mapped_host of mem is false, the current device only
+ * uses device memory allocated by cuMemAlloc regardless of
+ * mem.host_pointer and mem.shared_pointer, and should copy
+ * data from mem.host_pointer. */
+
+ if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
cuda_assert(cuMemcpyHtoD(
cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
}
@@ -906,16 +925,21 @@ class CUDADevice : public Device {
CUDAContextScope scope(this);
const CUDAMem &cmem = cuda_mem_map[&mem];
- if (cmem.map_host_pointer) {
- /* Free host memory. */
- if (cmem.free_map_host) {
- cuMemFreeHost(cmem.map_host_pointer);
- if (mem.host_pointer == mem.shared_pointer) {
- mem.host_pointer = 0;
+ /* If cmem.use_mapped_host is true, reference counting is used
+ * to safely free mapped host memory. */
+
+ if (cmem.use_mapped_host) {
+ assert(mem.shared_pointer);
+ if (mem.shared_pointer) {
+ assert(mem.shared_counter > 0);
+ if (--mem.shared_counter == 0) {
+ if (mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ cuMemFreeHost(mem.shared_pointer);
+ mem.shared_pointer = 0;
}
- mem.shared_pointer = 0;
}
-
map_host_used -= mem.device_size;
}
else {
@@ -995,7 +1019,12 @@ class CUDADevice : public Device {
memset(mem.host_pointer, 0, mem.memory_size());
}
- if (mem.device_pointer && (!mem.host_pointer || mem.host_pointer != mem.shared_pointer)) {
+ /* If use_mapped_host of mem is false, mem.device_pointer currently
+ * refers to device memory regardless of mem.host_pointer and
+ * mem.shared_pointer. */
+
+ if (mem.device_pointer &&
+ (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
CUDAContextScope scope(this);
cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
}
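The rewritten allocation path in CUDADevice::generic_alloc() above tries device memory first and falls back to mapped host memory only afterwards, reusing a shared_pointer that another device already allocated before charging the map_host_limit budget. A hedged sketch of that ordering against the real CUDA driver API follows; mem_t and alloc_with_host_fallback are illustrative names, not Blender's, and error handling is reduced to the returned CUresult.

#include <cuda.h>
#include <cstddef>

/* Illustrative cut-down allocation record; the real code keeps these
 * fields on device_memory. */
struct mem_t {
  void *shared_pointer = nullptr;
  int shared_counter = 0;
};

CUresult alloc_with_host_fallback(mem_t &mem,
                                  size_t size,
                                  bool can_map_host,
                                  size_t &map_host_used,
                                  size_t map_host_limit,
                                  CUdeviceptr &device_pointer)
{
  /* Regular device memory first. */
  CUresult result = cuMemAlloc(&device_pointer, size);
  if (result == CUDA_SUCCESS || !can_map_host) {
    return result;
  }

  void *shared = nullptr;
  if (mem.shared_pointer) {
    /* Another device already allocated pinned host memory: reuse it
     * without consulting the host-memory budget again. */
    shared = mem.shared_pointer;
    result = CUDA_SUCCESS;
  }
  else if (map_host_used + size < map_host_limit) {
    /* Allocate mapped, write-combined host memory ourselves. */
    result = cuMemHostAlloc(
        &shared, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
  }

  if (result == CUDA_SUCCESS) {
    /* Resolve the device-side address of the mapped host allocation. */
    result = cuMemHostGetDevicePointer(&device_pointer, shared, 0);
    map_host_used += size;
    mem.shared_pointer = shared;
    mem.shared_counter++; /* this device now holds a reference */
  }
  return result;
}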
diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
index c8a71bf4b3b..c106b4505db 100644
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp
@@ -36,12 +36,15 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
device(device),
device_pointer(0),
host_pointer(0),
- shared_pointer(0)
+ shared_pointer(0),
+ shared_counter(0)
{
}
device_memory::~device_memory()
{
+ assert(shared_pointer == 0);
+ assert(shared_counter == 0);
}
device_memory::device_memory(device_memory &&other)
@@ -59,12 +62,14 @@ device_memory::device_memory(device_memory &&other)
device(other.device),
device_pointer(other.device_pointer),
host_pointer(other.host_pointer),
- shared_pointer(other.shared_pointer)
+ shared_pointer(other.shared_pointer),
+ shared_counter(other.shared_counter)
{
other.device_size = 0;
other.device_pointer = 0;
other.host_pointer = 0;
other.shared_pointer = 0;
+ other.shared_counter = 0;
}
void *device_memory::host_alloc(size_t size)
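The device_memory changes above pair the new shared_counter with two destructor asserts, and those asserts only hold if the move constructor leaves the moved-from object empty. A minimal sketch of that invariant, using a hypothetical cut-down buffer type rather than the real device_memory class:

#include <cassert>
#include <utility>

/* Hypothetical buffer type illustrating the invariant the destructor
 * asserts enforce: by the time the object is destroyed, its shared
 * mapping must already have been released by device_free(). */
struct buffer {
  void *shared_pointer = nullptr;
  int shared_counter = 0;

  buffer() = default;

  /* Moving transfers ownership and leaves the source empty, exactly as
   * the patch does for device_memory(device_memory &&other). */
  buffer(buffer &&other) noexcept
      : shared_pointer(other.shared_pointer), shared_counter(other.shared_counter)
  {
    other.shared_pointer = nullptr;
    other.shared_counter = 0;
  }

  ~buffer()
  {
    /* Mirrors the asserts added to device_memory::~device_memory(). */
    assert(shared_pointer == nullptr);
    assert(shared_counter == 0);
  }
};

int main()
{
  buffer a;
  buffer b(std::move(a)); /* a is left cleared, so both destructors pass */
  (void)b;
  return 0;
}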
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 272da6a9ad3..f8324e2a214 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -216,6 +216,8 @@ class device_memory {
device_ptr device_pointer;
void *host_pointer;
void *shared_pointer;
+ /* Reference counter for shared_pointer. */
+ int shared_counter;
virtual ~device_memory();
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 370e3cf4085..3162b72a0dc 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -138,7 +138,7 @@ class OptiXDevice : public Device {
bool free_map_host = false;
CUarray array = NULL;
CUtexObject texobject = 0;
- void *map_host_pointer = nullptr;
+ bool use_mapped_host = false;
};
// Helper class to manage current CUDA context
@@ -1234,7 +1234,7 @@ class OptiXDevice : public Device {
cuMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
- if (!move_texture_to_host && !is_image && (size + headroom) >= free) {
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
cuMemGetInfo(&free, &total);
}
@@ -1248,39 +1248,27 @@ class OptiXDevice : public Device {
}
/* Fall back to mapped host memory if needed and possible. */
- void *map_host_pointer = 0;
- bool free_map_host = false;
+ void *shared_pointer = 0;
- if (mem_alloc_result != CUDA_SUCCESS && can_map_host &&
- map_host_used + size < map_host_limit) {
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
- map_host_pointer = mem.shared_pointer;
+ shared_pointer = mem.shared_pointer;
}
- else {
+ else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = cuMemHostAlloc(
- &map_host_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
- mem.shared_pointer = map_host_pointer;
- free_map_host = true;
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
}
if (mem_alloc_result == CUDA_SUCCESS) {
- cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0);
+ cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0);
map_host_used += size;
status = " in host memory";
-
- /* Replace host pointer with our host allocation. Only works if
- * CUDA memory layout is the same and has no pitch padding. Also
- * does not work if we move textures to host during a render,
- * since other devices might be using the memory. */
- if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
- mem.host_pointer != mem.shared_pointer) {
- memcpy(mem.shared_pointer, mem.host_pointer, size);
- mem.host_free();
- mem.host_pointer = mem.shared_pointer;
- }
}
else {
status = " failed, out of host memory";
@@ -1311,8 +1299,34 @@ class OptiXDevice : public Device {
/* Insert into map of allocations. */
CUDAMem *cmem = &cuda_mem_map[&mem];
- cmem->map_host_pointer = map_host_pointer;
- cmem->free_map_host = free_map_host;
+ if (shared_pointer != 0) {
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. Also
+ * does not work if we move textures to host during a render,
+ * since other devices might be using the memory. */
+
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+ mem.host_pointer != shared_pointer) {
+ memcpy(shared_pointer, mem.host_pointer, size);
+
+ /* A call to device_memory::host_free() should be preceded by
+ * a call to device_memory::device_free() for host memory
+ * allocated by a device to be handled properly. Two exceptions
+ * are here and a call in CUDADevice::generic_alloc(), where
+ * the current host memory can be assumed to be allocated by
+ * device_memory::host_alloc(), not by a device. */
+
+ mem.host_free();
+ mem.host_pointer = shared_pointer;
+ }
+ mem.shared_pointer = shared_pointer;
+ mem.shared_counter++;
+ cmem->use_mapped_host = true;
+ }
+ else {
+ cmem->use_mapped_host = false;
+ }
+
return cmem;
}
@@ -1560,7 +1574,12 @@ class OptiXDevice : public Device {
if (mem.host_pointer && mem.device_pointer) {
CUDAContextScope scope(cuda_context);
- if (mem.host_pointer != mem.shared_pointer) {
+ /* If use_mapped_host of mem is false, the current device only
+ * uses device memory allocated by cuMemAlloc regardless of
+ * mem.host_pointer and mem.shared_pointer, and should copy
+ * data from mem.host_pointer. */
+
+ if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
check_result_cuda(
cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
@@ -1595,14 +1614,19 @@ class OptiXDevice : public Device {
{
if (mem.host_pointer)
memset(mem.host_pointer, 0, mem.memory_size());
- if (mem.host_pointer && mem.host_pointer == mem.shared_pointer)
- return; // This is shared host memory, so no device memory to update
if (!mem.device_pointer)
mem_alloc(mem); // Need to allocate memory first if it does not exist yet
- const CUDAContextScope scope(cuda_context);
- check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
+ /* If use_mapped_host of mem is false, mem.device_pointer currently
+ * refers to device memory regardless of mem.host_pointer and
+ * mem.shared_pointer. */
+
+ if (mem.device_pointer &&
+ (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
+ const CUDAContextScope scope(cuda_context);
+ check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
+ }
}
void mem_free(device_memory &mem) override
@@ -1624,16 +1648,21 @@ class OptiXDevice : public Device {
CUDAContextScope scope(cuda_context);
const CUDAMem &cmem = cuda_mem_map[&mem];
- if (cmem.map_host_pointer) {
- /* Free host memory. */
- if (cmem.free_map_host) {
- cuMemFreeHost(cmem.map_host_pointer);
- if (mem.host_pointer == mem.shared_pointer) {
- mem.host_pointer = 0;
+ /* If cmem.use_mapped_host is true, reference counting is used
+ * to safely free mapped host memory. */
+
+ if (cmem.use_mapped_host) {
+ assert(mem.shared_pointer);
+ if (mem.shared_pointer) {
+ assert(mem.shared_counter > 0);
+ if (--mem.shared_counter == 0) {
+ if (mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ cuMemFreeHost(mem.shared_pointer);
+ mem.shared_pointer = 0;
}
- mem.shared_pointer = 0;
}
-
map_host_used -= mem.device_size;
}
else {
@@ -1699,7 +1728,7 @@ class OptiXDevice : public Device {
}
/* Already in host memory. */
- if (cmem->map_host_pointer) {
+ if (cmem->use_mapped_host) {
continue;
}