Code refactor: device memory cleanups, preparing for mapped host memory.

author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-11-05 02:34:30 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-11-05 17:22:04 +0300
commit: 5801ef71e40bc932c69e67f06076cd8b41132e52 (patch)
tree: 1fec0c65b9ead5720f54229497060af1f0194bbb /intern/cycles/device/device_cuda.cpp
parent: 5475314f4955dbc3af305577a26fe0b537380313 (diff)
1 file changed, 65 insertions, 59 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 59d4fb055d0..4ab3cb9da75 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -128,20 +128,26 @@ public:
 	CUdevice cuDevice;
 	CUcontext cuContext;
 	CUmodule cuModule, cuFilterModule;
-	map<device_ptr, bool> tex_interp_map;
-	map<device_ptr, CUtexObject> tex_bindless_map;
 	int cuDevId;
 	int cuDevArchitecture;
 	bool first_error;
 	CUDASplitKernel *split_kernel;
 
+	struct CUDAMem {
+		CUDAMem()
+		: texobject(0), array(0) {}
+
+		CUtexObject texobject;
+		CUarray array;
+	};
+	map<device_memory*, CUDAMem> cuda_mem_map;
+
 	struct PixelMem {
 		GLuint cuPBO;
 		CUgraphicsResource cuPBOresource;
 		GLuint cuTexId;
 		int w, h;
 	};
-
 	map<device_ptr, PixelMem> pixel_mem_map;
 
 	/* Bindless Textures */
@@ -615,7 +621,7 @@ public:
 		}
 	}
 
-	void generic_alloc(device_memory& mem, size_t padding = 0)
+	CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
 	{
 		CUDAContextScope scope(this);
 
@@ -625,19 +631,28 @@ public:
 					<< string_human_readable_size(mem.memory_size()) << ")";
 		}
 
-		CUdeviceptr device_pointer;
+		/* Allocate memory on device. */
+		CUdeviceptr device_pointer = 0;
 		size_t size = mem.memory_size();
 		cuda_assert(cuMemAlloc(&device_pointer, size + padding));
 		mem.device_pointer = (device_ptr)device_pointer;
 		mem.device_size = size;
 		stats.mem_alloc(size);
+
+		if(!mem.device_pointer) {
+			return NULL;
+		}
+
+		/* Insert into map of allocations. */
+		CUDAMem *cmem = &cuda_mem_map[&mem];
+		return cmem;
 	}
 
 	void generic_copy_to(device_memory& mem)
 	{
 		if(mem.device_pointer) {
 			CUDAContextScope scope(this);
-			cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
+			cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
 		}
 	}
 
@@ -648,10 +663,11 @@ public:
 
 			cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
 
-			mem.device_pointer = 0;
-
 			stats.mem_free(mem.device_size);
+			mem.device_pointer = 0;
 			mem.device_size = 0;
+
+			cuda_mem_map.erase(cuda_mem_map.find(&mem));
 		}
 	}
 
@@ -700,11 +716,11 @@ public:
 			size_t size = elem*w*h;
 
 			if(mem.device_pointer) {
-				cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
+				cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
 										 (CUdeviceptr)(mem.device_pointer + offset), size));
 			}
 			else {
-				memset((char*)mem.data_pointer + offset, 0, size);
+				memset((char*)mem.host_pointer + offset, 0, size);
 			}
 		}
 	}
@@ -715,8 +731,8 @@ public:
 			mem_alloc(mem);
 		}
 
-		if(mem.data_pointer) {
-			memset((void*)mem.data_pointer, 0, mem.memory_size());
+		if(mem.host_pointer) {
+			memset(mem.host_pointer, 0, mem.memory_size());
 		}
 
 		if(mem.device_pointer) {
@@ -814,8 +830,6 @@ public:
 				uint32_t ptr = (uint32_t)mem.device_pointer;
 				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 			}
-
-			tex_interp_map[mem.device_pointer] = false;
 			return;
 		}
 
@@ -851,7 +865,7 @@ public:
 			default: assert(0); return;
 		}
 
-
+		CUDAMem *cmem = NULL;
 		CUarray array_3d = NULL;
 		size_t src_pitch = mem.data_width * dsize * mem.data_elements;
 		size_t dst_pitch = src_pitch;
@@ -878,7 +892,7 @@ public:
 			param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
 			param.dstArray = array_3d;
 			param.srcMemoryType = CU_MEMORYTYPE_HOST;
-			param.srcHost = (void*)mem.data_pointer;
+			param.srcHost = mem.host_pointer;
 			param.srcPitch = src_pitch;
 			param.WidthInBytes = param.srcPitch;
 			param.Height = mem.data_height;
@@ -889,6 +903,10 @@ public:
 			mem.device_pointer = (device_ptr)array_3d;
 			mem.device_size = size;
 			stats.mem_alloc(size);
+
+			cmem = &cuda_mem_map[&mem];
+			cmem->texobject = 0;
+			cmem->array = array_3d;
 		}
 		else if(mem.data_height > 1) {
 			/* 2D texture, using pitch aligned linear memory. */
@@ -897,7 +915,10 @@ public:
 			dst_pitch = align_up(src_pitch, alignment);
 			size_t dst_size = dst_pitch * mem.data_height;
 
-			generic_alloc(mem, dst_size - mem.memory_size());
+			cmem = generic_alloc(mem, dst_size - mem.memory_size());
+			if(!cmem) {
+				return;
+			}
 
 			CUDA_MEMCPY2D param;
 			memset(&param, 0, sizeof(param));
@@ -905,7 +926,7 @@ public:
 			param.dstDevice = mem.device_pointer;
 			param.dstPitch = dst_pitch;
 			param.srcMemoryType = CU_MEMORYTYPE_HOST;
-			param.srcHost = (void*)mem.data_pointer;
+			param.srcHost = mem.host_pointer;
 			param.srcPitch = src_pitch;
 			param.WidthInBytes = param.srcPitch;
 			param.Height = mem.data_height;
@@ -914,8 +935,12 @@ public:
 		}
 		else {
 			/* 1D texture, using linear memory. */
-			generic_alloc(mem);
-			cuda_assert(cuMemcpyHtoD(mem.device_pointer, (void*)mem.data_pointer, size));
+			cmem = generic_alloc(mem);
+			if(!cmem) {
+				return;
+			}
+
+			cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
 		}
 
 		if(!has_fermi_limits) {
@@ -932,7 +957,7 @@ public:
 			CUDA_RESOURCE_DESC resDesc;
 			memset(&resDesc, 0, sizeof(resDesc));
 
-			if(mem.data_depth > 1) {
+			if(array_3d) {
 				resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
 				resDesc.res.array.hArray = array_3d;
 				resDesc.flags = 0;
@@ -962,13 +987,7 @@ public:
 			texDesc.filterMode = filter_mode;
 			texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
 
-			CUtexObject tex = 0;
-			cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
-
-			/* Safety check */
-			if((uint)tex > UINT_MAX) {
-				assert(0);
-			}
+			cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
 
 			/* Resize once */
 			if(flat_slot >= texture_info.size()) {
@@ -979,20 +998,18 @@ public:
 
 			/* Set Mapping and tag that we need to (re-)upload to device */
 			TextureInfo& info = texture_info[flat_slot];
-			info.data = (uint64_t)tex;
+			info.data = (uint64_t)cmem->texobject;
 			info.cl_buffer = 0;
 			info.interpolation = mem.interpolation;
 			info.extension = mem.extension;
 			info.width = mem.data_width;
 			info.height = mem.data_height;
 			info.depth = mem.data_depth;
-
-			tex_bindless_map[mem.device_pointer] = tex;
 			need_texture_info = true;
 		}
 		else {
 			/* Fermi, fixed texture slots. */
-			if(mem.data_depth > 1) {
+			if(array_3d) {
 				cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
 			}
 			else if(mem.data_height > 1) {
@@ -1017,38 +1034,27 @@ public:
 				cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
 			}
 		}
-
-		/* Fermi and Kepler */
-		tex_interp_map[mem.device_pointer] = true;
 	}
 
 	void tex_free(device_memory& mem)
 	{
 		if(mem.device_pointer) {
-			bool interp = tex_interp_map[mem.device_pointer];
-			tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
+			CUDAContextScope scope(this);
+			const CUDAMem& cmem = cuda_mem_map[&mem];
 
-			if(interp) {
-				CUDAContextScope scope(this);
+			if(cmem.texobject) {
+				/* Free bindless texture. */
+				cuTexObjectDestroy(cmem.texobject);
+			}
 
-				if(!info.has_fermi_limits) {
-					/* Free bindless texture. */
-					if(tex_bindless_map[mem.device_pointer]) {
-						CUtexObject tex = tex_bindless_map[mem.device_pointer];
-						cuTexObjectDestroy(tex);
-					}
-				}
+			if(cmem.array) {
+				/* Free array. */
+				cuArrayDestroy(cmem.array);
+				stats.mem_free(mem.device_size);
+				mem.device_pointer = 0;
+				mem.device_size = 0;
 
-				if(mem.data_depth > 1) {
-					/* Free array. */
-					cuArrayDestroy((CUarray)mem.device_pointer);
-					stats.mem_free(mem.device_size);
-					mem.device_pointer = 0;
-					mem.device_size = 0;
-				}
-				else {
-					generic_free(mem);
-				}
+				cuda_mem_map.erase(cuda_mem_map.find(&mem));
 			}
 			else {
 				generic_free(mem);
@@ -1058,7 +1064,7 @@ public:
 
 	bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
 	{
-		TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
+		TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
 		for(int i = 0; i < 9; i++) {
 			tiles->buffers[i] = buffers[i];
 		}
@@ -1455,7 +1461,7 @@ public:
 		/* Allocate work tile. */
 		work_tiles.alloc(1);
 
-		WorkTile *wtile = work_tiles.get_data();
+		WorkTile *wtile = work_tiles.data();
 		wtile->x = rtile.x;
 		wtile->y = rtile.y;
 		wtile->w = rtile.w;
@@ -1716,7 +1722,7 @@ public:
 		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
 		uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
 		size_t offset = sizeof(uchar)*4*y*w;
-		memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
+		memcpy((uchar*)mem.host_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
 		glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
 		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 	}
author	Brecht Van Lommel <brechtvanlommel@gmail.com>	2017-11-05 02:34:30 +0300
committer	Brecht Van Lommel <brechtvanlommel@gmail.com>	2017-11-05 17:22:04 +0300
commit	5801ef71e40bc932c69e67f06076cd8b41132e52 (patch)
tree	1fec0c65b9ead5720f54229497060af1f0194bbb /intern/cycles/device/device_cuda.cpp
parent	5475314f4955dbc3af305577a26fe0b537380313 (diff)