From 5801ef71e40bc932c69e67f06076cd8b41132e52 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 5 Nov 2017 00:34:30 +0100 Subject: Code refactor: device memory cleanups, preparing for mapped host memory. --- intern/cycles/device/device_cuda.cpp | 124 ++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 59 deletions(-) (limited to 'intern/cycles/device/device_cuda.cpp') diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 59d4fb055d0..4ab3cb9da75 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -128,20 +128,26 @@ public: CUdevice cuDevice; CUcontext cuContext; CUmodule cuModule, cuFilterModule; - map tex_interp_map; - map tex_bindless_map; int cuDevId; int cuDevArchitecture; bool first_error; CUDASplitKernel *split_kernel; + struct CUDAMem { + CUDAMem() + : texobject(0), array(0) {} + + CUtexObject texobject; + CUarray array; + }; + map cuda_mem_map; + struct PixelMem { GLuint cuPBO; CUgraphicsResource cuPBOresource; GLuint cuTexId; int w, h; }; - map pixel_mem_map; /* Bindless Textures */ @@ -615,7 +621,7 @@ public: } } - void generic_alloc(device_memory& mem, size_t padding = 0) + CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0) { CUDAContextScope scope(this); @@ -625,19 +631,28 @@ public: << string_human_readable_size(mem.memory_size()) << ")"; } - CUdeviceptr device_pointer; + /* Allocate memory on device. */ + CUdeviceptr device_pointer = 0; size_t size = mem.memory_size(); cuda_assert(cuMemAlloc(&device_pointer, size + padding)); mem.device_pointer = (device_ptr)device_pointer; mem.device_size = size; stats.mem_alloc(size); + + if(!mem.device_pointer) { + return NULL; + } + + /* Insert into map of allocations. 
*/ + CUDAMem *cmem = &cuda_mem_map[&mem]; + return cmem; } void generic_copy_to(device_memory& mem) { if(mem.device_pointer) { CUDAContextScope scope(this); - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size())); } } @@ -648,10 +663,11 @@ public: cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); - mem.device_pointer = 0; - stats.mem_free(mem.device_size); + mem.device_pointer = 0; mem.device_size = 0; + + cuda_mem_map.erase(cuda_mem_map.find(&mem)); } } @@ -700,11 +716,11 @@ public: size_t size = elem*w*h; if(mem.device_pointer) { - cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, + cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); } else { - memset((char*)mem.data_pointer + offset, 0, size); + memset((char*)mem.host_pointer + offset, 0, size); } } } @@ -715,8 +731,8 @@ public: mem_alloc(mem); } - if(mem.data_pointer) { - memset((void*)mem.data_pointer, 0, mem.memory_size()); + if(mem.host_pointer) { + memset(mem.host_pointer, 0, mem.memory_size()); } if(mem.device_pointer) { @@ -814,8 +830,6 @@ public: uint32_t ptr = (uint32_t)mem.device_pointer; cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } - - tex_interp_map[mem.device_pointer] = false; return; } @@ -851,7 +865,7 @@ public: default: assert(0); return; } - + CUDAMem *cmem = NULL; CUarray array_3d = NULL; size_t src_pitch = mem.data_width * dsize * mem.data_elements; size_t dst_pitch = src_pitch; @@ -878,7 +892,7 @@ public: param.dstMemoryType = CU_MEMORYTYPE_ARRAY; param.dstArray = array_3d; param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = (void*)mem.data_pointer; + param.srcHost = mem.host_pointer; param.srcPitch = src_pitch; param.WidthInBytes = param.srcPitch; param.Height = mem.data_height; @@ -889,6 +903,10 @@ public: mem.device_pointer = 
(device_ptr)array_3d; mem.device_size = size; stats.mem_alloc(size); + + cmem = &cuda_mem_map[&mem]; + cmem->texobject = 0; + cmem->array = array_3d; } else if(mem.data_height > 1) { /* 2D texture, using pitch aligned linear memory. */ @@ -897,7 +915,10 @@ public: dst_pitch = align_up(src_pitch, alignment); size_t dst_size = dst_pitch * mem.data_height; - generic_alloc(mem, dst_size - mem.memory_size()); + cmem = generic_alloc(mem, dst_size - mem.memory_size()); + if(!cmem) { + return; + } CUDA_MEMCPY2D param; memset(&param, 0, sizeof(param)); @@ -905,7 +926,7 @@ public: param.dstDevice = mem.device_pointer; param.dstPitch = dst_pitch; param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = (void*)mem.data_pointer; + param.srcHost = mem.host_pointer; param.srcPitch = src_pitch; param.WidthInBytes = param.srcPitch; param.Height = mem.data_height; @@ -914,8 +935,12 @@ public: } else { /* 1D texture, using linear memory. */ - generic_alloc(mem); - cuda_assert(cuMemcpyHtoD(mem.device_pointer, (void*)mem.data_pointer, size)); + cmem = generic_alloc(mem); + if(!cmem) { + return; + } + + cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); } if(!has_fermi_limits) { @@ -932,7 +957,7 @@ public: CUDA_RESOURCE_DESC resDesc; memset(&resDesc, 0, sizeof(resDesc)); - if(mem.data_depth > 1) { + if(array_3d) { resDesc.resType = CU_RESOURCE_TYPE_ARRAY; resDesc.res.array.hArray = array_3d; resDesc.flags = 0; @@ -962,13 +987,7 @@ public: texDesc.filterMode = filter_mode; texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - CUtexObject tex = 0; - cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL)); - - /* Safety check */ - if((uint)tex > UINT_MAX) { - assert(0); - } + cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); /* Resize once */ if(flat_slot >= texture_info.size()) { @@ -979,20 +998,18 @@ public: /* Set Mapping and tag that we need to (re-)upload to device */ TextureInfo& info = texture_info[flat_slot]; - info.data = 
(uint64_t)tex; + info.data = (uint64_t)cmem->texobject; info.cl_buffer = 0; info.interpolation = mem.interpolation; info.extension = mem.extension; info.width = mem.data_width; info.height = mem.data_height; info.depth = mem.data_depth; - - tex_bindless_map[mem.device_pointer] = tex; need_texture_info = true; } else { /* Fermi, fixed texture slots. */ - if(mem.data_depth > 1) { + if(array_3d) { cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT)); } else if(mem.data_height > 1) { @@ -1017,38 +1034,27 @@ public: cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode)); } } - - /* Fermi and Kepler */ - tex_interp_map[mem.device_pointer] = true; } void tex_free(device_memory& mem) { if(mem.device_pointer) { - bool interp = tex_interp_map[mem.device_pointer]; - tex_interp_map.erase(tex_interp_map.find(mem.device_pointer)); + CUDAContextScope scope(this); + const CUDAMem& cmem = cuda_mem_map[&mem]; - if(interp) { - CUDAContextScope scope(this); + if(cmem.texobject) { + /* Free bindless texture. */ + cuTexObjectDestroy(cmem.texobject); + } - if(!info.has_fermi_limits) { - /* Free bindless texture. */ - if(tex_bindless_map[mem.device_pointer]) { - CUtexObject tex = tex_bindless_map[mem.device_pointer]; - cuTexObjectDestroy(tex); - } - } + if(cmem.array) { + /* Free array. */ + cuArrayDestroy(cmem.array); + stats.mem_free(mem.device_size); + mem.device_pointer = 0; + mem.device_size = 0; - if(mem.data_depth > 1) { - /* Free array. 
*/ - cuArrayDestroy((CUarray)mem.device_pointer); - stats.mem_free(mem.device_size); - mem.device_pointer = 0; - mem.device_size = 0; - } - else { - generic_free(mem); - } + cuda_mem_map.erase(cuda_mem_map.find(&mem)); } else { generic_free(mem); @@ -1058,7 +1064,7 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; + TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer; for(int i = 0; i < 9; i++) { tiles->buffers[i] = buffers[i]; } @@ -1455,7 +1461,7 @@ public: /* Allocate work tile. */ work_tiles.alloc(1); - WorkTile *wtile = work_tiles.get_data(); + WorkTile *wtile = work_tiles.data(); wtile->x = rtile.x; wtile->y = rtile.y; wtile->w = rtile.w; @@ -1716,7 +1722,7 @@ public: glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); size_t offset = sizeof(uchar)*4*y*w; - memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h); + memcpy((uchar*)mem.host_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h); glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } -- cgit v1.2.3