From 7ad9333fad25b9a7cabea0d659eaf724f89912c8 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Fri, 20 Oct 2017 23:31:13 +0200
Subject: Code refactor: store device/interp/extension/type in each device_memory.

---
 intern/cycles/device/device_cuda.cpp | 58 +++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 30 deletions(-)

(limited to 'intern/cycles/device/device_cuda.cpp')

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0f17b67c8c6..1295ec86355 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -217,7 +217,8 @@ public:
 	}
 
 	CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
-	: Device(info, stats, background_)
+	: Device(info, stats, background_),
+	  texture_info(this, "__texture_info")
 	{
 		first_error = true;
 		background = background_;
@@ -548,17 +549,17 @@ public:
 	{
 		if(info.has_bindless_textures && need_texture_info) {
 			tex_free(texture_info);
-			tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+			tex_alloc(texture_info);
 			need_texture_info = false;
 		}
 	}
 
-	void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
+	void mem_alloc(device_memory& mem)
 	{
 		CUDAContextScope scope(this);
 
-		if(name) {
-			VLOG(1) << "Buffer allocate: " << name << ", "
+		if(mem.name) {
+			VLOG(1) << "Buffer allocate: " << mem.name << ", "
 			        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 			        << string_human_readable_size(mem.memory_size()) << ")";
 		}
@@ -619,7 +620,7 @@ public:
 		}
 	}
 
-	virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/)
+	virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/)
 	{
 		return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
 	}
@@ -635,14 +636,11 @@ public:
 		cuda_assert(cuMemcpyHtoD(mem, host, size));
 	}
 
-	void tex_alloc(const char *name,
-	               device_memory& mem,
-	               InterpolationType interpolation,
-	               ExtensionType extension)
+	void tex_alloc(device_memory& mem)
 	{
 		CUDAContextScope scope(this);
 
-		VLOG(1) << "Texture allocate: " << name << ", "
+		VLOG(1) << "Texture allocate: " << mem.name << ", "
 		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 		        << string_human_readable_size(mem.memory_size()) << ")";
 
@@ -650,12 +648,12 @@ public:
 		bool has_bindless_textures = info.has_bindless_textures;
 
 		/* General variables for both architectures */
-		string bind_name = name;
+		string bind_name = mem.name;
 		size_t dsize = datatype_size(mem.data_type);
 		size_t size = mem.memory_size();
 
 		CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
-		switch(extension) {
+		switch(mem.extension) {
 		case EXTENSION_REPEAT:
 			address_mode = CU_TR_ADDRESS_MODE_WRAP;
 			break;
@@ -671,7 +669,7 @@ public:
 		}
 
 		CUfilter_mode filter_mode;
-		if(interpolation == INTERPOLATION_CLOSEST) {
+		if(mem.interpolation == INTERPOLATION_CLOSEST) {
 			filter_mode = CU_TR_FILTER_MODE_POINT;
 		}
 		else {
@@ -681,13 +679,13 @@ public:
 		/* General variables for Fermi */
 		CUtexref texref = NULL;
 
-		if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
+		if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) {
 			if(mem.data_depth > 1) {
 				/* Kernel uses different bind names for 2d and 3d float textures,
 				 * so we have to adjust couple of things here.
 				 */
 				vector<string> tokens;
-				string_split(tokens, name, "_");
+				string_split(tokens, mem.name, "_");
 				bind_name = string_printf("__tex_image_%s_3d_%s",
 				                          tokens[2].c_str(),
 				                          tokens[3].c_str());
@@ -700,9 +698,9 @@ public:
 			}
 		}
 
-		if(interpolation == INTERPOLATION_NONE) {
+		if(mem.interpolation == INTERPOLATION_NONE) {
 			/* Data Storage */
-			mem_alloc(NULL, mem, MEM_READ_ONLY);
+			mem_alloc(mem);
 			mem_copy_to(mem);
 
 			CUdeviceptr cumem;
@@ -802,9 +800,9 @@ public:
 		if(has_bindless_textures) {
 			/* Bindless Textures - Kepler */
 			int flat_slot = 0;
-			if(string_startswith(name, "__tex_image")) {
-				int pos = string(name).rfind("_");
-				flat_slot = atoi(name + pos + 1);
+			if(string_startswith(mem.name, "__tex_image")) {
+				int pos = string(mem.name).rfind("_");
+				flat_slot = atoi(mem.name + pos + 1);
 			}
 			else {
 				assert(0);
@@ -843,8 +841,8 @@ public:
 			TextureInfo& info = texture_info[flat_slot];
 			info.data = (uint64_t)tex;
 			info.cl_buffer = 0;
-			info.interpolation = interpolation;
-			info.extension = extension;
+			info.interpolation = mem.interpolation;
+			info.extension = mem.extension;
 			info.width = mem.data_width;
 			info.height = mem.data_height;
 			info.depth = mem.data_depth;
@@ -869,7 +867,7 @@ public:
 		}
 
 		/* Fermi and Kepler */
-		tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
+		tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE);
 	}
 
 	void tex_free(device_memory& mem)
@@ -900,7 +898,7 @@ public:
 
 	bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
 	{
-		mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
+		mem_alloc(task->tiles_mem);
 
 		TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
 		for(int i = 0; i < 9; i++) {
@@ -1297,7 +1295,7 @@ public:
 		cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
 
 		/* Allocate work tile. */
-		device_vector<WorkTile> work_tiles;
+		device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
 		work_tiles.resize(1);
 
 		WorkTile *wtile = work_tiles.get_data();
@@ -1308,7 +1306,7 @@ public:
 		wtile->offset = rtile.offset;
 		wtile->stride = rtile.stride;
 		wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
-		mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
+		mem_alloc(work_tiles);
 
 		CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
 
@@ -1730,7 +1728,7 @@ public:
 		while(task->acquire_tile(this, tile)) {
 			if(tile.task == RenderTile::PATH_TRACE) {
 				if(use_split_kernel()) {
-					device_memory void_buffer;
+					device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY);
 					split_kernel->path_trace(task, tile, void_buffer, void_buffer);
 				}
 				else {
@@ -1885,9 +1883,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
 {
 	CUDAContextScope scope(device);
 
-	device_vector<uint64_t> size_buffer;
+	device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
 	size_buffer.resize(1);
-	device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
+	device->mem_alloc(size_buffer);
 
 	uint threads = num_threads;
 	CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
-- 
cgit v1.2.3
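
The effect of this first refactor is easiest to see from the call sites: metadata that used to be passed into every device call now travels with the allocation itself. Below is a minimal sketch of that idea in isolation, not the actual Cycles headers; the enum values and member names beyond those visible in the diff are assumptions for illustration.

enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE };
enum InterpolationType { INTERPOLATION_NONE, INTERPOLATION_CLOSEST, INTERPOLATION_LINEAR };
enum ExtensionType { EXTENSION_REPEAT, EXTENSION_EXTEND, EXTENSION_CLIP };

/* Hypothetical shape of the refactored class, for illustration only. */
struct device_memory {
	const char *name;                /* appears in the VLOG allocation messages */
	MemoryType type;                 /* replaces the MemoryType parameter of mem_alloc() */
	InterpolationType interpolation; /* replaces the InterpolationType parameter of tex_alloc() */
	ExtensionType extension;         /* likewise consulted inside tex_alloc() */
	/* data pointers, sizes and data_type elided */
};

/* Before: tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
 * After:  tex_alloc(texture_info);  -- the object carries its own metadata. */
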
From aa8b4c5d8124c0379eeee9eacd1a0887a573d7d7 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Mon, 23 Oct 2017 19:32:59 +0200
Subject: Code refactor: use device_only_memory and device_vector in more places.

---
 intern/cycles/device/device_cuda.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'intern/cycles/device/device_cuda.cpp')

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 1295ec86355..be606a92434 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1728,7 +1728,7 @@ public:
 		while(task->acquire_tile(this, tile)) {
 			if(tile.task == RenderTile::PATH_TRACE) {
 				if(use_split_kernel()) {
-					device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY);
+					device_only_memory<float> void_buffer(this, "void_buffer");
 					split_kernel->path_trace(task, tile, void_buffer, void_buffer);
 				}
 				else {
-- 
cgit v1.2.3
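
The one-line change above swaps an untyped placeholder for a typed, device-resident buffer. As the names read from these diffs, device_memory is the untyped base, device_vector<T> is a typed buffer with host-side storage that can be mirrored to the GPU, and device_only_memory<T> is a typed buffer that never needs a host copy, which is all the split kernel's dummy argument requires. A short sketch of the distinction, with constructors taken from the call sites in this series (only the call pattern is shown, not a standalone program):

/* Device-only: typed, no host storage ever attached. */
device_only_memory<float> void_buffer(this, "void_buffer");

/* Host-backed: filled on the CPU, then copied across; see the next commit
 * for the alloc()/copy_to_device() calls that complete the pattern. */
device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
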
(" + << string_human_readable_size(mem.memory_size()) << ")"; } CUdeviceptr device_pointer; @@ -572,31 +571,88 @@ public: stats.mem_alloc(size); } + void generic_copy_to(device_memory& mem) + { + if(mem.device_pointer) { + CUDAContextScope scope(this); + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + } + } + + void generic_free(device_memory& mem) + { + if(mem.device_pointer) { + CUDAContextScope scope(this); + + cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); + + mem.device_pointer = 0; + + stats.mem_free(mem.device_size); + mem.device_size = 0; + } + } + + void mem_alloc(device_memory& mem) + { + if(mem.type == MEM_PIXELS && !background) { + pixels_alloc(mem); + } + else if(mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); + } + else { + generic_alloc(mem); + } + } + void mem_copy_to(device_memory& mem) { - CUDAContextScope scope(this); + if(mem.type == MEM_PIXELS) { + assert(!"mem_copy_to not supported for pixels."); + } + else if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else { + if(!mem.device_pointer) { + generic_alloc(mem); + } - if(mem.device_pointer) - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + generic_copy_to(mem); + } } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { - CUDAContextScope scope(this); - size_t offset = elem*y*w; - size_t size = elem*w*h; - - if(mem.device_pointer) { - cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, - (CUdeviceptr)(mem.device_pointer + offset), size)); + if(mem.type == MEM_PIXELS && !background) { + pixels_copy_from(mem, y, w, h); + } + else if(mem.type == MEM_TEXTURE) { + assert(!"mem_copy_from not supported for textures."); } else { - memset((char*)mem.data_pointer + offset, 0, size); + CUDAContextScope scope(this); + size_t offset = elem*y*w; + size_t size = elem*w*h; + + if(mem.device_pointer) { + cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, + (CUdeviceptr)(mem.device_pointer + offset), size)); + } + else { + memset((char*)mem.data_pointer + offset, 0, size); + } } } void mem_zero(device_memory& mem) { + if(!mem.device_pointer) { + mem_alloc(mem); + } + if(mem.data_pointer) { memset((void*)mem.data_pointer, 0, mem.memory_size()); } @@ -609,14 +665,14 @@ public: void mem_free(device_memory& mem) { - if(mem.device_pointer) { - CUDAContextScope scope(this); - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); - - mem.device_pointer = 0; - - stats.mem_free(mem.device_size); - mem.device_size = 0; + if(mem.type == MEM_PIXELS && !background) { + pixels_free(mem); + } + else if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else { + generic_free(mem); } } @@ -700,8 +756,8 @@ public: if(mem.interpolation == INTERPOLATION_NONE) { /* Data Storage */ - mem_alloc(mem); - mem_copy_to(mem); + generic_alloc(mem); + generic_copy_to(mem); CUdeviceptr cumem; size_t cubytes; @@ -891,21 +947,19 @@ public: } else { tex_interp_map.erase(tex_interp_map.find(mem.device_pointer)); - mem_free(mem); + generic_free(mem); } } } bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { tiles->buffers[i] = buffers[i]; } - mem_copy_to(task->tiles_mem); + task->tiles_mem.copy_to_device(); return !have_error(); } @@ -1272,7 +1326,7 @@ public: task.unmap_neighbor_tiles(rtiles, this); } - 
-	void path_trace(DeviceTask& task, RenderTile& rtile)
+	void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
 	{
 		if(have_error())
 			return;
@@ -1295,8 +1349,7 @@ public:
 		cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
 
 		/* Allocate work tile. */
-		device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
-		work_tiles.resize(1);
+		work_tiles.alloc(1);
 
 		WorkTile *wtile = work_tiles.get_data();
 		wtile->x = rtile.x;
@@ -1306,9 +1359,6 @@ public:
 		wtile->offset = rtile.offset;
 		wtile->stride = rtile.stride;
 		wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
-		mem_alloc(work_tiles);
-
-		CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
 
 		/* Prepare work size. More step samples render faster, but for now we
 		 * remain conservative for GPUs connected to a display to avoid driver
@@ -1329,8 +1379,9 @@ public:
 			/* Setup and copy work tile to device. */
 			wtile->start_sample = sample;
 			wtile->num_samples = min(step_samples, end_sample - sample);;
-			mem_copy_to(work_tiles);
+			work_tiles.copy_to_device();
 
+			CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
 			uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
 			uint num_blocks = divide_up(total_work_size, num_threads_per_block);
@@ -1354,8 +1405,6 @@ public:
 				break;
 			}
 		}
-
-		mem_free(work_tiles);
 	}
 
 	void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
@@ -1508,104 +1557,90 @@ public:
 
 	void pixels_alloc(device_memory& mem)
 	{
-		if(!background) {
-			PixelMem pmem;
-
-			pmem.w = mem.data_width;
-			pmem.h = mem.data_height;
+		PixelMem pmem;
 
-			CUDAContextScope scope(this);
+		pmem.w = mem.data_width;
+		pmem.h = mem.data_height;
 
-			glGenBuffers(1, &pmem.cuPBO);
-			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
-			if(mem.data_type == TYPE_HALF)
-				glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
-			else
-				glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
+		CUDAContextScope scope(this);
 
-			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+		glGenBuffers(1, &pmem.cuPBO);
+		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+		if(mem.data_type == TYPE_HALF)
+			glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
+		else
+			glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
 
-			glGenTextures(1, &pmem.cuTexId);
-			glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
-			if(mem.data_type == TYPE_HALF)
-				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
-			else
-				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-			glBindTexture(GL_TEXTURE_2D, 0);
+		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 
-			CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+		glGenTextures(1, &pmem.cuTexId);
+		glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+		if(mem.data_type == TYPE_HALF)
+			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+		else
+			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+		glBindTexture(GL_TEXTURE_2D, 0);
 
-			if(result == CUDA_SUCCESS) {
-				mem.device_pointer = pmem.cuTexId;
-				pixel_mem_map[mem.device_pointer] = pmem;
+		CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
 
-				mem.device_size = mem.memory_size();
-				stats.mem_alloc(mem.device_size);
+		if(result == CUDA_SUCCESS) {
+			mem.device_pointer = pmem.cuTexId;
+			pixel_mem_map[mem.device_pointer] = pmem;
 
-				return;
-			}
-			else {
-				/* failed to register buffer, fallback to no interop */
-				glDeleteBuffers(1, &pmem.cuPBO);
-				glDeleteTextures(1, &pmem.cuTexId);
+			mem.device_size = mem.memory_size();
+			stats.mem_alloc(mem.device_size);
 
-				background = true;
-			}
-		}
+			return;
+		}
+		else {
+			/* failed to register buffer, fallback to no interop */
+			glDeleteBuffers(1, &pmem.cuPBO);
+			glDeleteTextures(1, &pmem.cuTexId);
 
-		Device::pixels_alloc(mem);
+			background = true;
+		}
 	}
 
 	void pixels_copy_from(device_memory& mem, int y, int w, int h)
 	{
-		if(!background) {
-			PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
-			CUDAContextScope scope(this);
-
-			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
-			uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
-			size_t offset = sizeof(uchar)*4*y*w;
-			memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
-			glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
-			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+		PixelMem pmem = pixel_mem_map[mem.device_pointer];
 
-			return;
-		}
+		CUDAContextScope scope(this);
 
-		Device::pixels_copy_from(mem, y, w, h);
+		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+		uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+		size_t offset = sizeof(uchar)*4*y*w;
+		memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
+		glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 	}
 
 	void pixels_free(device_memory& mem)
 	{
 		if(mem.device_pointer) {
-			if(!background) {
-				PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
-				CUDAContextScope scope(this);
+			PixelMem pmem = pixel_mem_map[mem.device_pointer];
 
-				cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
-				glDeleteBuffers(1, &pmem.cuPBO);
-				glDeleteTextures(1, &pmem.cuTexId);
+			CUDAContextScope scope(this);
 
-				pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
-				mem.device_pointer = 0;
+			cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+			glDeleteBuffers(1, &pmem.cuPBO);
+			glDeleteTextures(1, &pmem.cuTexId);
 
-				stats.mem_free(mem.device_size);
-				mem.device_size = 0;
+			pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+			mem.device_pointer = 0;
 
-				return;
-			}
-
-			Device::pixels_free(mem);
+			stats.mem_free(mem.device_size);
+			mem.device_size = 0;
 		}
 	}
 
 	void draw_pixels(device_memory& mem, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params)
 	{
+		assert(mem.type == MEM_PIXELS);
+
 		if(!background) {
 			PixelMem pmem = pixel_mem_map[mem.device_pointer];
 			float *vpointer;
@@ -1724,6 +1759,8 @@ public:
 			}
 		}
 
+		device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
 		/* keep rendering tiles until done */
 		while(task->acquire_tile(this, tile)) {
 			if(tile.task == RenderTile::PATH_TRACE) {
@@ -1732,7 +1769,7 @@ public:
 					split_kernel->path_trace(task, tile, void_buffer, void_buffer);
 				}
 				else {
-					path_trace(*task, tile);
+					path_trace(*task, tile, work_tiles);
 				}
 			}
 			else if(tile.task == RenderTile::DENOISE) {
@@ -1750,6 +1787,8 @@ public:
 				break;
 			}
 		}
+
+		work_tiles.free();
 	}
 	else if(task->type == DeviceTask::SHADER) {
 		shader(*task);
@@ -1884,8 +1923,8 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
 	CUDAContextScope scope(device);
 
 	device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
-	size_buffer.resize(1);
-	device->mem_alloc(size_buffer);
+	size_buffer.alloc(1);
+	size_buffer.zero_to_device();
 
 	uint threads = num_threads;
 	CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
@@ -1908,9 +1947,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
 	                           1, 1, 1,
 	                           0, 0, (void**)&args, 0));
 
-	device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
+	size_buffer.copy_from_device(0, 1, 1);
 	size_t size = size_buffer[0];
-	device->mem_free(size_buffer);
+	size_buffer.free();
 
 	return size;
 }
-- 
cgit v1.2.3
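
Taken together, the three commits leave callers with a single, uniform lifecycle for device buffers, with the per-type dispatch (generic, MEM_TEXTURE, MEM_PIXELS) hidden behind the mem_* entry points. The fragment below restates what state_buffer_size does after this series, lifted out of the diff into plain code; the kernel launch in the middle is elided and only the call pattern visible above is shown:

device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
size_buffer.alloc(1);                  /* reserve one element of host storage */
size_buffer.zero_to_device();          /* allocate the device copy and clear it */

/* ... launch the kernel that writes the state size into size_buffer ... */

size_buffer.copy_from_device(0, 1, 1); /* read the single element back to the host */
size_t size = size_buffer[0];
size_buffer.free();                    /* release host and device storage */
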