From 070a668d04844610059aaedc80c49e9038fd1779 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 21 Oct 2017 01:09:59 +0200 Subject: Code refactor: move more memory allocation logic into device API. * Remove tex_* and pixels_* functions, replace by mem_*. * Add MEM_TEXTURE and MEM_PIXELS as memory types recognized by devices. * No longer create device_memory and call mem_* directly, always go through device_only_memory, device_vector and device_pixels. --- intern/cycles/device/device.cpp | 22 +-- intern/cycles/device/device.h | 32 ++- intern/cycles/device/device_cpu.cpp | 76 ++++--- intern/cycles/device/device_cuda.cpp | 263 ++++++++++++++----------- intern/cycles/device/device_denoising.cpp | 44 ++--- intern/cycles/device/device_memory.cpp | 62 ++++++ intern/cycles/device/device_memory.h | 240 ++++++++++++++++------ intern/cycles/device/device_multi.cpp | 161 ++++++--------- intern/cycles/device/device_network.cpp | 139 +++++-------- intern/cycles/device/device_network.h | 5 + intern/cycles/device/device_split_kernel.cpp | 37 ++-- intern/cycles/device/opencl/memory_manager.cpp | 8 +- intern/cycles/device/opencl/opencl_base.cpp | 75 ++++--- intern/cycles/device/opencl/opencl_split.cpp | 13 +- 14 files changed, 662 insertions(+), 515 deletions(-) (limited to 'intern/cycles/device') diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 9de10c184fb..41fbe7ce81b 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -85,28 +85,12 @@ Device::~Device() } } -void Device::pixels_alloc(device_memory& mem) -{ - mem_alloc(mem); -} - -void Device::pixels_copy_from(device_memory& mem, int y, int w, int h) -{ - if(mem.data_type == TYPE_HALF) - mem_copy_from(mem, y, w, h, sizeof(half4)); - else - mem_copy_from(mem, y, w, h, sizeof(uchar4)); -} - -void Device::pixels_free(device_memory& mem) -{ - mem_free(mem); -} - void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { - pixels_copy_from(rgba, y, w, h); + assert(mem.type == MEM_PIXELS); + + mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1)); if(transparent) { glEnable(GL_BLEND); diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 6bb65cde2a3..316bf70a5c3 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -281,28 +281,12 @@ public: /* statistics */ Stats &stats; - /* regular memory */ - virtual void mem_alloc(device_memory& mem) = 0; - virtual void mem_copy_to(device_memory& mem) = 0; - virtual void mem_copy_from(device_memory& mem, - int y, int w, int h, int elem) = 0; - virtual void mem_zero(device_memory& mem) = 0; - virtual void mem_free(device_memory& mem) = 0; - + /* memory alignment */ virtual int mem_address_alignment() { return 16; } /* constant memory */ virtual void const_copy_to(const char *name, void *host, size_t size) = 0; - /* texture memory */ - virtual void tex_alloc(device_memory& /*mem*/) {}; - virtual void tex_free(device_memory& /*mem*/) {}; - - /* pixel memory */ - virtual void pixels_alloc(device_memory& mem); - virtual void pixels_copy_from(device_memory& mem, int y, int w, int h); - virtual void pixels_free(device_memory& mem); - /* open shading language, only for CPU device */ virtual void *osl_memory() { return NULL; } @@ -349,6 +333,20 @@ public: static void tag_update(); static void free_memory(); + +protected: + /* Memory allocation, only accessed through device_memory. */ + friend class MultiDevice; + friend class DeviceServer; + friend class device_memory; + + virtual void mem_alloc(device_memory& mem) = 0; + virtual void mem_copy_to(device_memory& mem) = 0; + virtual void mem_copy_from(device_memory& mem, + int y, int w, int h, int elem) = 0; + virtual void mem_zero(device_memory& mem) = 0; + virtual void mem_free(device_memory& mem) = 0; + private: /* Indicted whether device types and devices lists were initialized. */ static bool need_types_update, need_devices_update; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index b4398f21014..32ab18fe164 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -209,7 +209,7 @@ public: CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_) : Device(info_, stats_, background_), - texture_info(this, "__texture_info"), + texture_info(this, "__texture_info", MEM_TEXTURE), #define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name)) REGISTER_KERNEL(path_trace), REGISTER_KERNEL(convert_to_half_float), @@ -269,7 +269,7 @@ public: ~CPUDevice() { task_pool.stop(); - tex_free(texture_info); + texture_info.free(); } virtual bool show_samples() const @@ -280,33 +280,50 @@ public: void load_texture_info() { if(need_texture_info) { - tex_free(texture_info); - tex_alloc(texture_info); + texture_info.copy_to_device(); need_texture_info = false; } } void mem_alloc(device_memory& mem) { - if(mem.name) { - VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + if(mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); } + else { + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + } - mem.device_pointer = mem.data_pointer; + mem.device_pointer = mem.data_pointer; - if(!mem.device_pointer) { - mem.device_pointer = (device_ptr)malloc(mem.memory_size()); - } + if(!mem.device_pointer) { + mem.device_pointer = (device_ptr)malloc(mem.memory_size()); + } - mem.device_size = mem.memory_size(); - stats.mem_alloc(mem.device_size); + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); + } } - void mem_copy_to(device_memory& /*mem*/) + void mem_copy_to(device_memory& mem) { - /* no-op */ + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else if(mem.type == MEM_PIXELS) { + assert(!"mem_copy_to not supported for pixels."); + } + else { + if(!mem.device_pointer) { + mem_alloc(mem); + } + + /* copy is no-op */ + } } void mem_copy_from(device_memory& /*mem*/, @@ -318,12 +335,21 @@ public: void mem_zero(device_memory& mem) { - memset((void*)mem.device_pointer, 0, mem.memory_size()); + if(!mem.device_pointer) { + mem_alloc(mem); + } + + if(mem.device_pointer) { + memset((void*)mem.device_pointer, 0, mem.memory_size()); + } } void mem_free(device_memory& mem) { - if(mem.device_pointer) { + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else if(mem.device_pointer) { if(!mem.data_pointer) { free((void*)mem.device_pointer); } @@ -354,7 +380,7 @@ public: kernel_tex_copy(&kernel_globals, mem.name, mem.data_pointer, - mem.data_width); + mem.data_size); } else { /* Image Texture. */ @@ -431,13 +457,13 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { tiles->buffers[i] = buffers[i]; } + task->tiles_mem.copy_to_device(); + return true; } @@ -723,8 +749,7 @@ public: /* allocate buffer for kernel globals */ device_only_memory kgbuffer(this, "kernel_globals"); - kgbuffer.resize(1); - mem_alloc(kgbuffer); + kgbuffer.alloc_to_device(1); KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init()); @@ -734,8 +759,7 @@ public: requested_features.max_closure = MAX_CLOSURE; if(!split_kernel->load_kernels(requested_features)) { thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); - mem_free(kgbuffer); - + kgbuffer.free(); delete split_kernel; return; } @@ -766,7 +790,7 @@ public: thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); kg->~KernelGlobals(); - mem_free(kgbuffer); + kgbuffer.free(); delete split_kernel; } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index be606a92434..aa6386e455b 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -218,7 +218,7 @@ public: CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_), - texture_info(this, "__texture_info") + texture_info(this, "__texture_info", MEM_TEXTURE) { first_error = true; background = background_; @@ -275,7 +275,7 @@ public: delete split_kernel; if(info.has_bindless_textures) { - tex_free(texture_info); + texture_info.free(); } cuda_assert(cuCtxDestroy(cuContext)); @@ -548,20 +548,19 @@ public: void load_texture_info() { if(info.has_bindless_textures && need_texture_info) { - tex_free(texture_info); - tex_alloc(texture_info); + texture_info.copy_to_device(); need_texture_info = false; } } - void mem_alloc(device_memory& mem) + void generic_alloc(device_memory& mem) { CUDAContextScope scope(this); if(mem.name) { VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; } CUdeviceptr device_pointer; @@ -572,31 +571,88 @@ public: stats.mem_alloc(size); } + void generic_copy_to(device_memory& mem) + { + if(mem.device_pointer) { + CUDAContextScope scope(this); + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + } + } + + void generic_free(device_memory& mem) + { + if(mem.device_pointer) { + CUDAContextScope scope(this); + + cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); + + mem.device_pointer = 0; + + stats.mem_free(mem.device_size); + mem.device_size = 0; + } + } + + void mem_alloc(device_memory& mem) + { + if(mem.type == MEM_PIXELS && !background) { + pixels_alloc(mem); + } + else if(mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); + } + else { + generic_alloc(mem); + } + } + void mem_copy_to(device_memory& mem) { - CUDAContextScope scope(this); + if(mem.type == MEM_PIXELS) { + assert(!"mem_copy_to not supported for pixels."); + } + else if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else { + if(!mem.device_pointer) { + generic_alloc(mem); + } - if(mem.device_pointer) - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + generic_copy_to(mem); + } } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { - CUDAContextScope scope(this); - size_t offset = elem*y*w; - size_t size = elem*w*h; - - if(mem.device_pointer) { - cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, - (CUdeviceptr)(mem.device_pointer + offset), size)); + if(mem.type == MEM_PIXELS && !background) { + pixels_copy_from(mem, y, w, h); + } + else if(mem.type == MEM_TEXTURE) { + assert(!"mem_copy_from not supported for textures."); } else { - memset((char*)mem.data_pointer + offset, 0, size); + CUDAContextScope scope(this); + size_t offset = elem*y*w; + size_t size = elem*w*h; + + if(mem.device_pointer) { + cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, + (CUdeviceptr)(mem.device_pointer + offset), size)); + } + else { + memset((char*)mem.data_pointer + offset, 0, size); + } } } void mem_zero(device_memory& mem) { + if(!mem.device_pointer) { + mem_alloc(mem); + } + if(mem.data_pointer) { memset((void*)mem.data_pointer, 0, mem.memory_size()); } @@ -609,14 +665,14 @@ public: void mem_free(device_memory& mem) { - if(mem.device_pointer) { - CUDAContextScope scope(this); - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); - - mem.device_pointer = 0; - - stats.mem_free(mem.device_size); - mem.device_size = 0; + if(mem.type == MEM_PIXELS && !background) { + pixels_free(mem); + } + else if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else { + generic_free(mem); } } @@ -700,8 +756,8 @@ public: if(mem.interpolation == INTERPOLATION_NONE) { /* Data Storage */ - mem_alloc(mem); - mem_copy_to(mem); + generic_alloc(mem); + generic_copy_to(mem); CUdeviceptr cumem; size_t cubytes; @@ -891,21 +947,19 @@ public: } else { tex_interp_map.erase(tex_interp_map.find(mem.device_pointer)); - mem_free(mem); + generic_free(mem); } } } bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { tiles->buffers[i] = buffers[i]; } - mem_copy_to(task->tiles_mem); + task->tiles_mem.copy_to_device(); return !have_error(); } @@ -1272,7 +1326,7 @@ public: task.unmap_neighbor_tiles(rtiles, this); } - void path_trace(DeviceTask& task, RenderTile& rtile) + void path_trace(DeviceTask& task, RenderTile& rtile, device_vector& work_tiles) { if(have_error()) return; @@ -1295,8 +1349,7 @@ public: cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); /* Allocate work tile. */ - device_vector work_tiles(this, "work_tiles", MEM_READ_ONLY); - work_tiles.resize(1); + work_tiles.alloc(1); WorkTile *wtile = work_tiles.get_data(); wtile->x = rtile.x; @@ -1306,9 +1359,6 @@ public: wtile->offset = rtile.offset; wtile->stride = rtile.stride; wtile->buffer = (float*)cuda_device_ptr(rtile.buffer); - mem_alloc(work_tiles); - - CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); /* Prepare work size. More step samples render faster, but for now we * remain conservative for GPUs connected to a display to avoid driver @@ -1329,8 +1379,9 @@ public: /* Setup and copy work tile to device. */ wtile->start_sample = sample; wtile->num_samples = min(step_samples, end_sample - sample);; - mem_copy_to(work_tiles); + work_tiles.copy_to_device(); + CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); uint total_work_size = wtile->w * wtile->h * wtile->num_samples; uint num_blocks = divide_up(total_work_size, num_threads_per_block); @@ -1354,8 +1405,6 @@ public: break; } } - - mem_free(work_tiles); } void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) @@ -1508,104 +1557,90 @@ public: void pixels_alloc(device_memory& mem) { - if(!background) { - PixelMem pmem; - - pmem.w = mem.data_width; - pmem.h = mem.data_height; + PixelMem pmem; - CUDAContextScope scope(this); + pmem.w = mem.data_width; + pmem.h = mem.data_height; - glGenBuffers(1, &pmem.cuPBO); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - if(mem.data_type == TYPE_HALF) - glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW); - else - glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW); + CUDAContextScope scope(this); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glGenBuffers(1, &pmem.cuPBO); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + if(mem.data_type == TYPE_HALF) + glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW); + else + glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW); - glGenTextures(1, &pmem.cuTexId); - glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); - if(mem.data_type == TYPE_HALF) - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); - else - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glBindTexture(GL_TEXTURE_2D, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); + glGenTextures(1, &pmem.cuTexId); + glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); + if(mem.data_type == TYPE_HALF) + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); + else + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); - if(result == CUDA_SUCCESS) { - mem.device_pointer = pmem.cuTexId; - pixel_mem_map[mem.device_pointer] = pmem; + CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); - mem.device_size = mem.memory_size(); - stats.mem_alloc(mem.device_size); + if(result == CUDA_SUCCESS) { + mem.device_pointer = pmem.cuTexId; + pixel_mem_map[mem.device_pointer] = pmem; - return; - } - else { - /* failed to register buffer, fallback to no interop */ - glDeleteBuffers(1, &pmem.cuPBO); - glDeleteTextures(1, &pmem.cuTexId); + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); - background = true; - } + return; } + else { + /* failed to register buffer, fallback to no interop */ + glDeleteBuffers(1, &pmem.cuPBO); + glDeleteTextures(1, &pmem.cuTexId); - Device::pixels_alloc(mem); + background = true; + } } void pixels_copy_from(device_memory& mem, int y, int w, int h) { - if(!background) { - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - - CUDAContextScope scope(this); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); - size_t offset = sizeof(uchar)*4*y*w; - memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h); - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + PixelMem pmem = pixel_mem_map[mem.device_pointer]; - return; - } + CUDAContextScope scope(this); - Device::pixels_copy_from(mem, y, w, h); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); + size_t offset = sizeof(uchar)*4*y*w; + memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } void pixels_free(device_memory& mem) { if(mem.device_pointer) { - if(!background) { - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - - CUDAContextScope scope(this); - - cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); - glDeleteBuffers(1, &pmem.cuPBO); - glDeleteTextures(1, &pmem.cuTexId); + PixelMem pmem = pixel_mem_map[mem.device_pointer]; - pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); - mem.device_pointer = 0; + CUDAContextScope scope(this); - stats.mem_free(mem.device_size); - mem.device_size = 0; + cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); + glDeleteBuffers(1, &pmem.cuPBO); + glDeleteTextures(1, &pmem.cuTexId); - return; - } + pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); + mem.device_pointer = 0; - Device::pixels_free(mem); + stats.mem_free(mem.device_size); + mem.device_size = 0; } } void draw_pixels(device_memory& mem, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { + assert(mem.type == MEM_PIXELS); + if(!background) { PixelMem pmem = pixel_mem_map[mem.device_pointer]; float *vpointer; @@ -1724,6 +1759,8 @@ public: } } + device_vector work_tiles(this, "work_tiles", MEM_READ_ONLY); + /* keep rendering tiles until done */ while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { @@ -1732,7 +1769,7 @@ public: split_kernel->path_trace(task, tile, void_buffer, void_buffer); } else { - path_trace(*task, tile); + path_trace(*task, tile, work_tiles); } } else if(tile.task == RenderTile::DENOISE) { @@ -1750,6 +1787,8 @@ public: break; } } + + work_tiles.free(); } else if(task->type == DeviceTask::SHADER) { shader(*task); @@ -1884,8 +1923,8 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory CUDAContextScope scope(device); device_vector size_buffer(device, "size_buffer", MEM_READ_WRITE); - size_buffer.resize(1); - device->mem_alloc(size_buffer); + size_buffer.alloc(1); + size_buffer.zero_to_device(); uint threads = num_threads; CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); @@ -1908,9 +1947,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory 1, 1, 1, 0, 0, (void**)&args, 0)); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); + size_buffer.copy_from_device(0, 1, 1); size_t size = size_buffer[0]; - device->mem_free(size_buffer); + size_buffer.free(); return size; } diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 2c3bfefd8b0..2d39721e3d3 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -44,7 +44,7 @@ void DenoisingTask::init_from_devicetask(const DeviceTask &task) void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles) { - tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int)); + tiles = (TilesInfo*) tiles_mem.alloc(sizeof(TilesInfo)/sizeof(int)); device_ptr buffers[9]; for(int i = 0; i < 9; i++) { @@ -75,8 +75,7 @@ bool DenoisingTask::run_denoising() buffer.w = align_up(rect.z - rect.x, 4); buffer.h = rect.w - rect.y; buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float))); - buffer.mem.resize(buffer.pass_stride * buffer.passes); - device->mem_alloc(buffer.mem); + buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes); device_ptr null_ptr = (device_ptr) 0; @@ -161,8 +160,7 @@ bool DenoisingTask::run_denoising() int num_color_passes = 3; device_only_memory temp_color(device, "Denoising temporary color"); - temp_color.resize(3*buffer.pass_stride); - device->mem_alloc(temp_color); + temp_color.alloc_to_device(3*buffer.pass_stride); for(int pass = 0; pass < num_color_passes; pass++) { device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride); @@ -177,31 +175,25 @@ bool DenoisingTask::run_denoising() functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); } - device->mem_free(temp_color); + temp_color.free(); } storage.w = filter_area.z; storage.h = filter_area.w; - storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE); - storage.rank.resize(storage.w*storage.h); - device->mem_alloc(storage.transform); - device->mem_alloc(storage.rank); + storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE); + storage.rank.alloc_to_device(storage.w*storage.h); functions.construct_transform(); device_only_memory temporary_1(device, "Denoising NLM temporary 1"); device_only_memory temporary_2(device, "Denoising NLM temporary 2"); - temporary_1.resize(buffer.w*buffer.h); - temporary_2.resize(buffer.w*buffer.h); - device->mem_alloc(temporary_1); - device->mem_alloc(temporary_2); + temporary_1.alloc_to_device(buffer.w*buffer.h); + temporary_2.alloc_to_device(buffer.w*buffer.h); reconstruction_state.temporary_1_ptr = temporary_1.device_pointer; reconstruction_state.temporary_2_ptr = temporary_2.device_pointer; - storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE); - storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE); - device->mem_alloc(storage.XtWX); - device->mem_alloc(storage.XtWY); + storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE); + storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE); reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x; @@ -218,14 +210,14 @@ bool DenoisingTask::run_denoising() functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } - device->mem_free(storage.XtWX); - device->mem_free(storage.XtWY); - device->mem_free(storage.transform); - device->mem_free(storage.rank); - device->mem_free(temporary_1); - device->mem_free(temporary_2); - device->mem_free(buffer.mem); - device->mem_free(tiles_mem); + storage.XtWX.free(); + storage.XtWY.free(); + storage.transform.free(); + storage.rank.free(); + temporary_1.free(); + temporary_2.free(); + buffer.mem.free(); + tiles_mem.free(); return true; } diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 98fa638ef8e..9f4f60e7531 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -43,6 +43,68 @@ device_memory::~device_memory() { } +device_ptr device_memory::host_alloc(size_t size) +{ + if(!size) { + return 0; + } + + size_t alignment = device->mem_address_alignment(); + device_ptr ptr = (device_ptr)util_aligned_malloc(size, alignment); + + if(ptr) { + util_guarded_mem_alloc(size); + } + else { + throw std::bad_alloc(); + } + + return ptr; +} + +void device_memory::host_free(device_ptr ptr, size_t size) +{ + if(ptr) { + util_guarded_mem_free(size); + util_aligned_free((void*)ptr); + } +} + +void device_memory::device_alloc() +{ + assert(!device_pointer && type != MEM_TEXTURE); + device->mem_alloc(*this); +} + +void device_memory::device_free() +{ + if(device_pointer) { + device->mem_free(*this); + } +} + +void device_memory::device_copy_to() +{ + assert(type != MEM_PIXELS && type != MEM_WRITE_ONLY); + if(data_size) { + device->mem_copy_to(*this); + } +} + +void device_memory::device_copy_from(int y, int w, int h, int elem) +{ + assert(type != MEM_TEXTURE && type != MEM_READ_ONLY); + device->mem_copy_from(*this, y, w, h, elem); +} + +void device_memory::device_zero() +{ + assert(type != MEM_PIXELS && type != MEM_WRITE_ONLY); + if(data_size) { + device->mem_zero(*this); + } +} + /* Device Sub Ptr */ device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size) diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 3dfecde59d8..7bf8bdc1cea 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -19,14 +19,7 @@ /* Device Memory * - * This file defines data types that can be used in device memory arrays, and - * a device_vector type to store such arrays. - * - * device_vector contains an STL vector, metadata about the data type, - * dimensions, elements, and a device pointer. For the CPU device this is just - * a pointer to the STL vector data, as no copying needs to take place. For - * other devices this is a pointer to device memory, where we will copy memory - * to and from. */ + * Data types for allocating, copying and freeing device memory. */ #include "util/util_debug.h" #include "util/util_half.h" @@ -41,7 +34,9 @@ class Device; enum MemoryType { MEM_READ_ONLY, MEM_WRITE_ONLY, - MEM_READ_WRITE + MEM_READ_WRITE, + MEM_TEXTURE, + MEM_PIXELS }; /* Supported Data Types */ @@ -172,7 +167,10 @@ template<> struct device_type_traits { static const int num_elements = 1; }; -/* Device Memory */ +/* Device Memory + * + * Base class for all device memory. This should not be allocated directly, + * instead the appropriate subclass can be used. */ class device_memory { @@ -182,7 +180,7 @@ public: return elements*data_elements*datatype_size(data_type); } - /* data information */ + /* Data information. */ DataType data_type; int data_elements; device_ptr data_pointer; @@ -196,25 +194,39 @@ public: InterpolationType interpolation; ExtensionType extension; - /* device pointer */ + /* Device pointer. */ Device *device; device_ptr device_pointer; - device_memory(Device *device, const char *name, MemoryType type); virtual ~device_memory(); - void resize(size_t size) - { - data_size = size; - data_width = size; - } - protected: - /* no copying */ + /* Only create through subclasses. */ + device_memory(Device *device, const char *name, MemoryType type); + + /* No copying allowed. */ device_memory(const device_memory&); device_memory& operator = (const device_memory&); + + /* Host allocation on the device. All data_pointer memory should be + * allocated with these functions, for devices that support using + * the same pointer for host and device. */ + device_ptr host_alloc(size_t size); + void host_free(device_ptr ptr, size_t size); + + /* Device memory allocation and copying. */ + void device_alloc(); + void device_free(); + void device_copy_to(); + void device_copy_from(int y, int w, int h, int elem); + void device_zero(); }; +/* Device Only Memory + * + * Working memory only needed by the device, with no corresponding allocation + * on the host. Only used internally in the device implementations. */ + template class device_only_memory : public device_memory { @@ -226,18 +238,43 @@ public: data_elements = max(device_type_traits::num_elements, 1); } - void resize(size_t num) + virtual ~device_only_memory() + { + free(); + } + + void alloc_to_device(size_t num) + { + data_size = num*sizeof(T); + device_alloc(); + } + + void free() + { + device_free(); + } + + void zero_to_device() { - device_memory::resize(num*sizeof(T)); + device_zero(); } }; -/* Device Vector */ +/* Device Vector + * + * Data vector to exchange data between host and device. Memory will be + * allocated on the host first with alloc() and resize, and then filled + * in and copied to the device with copy_to_device(). Or alternatively + * allocated and set to zero on the device with zero_to_device(). + * + * When using memory type MEM_TEXTURE, a pointer to this memory will be + * automatically attached to kernel globals, using the provided name + * matching an entry in kernel_textures.h. */ template class device_vector : public device_memory { public: - device_vector(Device *device, const char *name, MemoryType type = MEM_READ_ONLY) + device_vector(Device *device, const char *name, MemoryType type) : device_memory(device, name, type) { data_type = device_type_traits::data_type; @@ -246,84 +283,175 @@ public: assert(data_elements > 0); } - virtual ~device_vector() {} + virtual ~device_vector() + { + free(); + } - /* vector functions */ - T *resize(size_t width, size_t height = 0, size_t depth = 0) + /* Host memory allocation. */ + T *alloc(size_t width, size_t height = 0, size_t depth = 0) { - data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth); - if(data.resize(data_size) == NULL) { - clear(); - return NULL; + size_t new_size = size(width, height, depth); + + if(new_size != data_size) { + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + data_pointer = host_alloc(sizeof(T)*new_size); } + + data_size = new_size; data_width = width; data_height = height; data_depth = depth; - if(data_size == 0) { - data_pointer = 0; - return NULL; + assert(device_ptr == 0); + + return get_data(); + } + + /* Host memory resize. Only use this if the original data needs to be + * preserved, it is faster to call alloc() if it can be discarded. */ + T *resize(size_t width, size_t height = 0, size_t depth = 0) + { + size_t new_size = size(width, height, depth); + + if(new_size != data_size) { + device_ptr new_ptr = host_alloc(sizeof(T)*new_size); + + if(new_size && data_size) { + size_t min_size = ((new_size < data_size)? new_size: data_size); + memcpy((T*)new_ptr, (T*)data_pointer, sizeof(T)*min_size); + } + + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + data_pointer = new_ptr; } - data_pointer = (device_ptr)&data[0]; - return &data[0]; + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + assert(device_ptr == 0); + + return get_data(); } + /* Take over data from an existing array. */ void steal_data(array& from) { - data.steal_data(from); - data_size = data.size(); - data_pointer = (data_size)? (device_ptr)&data[0]: 0; - data_width = data_size; + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + + data_size = from.size(); + data_width = 0; data_height = 0; data_depth = 0; + data_pointer = (device_ptr)from.steal_pointer(); + assert(device_pointer == 0); } - void clear() + /* Free device and host memory. */ + void free() { - data.clear(); - data_pointer = 0; + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + + data_size = 0; data_width = 0; data_height = 0; data_depth = 0; - data_size = 0; - device_pointer = 0; + data_pointer = 0; + assert(device_pointer == 0); } size_t size() { - return data.size(); + return data_size; } T* get_data() { - return &data[0]; + return (T*)data_pointer; } T& operator[](size_t i) { - return data[i]; + assert(i < data_size); + return get_data()[i]; } -private: - array data; + void copy_to_device() + { + device_copy_to(); + } + + void copy_from_device(int y, int w, int h) + { + device_copy_from(y, w, h, sizeof(T)); + } + + void zero_to_device() + { + device_zero(); + } + +protected: + size_t size(size_t width, size_t height, size_t depth) + { + return width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth); + } }; -/* A device_sub_ptr is a pointer into another existing memory. - * Therefore, it is not allocated separately, but just created from the already allocated base memory. - * It is freed automatically when it goes out of scope, which should happen before the base memory is freed. - * Note that some devices require the offset and size of the sub_ptr to be properly aligned. */ +/* Pixel Memory + * + * Device memory to efficiently draw as pixels to the screen in interactive + * rendering. Only copying pixels from the device is supported, not copying to. */ + +template class device_pixels : public device_vector +{ +public: + device_pixels(Device *device, const char *name) + : device_vector(device, name, MEM_PIXELS) + { + } + + void alloc_to_device(size_t width, size_t height, size_t depth = 0) + { + device_vector::alloc(width, height, depth); + device_memory::device_alloc(); + } + + T *copy_from_device(int y, int w, int h) + { + device_memory::device_copy_from(y, w, h, sizeof(T)); + return device_vector::get_data(); + } +}; + +/* Device Sub Memory + * + * Pointer into existing memory. It is not allocated separately, but created + * from an already allocated base memory. It is freed automatically when it + * goes out of scope, which should happen before base memory is freed. + * + * Note: some devices require offset and size of the sub_ptr to be properly + * aligned to device->mem_address_alingment(). */ + class device_sub_ptr { public: device_sub_ptr(device_memory& mem, int offset, int size); ~device_sub_ptr(); - /* No copying. */ - device_sub_ptr& operator = (const device_sub_ptr&); device_ptr operator*() const { return ptr; } + protected: + /* No copying. */ + device_sub_ptr& operator = (const device_sub_ptr&); + Device *device; device_ptr ptr; }; diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 7f7fbc0d1d3..0a6dd90c86d 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -43,10 +43,10 @@ public: }; list devices; - device_ptr unique_ptr; + device_ptr unique_key; MultiDevice(DeviceInfo& info, Stats &stats, bool background_) - : Device(info, stats, background_), unique_ptr(1) + : Device(info, stats, background_), unique_key(1) { Device *device; @@ -108,68 +108,87 @@ public: void mem_alloc(device_memory& mem) { + device_ptr key = unique_key++; + foreach(SubDevice& sub, devices) { + mem.device = sub.device; mem.device_pointer = 0; + sub.device->mem_alloc(mem); - sub.ptr_map[unique_ptr] = mem.device_pointer; + sub.ptr_map[key] = mem.device_pointer; } - mem.device_pointer = unique_ptr++; - stats.mem_alloc(mem.device_size); + mem.device = this; + mem.device_pointer = key; } void mem_copy_to(device_memory& mem) { - device_ptr tmp = mem.device_pointer; + device_ptr existing_key = mem.device_pointer; + device_ptr key = (existing_key)? existing_key: unique_key++; foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0; + sub.device->mem_copy_to(mem); + sub.ptr_map[key] = mem.device_pointer; } - mem.device_pointer = tmp; + mem.device = this; + mem.device_pointer = key; } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { - device_ptr tmp = mem.device_pointer; + device_ptr key = mem.device_pointer; int i = 0, sub_h = h/devices.size(); foreach(SubDevice& sub, devices) { int sy = y + i*sub_h; int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h; - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + sub.device->mem_copy_from(mem, sy, w, sh, elem); i++; } - mem.device_pointer = tmp; + mem.device = this; + mem.device_pointer = key; } void mem_zero(device_memory& mem) { - device_ptr tmp = mem.device_pointer; + device_ptr existing_key = mem.device_pointer; + device_ptr key = (existing_key)? existing_key: unique_key++; foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0; + sub.device->mem_zero(mem); + sub.ptr_map[key] = mem.device_pointer; } - mem.device_pointer = tmp; + mem.device = this; + mem.device_pointer = key; } void mem_free(device_memory& mem) { - device_ptr tmp = mem.device_pointer; - stats.mem_free(mem.device_size); + device_ptr key = mem.device_pointer; foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + sub.device->mem_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(tmp)); + sub.ptr_map.erase(sub.ptr_map.find(key)); } + mem.device = this; mem.device_pointer = 0; } @@ -179,81 +198,10 @@ public: sub.device->const_copy_to(name, host, size); } - void tex_alloc(device_memory& mem) - { - VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; - - foreach(SubDevice& sub, devices) { - mem.device_pointer = 0; - sub.device->tex_alloc(mem); - sub.ptr_map[unique_ptr] = mem.device_pointer; - } - - mem.device_pointer = unique_ptr++; - stats.mem_alloc(mem.device_size); - } - - void tex_free(device_memory& mem) - { - device_ptr tmp = mem.device_pointer; - stats.mem_free(mem.device_size); - - foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; - sub.device->tex_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(tmp)); - } - - mem.device_pointer = 0; - } - - void pixels_alloc(device_memory& mem) - { - foreach(SubDevice& sub, devices) { - mem.device_pointer = 0; - sub.device->pixels_alloc(mem); - sub.ptr_map[unique_ptr] = mem.device_pointer; - } - - mem.device_pointer = unique_ptr++; - } - - void pixels_free(device_memory& mem) - { - device_ptr tmp = mem.device_pointer; - - foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; - sub.device->pixels_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(tmp)); - } - - mem.device_pointer = 0; - } - - void pixels_copy_from(device_memory& mem, int y, int w, int h) - { - device_ptr tmp = mem.device_pointer; - int i = 0, sub_h = h/devices.size(); - - foreach(SubDevice& sub, devices) { - int sy = y + i*sub_h; - int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h; - - mem.device_pointer = sub.ptr_map[tmp]; - sub.device->pixels_copy_from(mem, sy, w, sh); - i++; - } - - mem.device_pointer = tmp; - } - void draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { - device_ptr tmp = rgba.device_pointer; + device_ptr key = rgba.device_pointer; int i = 0, sub_h = h/devices.size(); int sub_height = height/devices.size(); @@ -264,12 +212,12 @@ public: int sdy = dy + i*sub_height; /* adjust math for w/width */ - rgba.device_pointer = sub.ptr_map[tmp]; + rgba.device_pointer = sub.ptr_map[key]; sub.device->draw_pixels(rgba, sy, w, sh, dx, sdy, width, sheight, transparent, draw_params); i++; } - rgba.device_pointer = tmp; + rgba.device_pointer = key; } void map_tile(Device *sub_device, RenderTile& tile) @@ -304,15 +252,21 @@ public: * to the current device now, for the duration of the denoising task. * Note that this temporarily modifies the RenderBuffers and calls * the device, so this function is not thread safe. */ - if(tiles[i].buffers->device != sub_device) { - device_vector &mem = tiles[i].buffers->buffer; - + device_vector &mem = tiles[i].buffers->buffer; + if(mem.device != sub_device) { tiles[i].buffers->copy_from_device(); + + Device *original_device = mem.device; device_ptr original_ptr = mem.device_pointer; + + mem.device = sub_device; mem.device_pointer = 0; + sub_device->mem_alloc(mem); sub_device->mem_copy_to(mem); tiles[i].buffer = mem.device_pointer; + + mem.device = original_device; mem.device_pointer = original_ptr; } } @@ -324,25 +278,30 @@ public: if(!tiles[i].buffers) { continue; } - if(tiles[i].buffers->device != sub_device) { - device_vector &mem = tiles[i].buffers->buffer; + device_vector &mem = tiles[i].buffers->buffer; + if(mem.device != sub_device) { + Device *original_device = mem.device; device_ptr original_ptr = mem.device_pointer; + size_t original_size = mem.device_size; + + mem.device = sub_device; mem.device_pointer = tiles[i].buffer; /* Copy denoised tile to the host. */ if(i == 4) { - tiles[i].buffers->copy_from_device(sub_device); + tiles[i].buffers->copy_from_device(); } - size_t mem_size = mem.device_size; sub_device->mem_free(mem); + + mem.device = original_device; mem.device_pointer = original_ptr; - mem.device_size = mem_size; + mem.device_size = original_size; /* Copy denoised tile to the original device. */ if(i == 4) { - tiles[i].buffers->device->mem_copy_to(mem); + mem.copy_to_device(); } } } diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index bdc88b6acae..fa231c817e6 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -172,36 +172,6 @@ public: snd.write_buffer(host, size); } - void tex_alloc(device_memory& mem) - { - VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; - - thread_scoped_lock lock(rpc_lock); - - mem.device_pointer = ++mem_counter; - - RPCSend snd(socket, &error_func, "tex_alloc"); - snd.add(mem); - snd.write(); - snd.write_buffer((void*)mem.data_pointer, mem.memory_size()); - } - - void tex_free(device_memory& mem) - { - if(mem.device_pointer) { - thread_scoped_lock lock(rpc_lock); - - RPCSend snd(socket, &error_func, "tex_free"); - - snd.add(mem); - snd.write(); - - mem.device_pointer = 0; - } - } - bool load_kernels(const DeviceRequestedFeatures& requested_features) { if(error_func.have_error()) @@ -310,7 +280,7 @@ public: snd.write(); } - int get_split_task_count(DeviceTask& task) + int get_split_task_count(DeviceTask&) { return 1; } @@ -464,21 +434,17 @@ protected: rcv.read(mem, name); lock.unlock(); + /* Allocate host side data buffer. */ + size_t data_size = mem.memory_size(); device_ptr client_pointer = mem.device_pointer; - /* create a memory buffer for the device buffer */ - size_t data_size = mem.memory_size(); DataVector &data_v = data_vector_insert(client_pointer, data_size); + mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0; - if(data_size) - mem.data_pointer = (device_ptr)&(data_v[0]); - else - mem.data_pointer = 0; - - /* perform the allocation on the actual device */ + /* Perform the allocation on the actual device. */ device->mem_alloc(mem); - /* store a mapping to/from client_pointer and real device pointer */ + /* Store a mapping to/from client_pointer and real device pointer. */ pointer_mapping_insert(client_pointer, mem.device_pointer); } else if(rcv.name == "mem_copy_to") { @@ -487,23 +453,33 @@ protected: rcv.read(mem, name); lock.unlock(); + size_t data_size = mem.memory_size(); device_ptr client_pointer = mem.device_pointer; - DataVector &data_v = data_vector_find(client_pointer); - - size_t data_size = mem.memory_size(); + if(client_pointer) { + /* Lookup existing host side data buffer. */ + DataVector &data_v = data_vector_find(client_pointer); + mem.data_pointer = (device_ptr)&data_v[0]; - /* get pointer to memory buffer for device buffer */ - mem.data_pointer = (device_ptr)&data_v[0]; + /* Translate the client pointer to a real device pointer. */ + mem.device_pointer = device_ptr_from_client_pointer(client_pointer); + } + else { + /* Allocate host side data buffer. */ + DataVector &data_v = data_vector_insert(client_pointer, data_size); + mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0; + } - /* copy data from network into memory buffer */ + /* Copy data from network into memory buffer. */ rcv.read_buffer((uint8_t*)mem.data_pointer, data_size); - /* translate the client pointer to a real device pointer */ - mem.device_pointer = device_ptr_from_client_pointer(client_pointer); - - /* copy the data from the memory buffer to the device buffer */ + /* Copy the data from the memory buffer to the device buffer. */ device->mem_copy_to(mem); + + if(!client_pointer) { + /* Store a mapping to/from client_pointer and real device pointer. */ + pointer_mapping_insert(client_pointer, mem.device_pointer); + } } else if(rcv.name == "mem_copy_from") { string name; @@ -538,14 +514,30 @@ protected: rcv.read(mem, name); lock.unlock(); + size_t data_size = mem.memory_size(); device_ptr client_pointer = mem.device_pointer; - mem.device_pointer = device_ptr_from_client_pointer(client_pointer); - DataVector &data_v = data_vector_find(client_pointer); + if(client_pointer) { + /* Lookup existing host side data buffer. */ + DataVector &data_v = data_vector_find(client_pointer); + mem.data_pointer = (device_ptr)&data_v[0]; - mem.data_pointer = (device_ptr)&(data_v[0]); + /* Translate the client pointer to a real device pointer. */ + mem.device_pointer = device_ptr_from_client_pointer(client_pointer); + } + else { + /* Allocate host side data buffer. */ + DataVector &data_v = data_vector_insert(client_pointer, data_size); + mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0; + } + /* Zero memory. */ device->mem_zero(mem); + + if(!client_pointer) { + /* Store a mapping to/from client_pointer and real device pointer. */ + pointer_mapping_insert(client_pointer, mem.device_pointer); + } } else if(rcv.name == "mem_free") { string name; @@ -573,45 +565,6 @@ protected: device->const_copy_to(name_string.c_str(), &host_vector[0], size); } - else if(rcv.name == "tex_alloc") { - string name; - network_device_memory mem(device); - device_ptr client_pointer; - - rcv.read(mem, name); - lock.unlock(); - - client_pointer = mem.device_pointer; - - size_t data_size = mem.memory_size(); - - DataVector &data_v = data_vector_insert(client_pointer, data_size); - - if(data_size) - mem.data_pointer = (device_ptr)&(data_v[0]); - else - mem.data_pointer = 0; - - rcv.read_buffer((uint8_t*)mem.data_pointer, data_size); - - device->tex_alloc(mem); - - pointer_mapping_insert(client_pointer, mem.device_pointer); - } - else if(rcv.name == "tex_free") { - string name; - network_device_memory mem(device); - device_ptr client_pointer; - - rcv.read(mem, name); - lock.unlock(); - - client_pointer = mem.device_pointer; - - mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer); - - device->tex_free(mem); - } else if(rcv.name == "load_kernels") { DeviceRequestedFeatures requested_features; rcv.read(requested_features.experimental); @@ -696,7 +649,7 @@ protected: } } - bool task_acquire_tile(Device *device, RenderTile& tile) + bool task_acquire_tile(Device *, RenderTile& tile) { thread_scoped_lock acquire_lock(acquire_mutex); diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index 8a53290f421..a38d962c0af 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -279,6 +279,11 @@ public: mem.name = name.c_str(); mem.data_pointer = 0; + + /* Can't transfer OpenGL texture over network. */ + if(mem.type == MEM_PIXELS) { + mem.type = MEM_WRITE_ONLY; + } } template void read(T& data) diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 6c8befa89be..f2839a8b1b9 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -61,11 +61,11 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) DeviceSplitKernel::~DeviceSplitKernel() { - device->mem_free(split_data); - device->mem_free(ray_state); - device->mem_free(use_queues_flag); - device->mem_free(queue_index); - device->mem_free(work_pool_wgs); + split_data.free(); + ray_state.free(); + use_queues_flag.free(); + queue_index.free(); + work_pool_wgs.free(); delete kernel_path_init; delete kernel_scene_intersect; @@ -175,20 +175,11 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, unsigned int max_work_groups = num_global_elements / work_pool_size + 1; /* Allocate work_pool_wgs memory. */ - work_pool_wgs.resize(max_work_groups); - device->mem_alloc(work_pool_wgs); - - queue_index.resize(NUM_QUEUES); - device->mem_alloc(queue_index); - - use_queues_flag.resize(1); - device->mem_alloc(use_queues_flag); - - ray_state.resize(num_global_elements); - device->mem_alloc(ray_state); - - split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); - device->mem_alloc(split_data); + work_pool_wgs.alloc_to_device(max_work_groups); + queue_index.alloc_to_device(NUM_QUEUES); + use_queues_flag.alloc_to_device(1); + split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); + ray_state.alloc(num_global_elements); } #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ @@ -225,9 +216,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, /* reset state memory here as global size for data_init * kernel might not be large enough to do in kernel */ - device->mem_zero(work_pool_wgs); - device->mem_zero(split_data); - device->mem_zero(ray_state); + work_pool_wgs.zero_to_device(); + split_data.zero_to_device(); + ray_state.zero_to_device(); if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), subtile, @@ -284,7 +275,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, } /* Decide if we should exit path-iteration in host. */ - device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1); + ray_state.copy_from_device(0, global_size[0] * global_size[1], 1); activeRaysAvailable = false; diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index e48367b8987..a791b374774 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -76,8 +76,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) device_only_memory *new_buffer = new device_only_memory(device, "memory manager buffer"); - new_buffer->resize(total_size); - device->mem_alloc(*new_buffer); + new_buffer->alloc_to_device(total_size); size_t offset = 0; @@ -111,7 +110,6 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) offset += allocation->size; } - device->mem_free(*buffer); delete buffer; buffer = new_buffer; @@ -144,9 +142,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) clFinish(device->cqCommandQueue); } -void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device) +void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *) { - device->mem_free(*buffer); + buffer->free(); } MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 90f461b4c98..5e9debc3b17 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -74,7 +74,7 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where) OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_), memory_manager(this), - texture_info_buffer(this, "__texture_info", MEM_READ_ONLY) + texture_info(this, "__texture_info", MEM_TEXTURE) { cpPlatform = NULL; cdDevice = NULL; @@ -157,7 +157,6 @@ OpenCLDeviceBase::~OpenCLDeviceBase() ConstMemMap::iterator mt; for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { - mem_free(*(mt->second)); delete mt->second; } @@ -318,9 +317,9 @@ void OpenCLDeviceBase::mem_alloc(device_memory& mem) cl_mem_flags mem_flag; void *mem_ptr = NULL; - if(mem.type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) mem_flag = CL_MEM_READ_ONLY; - else if(mem.type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -348,17 +347,27 @@ void OpenCLDeviceBase::mem_alloc(device_memory& mem) void OpenCLDeviceBase::mem_copy_to(device_memory& mem) { - /* this is blocking */ - size_t size = mem.memory_size(); - if(size != 0) { - opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, - CL_MEM_PTR(mem.device_pointer), - CL_TRUE, - 0, - size, - (void*)mem.data_pointer, - 0, - NULL, NULL)); + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else { + if(!mem.device_pointer) { + mem_alloc(mem); + } + + /* this is blocking */ + size_t size = mem.memory_size(); + if(size != 0) { + opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, + CL_MEM_PTR(mem.device_pointer), + CL_TRUE, + 0, + size, + (void*)mem.data_pointer, + 0, + NULL, NULL)); + } } } @@ -410,6 +419,10 @@ void OpenCLDeviceBase::mem_zero_kernel(device_ptr mem, size_t size) void OpenCLDeviceBase::mem_zero(device_memory& mem) { + if(!mem.device_pointer) { + mem_alloc(mem); + } + if(mem.device_pointer) { if(base_program.is_loaded()) { mem_zero_kernel(mem.device_pointer, mem.memory_size()); @@ -445,14 +458,19 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem) void OpenCLDeviceBase::mem_free(device_memory& mem) { - if(mem.device_pointer) { - if(mem.device_pointer != null_mem) { - opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); - } - mem.device_pointer = 0; + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else { + if(mem.device_pointer) { + if(mem.device_pointer != null_mem) { + opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); + } + mem.device_pointer = 0; - stats.mem_free(mem.device_size); - mem.device_size = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; + } } } @@ -464,9 +482,9 @@ int OpenCLDeviceBase::mem_address_alignment() device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size) { cl_mem_flags mem_flag; - if(mem.type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) mem_flag = CL_MEM_READ_ONLY; - else if(mem.type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -498,9 +516,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) if(i == const_mem_map.end()) { data = new device_vector(this, name, MEM_READ_ONLY); - data->resize(size); - - mem_alloc(*data); + data->alloc(size); const_mem_map.insert(ConstMemMap::value_type(name, data)); } else { @@ -508,7 +524,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) } memcpy(data->get_data(), host, size); - mem_copy_to(*data); + data->copy_to_device(); } void OpenCLDeviceBase::tex_alloc(device_memory& mem) @@ -1037,8 +1053,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - mem_copy_to(task->tiles_mem); + task->tiles_mem.copy_to_device(); cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index c966ebe0c5e..96139afa450 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -128,8 +128,7 @@ public: /* Allocate buffer for kernel globals */ device_only_memory kgbuffer(this, "kernel_globals"); - kgbuffer.resize(1); - mem_alloc(kgbuffer); + kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { @@ -160,7 +159,7 @@ public: task->release_tile(tile); } - mem_free(kgbuffer); + kgbuffer.free(); } } @@ -289,8 +288,8 @@ public: virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { device_vector size_buffer(device, "size_buffer", MEM_READ_WRITE); - size_buffer.resize(1); - device->mem_alloc(size_buffer); + size_buffer.alloc(1); + size_buffer.zero_to_device(); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); @@ -308,9 +307,9 @@ public: device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); + size_buffer.copy_from_device(0, 1, 1); size_t size = size_buffer[0]; - device->mem_free(size_buffer); + size_buffer.free(); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", -- cgit v1.2.3