diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-10-21 00:31:13 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-10-24 02:03:59 +0300 |
commit | 7ad9333fad25b9a7cabea0d659eaf724f89912c8 (patch) | |
tree | 4fa0d78c9659dd842852420bd1d8b8f8925a0c2f /intern/cycles/device | |
parent | ae41f38f78f8c54f92cf34dd88e35948e19aed55 (diff) |
Code refactor: store device/interp/extension/type in each device_memory.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/device/device.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 14 | ||||
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 48 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 58 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 68 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.h | 18 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.cpp | 60 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.h | 29 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 16 | ||||
-rw-r--r-- | intern/cycles/device/device_network.cpp | 76 | ||||
-rw-r--r-- | intern/cycles/device/device_network.h | 21 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 18 | ||||
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.h | 6 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 25 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 58 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 8 |
18 files changed, 294 insertions, 258 deletions
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 3c632160fbd..959c0aa97c9 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -26,6 +26,7 @@ set(SRC device_cpu.cpp device_cuda.cpp device_denoising.cpp + device_memory.cpp device_multi.cpp device_opencl.cpp device_split_kernel.cpp diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 16c027e2cb5..9de10c184fb 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -87,7 +87,7 @@ Device::~Device() void Device::pixels_alloc(device_memory& mem) { - mem_alloc("pixels", mem, MEM_READ_WRITE); + mem_alloc(mem); } void Device::pixels_copy_from(device_memory& mem, int y, int w, int h) @@ -429,16 +429,4 @@ void Device::free_memory() devices.free_memory(); } - -device_sub_ptr::device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type) - : device(device) -{ - ptr = device->mem_alloc_sub_ptr(mem, offset, size, type); -} - -device_sub_ptr::~device_sub_ptr() -{ - device->mem_free_sub_ptr(ptr); -} - CCL_NAMESPACE_END diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 4bf88f75932..6bb65cde2a3 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -253,7 +253,7 @@ protected: /* used for real time display */ unsigned int vertex_buffer; - virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/, MemoryType /*type*/) + virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/) { /* Only required for devices that implement denoising. */ assert(false); @@ -282,7 +282,7 @@ public: Stats &stats; /* regular memory */ - virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0; + virtual void mem_alloc(device_memory& mem) = 0; virtual void mem_copy_to(device_memory& mem) = 0; virtual void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) = 0; @@ -295,15 +295,7 @@ public: virtual void const_copy_to(const char *name, void *host, size_t size) = 0; /* texture memory */ - virtual void tex_alloc(const char * /*name*/, - device_memory& /*mem*/, - InterpolationType interpolation = INTERPOLATION_NONE, - ExtensionType extension = EXTENSION_REPEAT) - { - (void)interpolation; /* Ignored. */ - (void)extension; /* Ignored. */ - }; - + virtual void tex_alloc(device_memory& /*mem*/) {}; virtual void tex_free(device_memory& /*mem*/) {}; /* pixel memory */ diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index b05f24659ee..60c06462d4d 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -209,6 +209,7 @@ public: CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_) : Device(info_, stats_, background_), + texture_info(this, "__texture_info"), #define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name)) REGISTER_KERNEL(path_trace), REGISTER_KERNEL(convert_to_half_float), @@ -280,15 +281,15 @@ public: { if(need_texture_info) { tex_free(texture_info); - tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT); + tex_alloc(texture_info); need_texture_info = false; } } - void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) + void mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -332,7 +333,7 @@ public: } } - virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/) + virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/) { return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset)); } @@ -342,32 +343,25 @@ public: kernel_const_copy(&kernel_globals, name, host, size); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - if(interpolation == INTERPOLATION_NONE) { + if(mem.interpolation == INTERPOLATION_NONE) { /* Data texture. */ kernel_tex_copy(&kernel_globals, - name, + mem.name, mem.data_pointer, - mem.data_width, - mem.data_height, - mem.data_depth, - interpolation, - extension); + mem.data_width); } else { /* Image Texture. */ int flat_slot = 0; - if(string_startswith(name, "__tex_image")) { - int pos = string(name).rfind("_"); - flat_slot = atoi(name + pos + 1); + if(string_startswith(mem.name, "__tex_image")) { + int pos = string(mem.name).rfind("_"); + flat_slot = atoi(mem.name + pos + 1); } else { assert(0); @@ -382,8 +376,8 @@ public: TextureInfo& info = texture_info[flat_slot]; info.data = (uint64_t)mem.data_pointer; info.cl_buffer = 0; - info.interpolation = interpolation; - info.extension = extension; + info.interpolation = mem.interpolation; + info.extension = mem.extension; info.width = mem.data_width; info.height = mem.data_height; info.depth = mem.data_depth; @@ -437,7 +431,7 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY); + mem_alloc(task->tiles_mem); TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { @@ -728,9 +722,9 @@ public: } /* allocate buffer for kernel globals */ - device_only_memory<KernelGlobals> kgbuffer; + device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals"); kgbuffer.resize(1); - mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); + mem_alloc(kgbuffer); KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init()); @@ -751,8 +745,8 @@ public: while(task.acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel) { - device_memory data; - split_kernel->path_trace(&task, tile, kgbuffer, data); + device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY); + split_kernel->path_trace(&task, tile, kgbuffer, void_buffer); } else { path_trace(task, tile, kg); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0f17b67c8c6..1295ec86355 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -217,7 +217,8 @@ public: } CUDADevice(DeviceInfo& info, Stats &stats, bool background_) - : Device(info, stats, background_) + : Device(info, stats, background_), + texture_info(this, "__texture_info") { first_error = true; background = background_; @@ -548,17 +549,17 @@ public: { if(info.has_bindless_textures && need_texture_info) { tex_free(texture_info); - tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT); + tex_alloc(texture_info); need_texture_info = false; } } - void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) + void mem_alloc(device_memory& mem) { CUDAContextScope scope(this); - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -619,7 +620,7 @@ public: } } - virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/) + virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/) { return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset)); } @@ -635,14 +636,11 @@ public: cuda_assert(cuMemcpyHtoD(mem, host, size)); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { CUDAContextScope scope(this); - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -650,12 +648,12 @@ public: bool has_bindless_textures = info.has_bindless_textures; /* General variables for both architectures */ - string bind_name = name; + string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; - switch(extension) { + switch(mem.extension) { case EXTENSION_REPEAT: address_mode = CU_TR_ADDRESS_MODE_WRAP; break; @@ -671,7 +669,7 @@ public: } CUfilter_mode filter_mode; - if(interpolation == INTERPOLATION_CLOSEST) { + if(mem.interpolation == INTERPOLATION_CLOSEST) { filter_mode = CU_TR_FILTER_MODE_POINT; } else { @@ -681,13 +679,13 @@ public: /* General variables for Fermi */ CUtexref texref = NULL; - if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) { + if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) { if(mem.data_depth > 1) { /* Kernel uses different bind names for 2d and 3d float textures, * so we have to adjust couple of things here. */ vector<string> tokens; - string_split(tokens, name, "_"); + string_split(tokens, mem.name, "_"); bind_name = string_printf("__tex_image_%s_3d_%s", tokens[2].c_str(), tokens[3].c_str()); @@ -700,9 +698,9 @@ public: } } - if(interpolation == INTERPOLATION_NONE) { + if(mem.interpolation == INTERPOLATION_NONE) { /* Data Storage */ - mem_alloc(NULL, mem, MEM_READ_ONLY); + mem_alloc(mem); mem_copy_to(mem); CUdeviceptr cumem; @@ -802,9 +800,9 @@ public: if(has_bindless_textures) { /* Bindless Textures - Kepler */ int flat_slot = 0; - if(string_startswith(name, "__tex_image")) { - int pos = string(name).rfind("_"); - flat_slot = atoi(name + pos + 1); + if(string_startswith(mem.name, "__tex_image")) { + int pos = string(mem.name).rfind("_"); + flat_slot = atoi(mem.name + pos + 1); } else { assert(0); @@ -843,8 +841,8 @@ public: TextureInfo& info = texture_info[flat_slot]; info.data = (uint64_t)tex; info.cl_buffer = 0; - info.interpolation = interpolation; - info.extension = extension; + info.interpolation = mem.interpolation; + info.extension = mem.extension; info.width = mem.data_width; info.height = mem.data_height; info.depth = mem.data_depth; @@ -869,7 +867,7 @@ public: } /* Fermi and Kepler */ - tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE); + tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE); } void tex_free(device_memory& mem) @@ -900,7 +898,7 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY); + mem_alloc(task->tiles_mem); TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { @@ -1297,7 +1295,7 @@ public: cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); /* Allocate work tile. */ - device_vector<WorkTile> work_tiles; + device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY); work_tiles.resize(1); WorkTile *wtile = work_tiles.get_data(); @@ -1308,7 +1306,7 @@ public: wtile->offset = rtile.offset; wtile->stride = rtile.stride; wtile->buffer = (float*)cuda_device_ptr(rtile.buffer); - mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY); + mem_alloc(work_tiles); CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); @@ -1730,7 +1728,7 @@ public: while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel()) { - device_memory void_buffer; + device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY); split_kernel->path_trace(task, tile, void_buffer, void_buffer); } else { @@ -1885,9 +1883,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory { CUDAContextScope scope(device); - device_vector<uint64_t> size_buffer; + device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); size_buffer.resize(1); - device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + device->mem_alloc(size_buffer); uint threads = num_threads; CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 619cc1d171e..2c3bfefd8b0 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -76,21 +76,21 @@ bool DenoisingTask::run_denoising() buffer.h = rect.w - rect.y; buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float))); buffer.mem.resize(buffer.pass_stride * buffer.passes); - device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE); + device->mem_alloc(buffer.mem); device_ptr null_ptr = (device_ptr) 0; /* Prefilter shadow feature. */ { - device_sub_ptr unfiltered_a (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr unfiltered_b (device, buffer.mem, 1*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr sample_var (device, buffer.mem, 2*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr sample_var_var (device, buffer.mem, 3*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr buffer_var (device, buffer.mem, 5*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr filtered_var (device, buffer.mem, 6*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_1(device, buffer.mem, 7*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_2(device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_3(device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride); + device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); nlm_state.temporary_1_ptr = *nlm_temporary_1; nlm_state.temporary_2_ptr = *nlm_temporary_2; @@ -123,17 +123,17 @@ bool DenoisingTask::run_denoising() functions.non_local_means(filtered_b, filtered_a, residual_var, final_b); /* Combine the two double-filtered halves to a final shadow feature. */ - device_sub_ptr shadow_pass(device, buffer.mem, 4*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride); functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect); } /* Prefilter general features. */ { - device_sub_ptr unfiltered (device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr variance (device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_1(device, buffer.mem, 10*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_2(device, buffer.mem, 11*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_3(device, buffer.mem, 12*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride); nlm_state.temporary_1_ptr = *nlm_temporary_1; nlm_state.temporary_2_ptr = *nlm_temporary_2; @@ -143,7 +143,7 @@ bool DenoisingTask::run_denoising() int variance_from[] = { 3, 4, 5, 13, 9, 10, 11}; int pass_to[] = { 1, 2, 3, 0, 5, 6, 7}; for(int pass = 0; pass < 7; pass++) { - device_sub_ptr feature_pass(device, buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride); /* Get the unfiltered pass and its variance from the RenderBuffers. */ functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance); /* Smooth the pass and store the result in the denoising buffers. */ @@ -160,20 +160,20 @@ bool DenoisingTask::run_denoising() int variance_to[] = {11, 12, 13}; int num_color_passes = 3; - device_only_memory<float> temp_color; + device_only_memory<float> temp_color(device, "Denoising temporary color"); temp_color.resize(3*buffer.pass_stride); - device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE); + device->mem_alloc(temp_color); for(int pass = 0; pass < num_color_passes; pass++) { - device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride); functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass); } { - device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride); + device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride); + device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride); functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); } @@ -184,24 +184,24 @@ bool DenoisingTask::run_denoising() storage.h = filter_area.w; storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE); storage.rank.resize(storage.w*storage.h); - device->mem_alloc("Denoising Transform", storage.transform, MEM_READ_WRITE); - device->mem_alloc("Denoising Rank", storage.rank, MEM_READ_WRITE); + device->mem_alloc(storage.transform); + device->mem_alloc(storage.rank); functions.construct_transform(); - device_only_memory<float> temporary_1; - device_only_memory<float> temporary_2; + device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1"); + device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2"); temporary_1.resize(buffer.w*buffer.h); temporary_2.resize(buffer.w*buffer.h); - device->mem_alloc("Denoising NLM temporary 1", temporary_1, MEM_READ_WRITE); - device->mem_alloc("Denoising NLM temporary 2", temporary_2, MEM_READ_WRITE); + device->mem_alloc(temporary_1); + device->mem_alloc(temporary_2); reconstruction_state.temporary_1_ptr = temporary_1.device_pointer; reconstruction_state.temporary_2_ptr = temporary_2.device_pointer; storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE); storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE); - device->mem_alloc("Denoising XtWX", storage.XtWX, MEM_READ_WRITE); - device->mem_alloc("Denoising XtWY", storage.XtWY, MEM_READ_WRITE); + device->mem_alloc(storage.XtWX); + device->mem_alloc(storage.XtWY); reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x; @@ -213,8 +213,8 @@ bool DenoisingTask::run_denoising() reconstruction_state.source_h = rect.w-rect.y; { - device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride); + device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride); functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index def7b72f67d..606f7422ac8 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -123,9 +123,21 @@ public: device_only_memory<float3> XtWY; int w; int h; + + Storage(Device *device) + : transform(device, "denoising transform"), + rank(device, "denoising rank"), + XtWX(device, "denoising XtWX"), + XtWY(device, "denoising XtWY") + {} } storage; - DenoisingTask(Device *device) : device(device) {} + DenoisingTask(Device *device) + : tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE), + storage(device), + buffer(device), + device(device) + {} void init_from_devicetask(const DeviceTask &task); @@ -137,6 +149,10 @@ public: int w; int h; device_only_memory<float> mem; + + DenoiseBuffers(Device *device) + : mem(device, "denoising pixel buffer") + {} } buffer; protected: diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp new file mode 100644 index 00000000000..98fa638ef8e --- /dev/null +++ b/intern/cycles/device/device_memory.cpp @@ -0,0 +1,60 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/device.h" +#include "device/device_memory.h" + +CCL_NAMESPACE_BEGIN + +/* Device Memory */ + +device_memory::device_memory(Device *device, const char *name, MemoryType type) +: data_type(device_type_traits<uchar>::data_type), + data_elements(device_type_traits<uchar>::num_elements), + data_pointer(0), + data_size(0), + device_size(0), + data_width(0), + data_height(0), + data_depth(0), + type(type), + name(name), + interpolation(INTERPOLATION_NONE), + extension(EXTENSION_REPEAT), + device(device), + device_pointer(0) +{ +} + +device_memory::~device_memory() +{ +} + +/* Device Sub Ptr */ + +device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size) +: device(mem.device) +{ + ptr = device->mem_alloc_sub_ptr(mem, offset, size); +} + +device_sub_ptr::~device_sub_ptr() +{ + device->mem_free_sub_ptr(ptr); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index eeeca61496e..3dfecde59d8 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -30,6 +30,7 @@ #include "util/util_debug.h" #include "util/util_half.h" +#include "util/util_texture.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -190,23 +191,17 @@ public: size_t data_width; size_t data_height; size_t data_depth; + MemoryType type; + const char *name; + InterpolationType interpolation; + ExtensionType extension; /* device pointer */ + Device *device; device_ptr device_pointer; - device_memory() - { - data_type = device_type_traits<uchar>::data_type; - data_elements = device_type_traits<uchar>::num_elements; - data_pointer = 0; - data_size = 0; - device_size = 0; - data_width = 0; - data_height = 0; - data_depth = 0; - device_pointer = 0; - } - virtual ~device_memory() { assert(!device_pointer); } + device_memory(Device *device, const char *name, MemoryType type); + virtual ~device_memory(); void resize(size_t size) { @@ -224,7 +219,8 @@ template<typename T> class device_only_memory : public device_memory { public: - device_only_memory() + device_only_memory(Device *device, const char *name) + : device_memory(device, name, MEM_READ_WRITE) { data_type = device_type_traits<T>::data_type; data_elements = max(device_type_traits<T>::num_elements, 1); @@ -241,7 +237,8 @@ public: template<typename T> class device_vector : public device_memory { public: - device_vector() + device_vector(Device *device, const char *name, MemoryType type = MEM_READ_ONLY) + : device_memory(device, name, type) { data_type = device_type_traits<T>::data_type; data_elements = device_type_traits<T>::num_elements; @@ -317,7 +314,7 @@ private: class device_sub_ptr { public: - device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type); + device_sub_ptr(device_memory& mem, int offset, int size); ~device_sub_ptr(); /* No copying. */ device_sub_ptr& operator = (const device_sub_ptr&); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index b17b972b06f..7f7fbc0d1d3 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -106,11 +106,11 @@ public: return true; } - void mem_alloc(const char *name, device_memory& mem, MemoryType type) + void mem_alloc(device_memory& mem) { foreach(SubDevice& sub, devices) { mem.device_pointer = 0; - sub.device->mem_alloc(name, mem, type); + sub.device->mem_alloc(mem); sub.ptr_map[unique_ptr] = mem.device_pointer; } @@ -179,19 +179,15 @@ public: sub.device->const_copy_to(name, host, size); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType - interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; foreach(SubDevice& sub, devices) { mem.device_pointer = 0; - sub.device->tex_alloc(name, mem, interpolation, extension); + sub.device->tex_alloc(mem); sub.ptr_map[unique_ptr] = mem.device_pointer; } @@ -314,7 +310,7 @@ public: tiles[i].buffers->copy_from_device(); device_ptr original_ptr = mem.device_pointer; mem.device_pointer = 0; - sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE); + sub_device->mem_alloc(mem); sub_device->mem_copy_to(mem); tiles[i].buffer = mem.device_pointer; mem.device_pointer = original_ptr; diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index 3fea89a243c..bdc88b6acae 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -87,10 +87,10 @@ public: snd.write(); } - void mem_alloc(const char *name, device_memory& mem, MemoryType type) + void mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -100,9 +100,7 @@ public: mem.device_pointer = ++mem_counter; RPCSend snd(socket, &error_func, "mem_alloc"); - snd.add(mem); - snd.add(type); snd.write(); } @@ -174,12 +172,9 @@ public: snd.write_buffer(host, size); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -188,13 +183,7 @@ public: mem.device_pointer = ++mem_counter; RPCSend snd(socket, &error_func, "tex_alloc"); - - string name_string(name); - - snd.add(name_string); snd.add(mem); - snd.add(interpolation); - snd.add(extension); snd.write(); snd.write_buffer((void*)mem.data_pointer, mem.memory_size()); } @@ -470,16 +459,12 @@ protected: void process(RPCReceive& rcv, thread_scoped_lock &lock) { if(rcv.name == "mem_alloc") { - MemoryType type; - network_device_memory mem; - device_ptr client_pointer; - - rcv.read(mem); - rcv.read(type); - + string name; + network_device_memory mem(device); + rcv.read(mem, name); lock.unlock(); - client_pointer = mem.device_pointer; + device_ptr client_pointer = mem.device_pointer; /* create a memory buffer for the device buffer */ size_t data_size = mem.memory_size(); @@ -491,15 +476,15 @@ protected: mem.data_pointer = 0; /* perform the allocation on the actual device */ - device->mem_alloc(NULL, mem, type); + device->mem_alloc(mem); /* store a mapping to/from client_pointer and real device pointer */ pointer_mapping_insert(client_pointer, mem.device_pointer); } else if(rcv.name == "mem_copy_to") { - network_device_memory mem; - - rcv.read(mem); + string name; + network_device_memory mem(device); + rcv.read(mem, name); lock.unlock(); device_ptr client_pointer = mem.device_pointer; @@ -521,10 +506,11 @@ protected: device->mem_copy_to(mem); } else if(rcv.name == "mem_copy_from") { - network_device_memory mem; + string name; + network_device_memory mem(device); int y, w, h, elem; - rcv.read(mem); + rcv.read(mem, name); rcv.read(y); rcv.read(w); rcv.read(h); @@ -547,9 +533,9 @@ protected: lock.unlock(); } else if(rcv.name == "mem_zero") { - network_device_memory mem; - - rcv.read(mem); + string name; + network_device_memory mem(device); + rcv.read(mem, name); lock.unlock(); device_ptr client_pointer = mem.device_pointer; @@ -562,13 +548,13 @@ protected: device->mem_zero(mem); } else if(rcv.name == "mem_free") { - network_device_memory mem; - device_ptr client_pointer; + string name; + network_device_memory mem(device); - rcv.read(mem); + rcv.read(mem, name); lock.unlock(); - client_pointer = mem.device_pointer; + device_ptr client_pointer = mem.device_pointer; mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer); @@ -588,16 +574,11 @@ protected: device->const_copy_to(name_string.c_str(), &host_vector[0], size); } else if(rcv.name == "tex_alloc") { - network_device_memory mem; string name; - InterpolationType interpolation; - ExtensionType extension_type; + network_device_memory mem(device); device_ptr client_pointer; - rcv.read(name); - rcv.read(mem); - rcv.read(interpolation); - rcv.read(extension_type); + rcv.read(mem, name); lock.unlock(); client_pointer = mem.device_pointer; @@ -613,15 +594,16 @@ protected: rcv.read_buffer((uint8_t*)mem.data_pointer, data_size); - device->tex_alloc(name.c_str(), mem, interpolation, extension_type); + device->tex_alloc(mem); pointer_mapping_insert(client_pointer, mem.device_pointer); } else if(rcv.name == "tex_free") { - network_device_memory mem; + string name; + network_device_memory mem(device); device_ptr client_pointer; - rcv.read(mem); + rcv.read(mem, name); lock.unlock(); client_pointer = mem.device_pointer; diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index 3d3bd99dfe7..8a53290f421 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -38,6 +38,7 @@ #include "util/util_foreach.h" #include "util/util_list.h" #include "util/util_map.h" +#include "util/util_param.h" #include "util/util_string.h" CCL_NAMESPACE_BEGIN @@ -68,8 +69,15 @@ typedef boost::archive::binary_iarchive i_archive; class network_device_memory : public device_memory { public: - network_device_memory() {} - ~network_device_memory() { device_pointer = 0; }; + network_device_memory(Device *device) + : device_memory(device, "", MEM_READ_ONLY) + { + } + + ~network_device_memory() + { + device_pointer = 0; + }; vector<char> local_data; }; @@ -119,6 +127,9 @@ public: { archive & mem.data_type & mem.data_elements & mem.data_size; archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; + archive & mem.type & string(mem.name); + archive & mem.interpolation & mem.extension; + archive & mem.device_pointer; } template<typename T> void add(const T& data) @@ -258,11 +269,15 @@ public: delete archive_stream; } - void read(network_device_memory& mem) + void read(network_device_memory& mem, string& name) { *archive & mem.data_type & mem.data_elements & mem.data_size; *archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; + *archive & mem.type & name; + *archive & mem.interpolation & mem.extension; + *archive & mem.device_pointer; + mem.name = name.c_str(); mem.data_pointer = 0; } diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index d2b3a89fa98..5283bd60bd5 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -26,7 +26,13 @@ CCL_NAMESPACE_BEGIN static const double alpha = 0.1; /* alpha for rolling average */ -DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) +DeviceSplitKernel::DeviceSplitKernel(Device *device) +: device(device), + split_data(device, "split_data", MEM_READ_WRITE), + ray_state(device, "ray_state", MEM_READ_WRITE), + queue_index(device, "queue_index"), + use_queues_flag(device, "use_queues_flag"), + work_pool_wgs(device, "work_pool_wgs") { current_max_closure = -1; first_tile = true; @@ -170,19 +176,19 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, /* Allocate work_pool_wgs memory. */ work_pool_wgs.resize(max_work_groups); - device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE); + device->mem_alloc(work_pool_wgs); queue_index.resize(NUM_QUEUES); - device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE); + device->mem_alloc(queue_index); use_queues_flag.resize(1); - device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE); + device->mem_alloc(use_queues_flag); ray_state.resize(num_global_elements); - device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE); + device->mem_alloc(ray_state); split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); - device->mem_alloc("split_data", split_data, MEM_READ_WRITE); + device->mem_alloc(split_data); } #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index b67dfef88aa..6deed4e3f0d 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -73,10 +73,12 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) return; } - device_memory *new_buffer = new device_memory; + device_memory *new_buffer = new device_memory(device, + "memory manager buffer", + MEM_READ_ONLY); new_buffer->resize(total_size); - device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY); + device->mem_alloc(*new_buffer); size_t offset = 0; @@ -161,8 +163,14 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() return smallest; } -MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false) +MemoryManager::MemoryManager(OpenCLDeviceBase *device) +: device(device), need_update(false) { + foreach(DeviceBuffer& device_buffer, device_buffers) { + device_buffer.buffer = new device_memory(device, + "memory manager buffer", + MEM_READ_ONLY); + } } void MemoryManager::free() diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h index 3714405d026..7ef74a79834 100644 --- a/intern/cycles/device/opencl/memory_manager.h +++ b/intern/cycles/device/opencl/memory_manager.h @@ -60,11 +60,13 @@ private: vector<Allocation*> allocations; size_t size; /* Size of all allocations. */ - DeviceBuffer() : buffer(new device_memory), size(0) + DeviceBuffer() + : buffer(NULL), size(0) { } - ~DeviceBuffer() { + ~DeviceBuffer() + { delete buffer; buffer = NULL; } diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index bd956e29083..1dd4ad7df7f 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -340,7 +340,7 @@ public: virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, vector<OpenCLProgram*> &programs) = 0; - void mem_alloc(const char *name, device_memory& mem, MemoryType type); + void mem_alloc(device_memory& mem); void mem_copy_to(device_memory& mem); void mem_copy_from(device_memory& mem, int y, int w, int h, int elem); void mem_zero(device_memory& mem); @@ -349,10 +349,7 @@ public: int mem_address_alignment(); void const_copy_to(const char *name, void *host, size_t size); - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType /*interpolation*/, - ExtensionType /*extension*/); + void tex_alloc(device_memory& mem); void tex_free(device_memory& mem); size_t global_size_round_up(int group_size, int global_size); @@ -440,7 +437,7 @@ protected: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task); - device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type); + device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size); void mem_free_sub_ptr(device_ptr ptr); class ArgumentWrapper { @@ -550,21 +547,7 @@ private: vector<TextureInfo> texture_info; device_memory texture_info_buffer; - struct Texture { - Texture() {} - Texture(device_memory* mem, - InterpolationType interpolation, - ExtensionType extension) - : mem(mem), - interpolation(interpolation), - extension(extension) { - } - device_memory* mem; - InterpolationType interpolation; - ExtensionType extension; - }; - - typedef map<string, Texture> TexturesMap; + typedef map<string, device_memory*> TexturesMap; TexturesMap textures; bool textures_need_update; diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 48c32a9dc5c..89ab1a43e68 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -72,7 +72,9 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where) } OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) -: Device(info, stats, background_), memory_manager(this) +: Device(info, stats, background_), + memory_manager(this), + texture_info_buffer(this, "__texture_info", MEM_READ_ONLY) { cpPlatform = NULL; cdDevice = NULL; @@ -286,10 +288,10 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea return true; } -void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type) +void OpenCLDeviceBase::mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -307,8 +309,8 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp if(size > max_alloc_size) { string error = "Scene too complex to fit in available memory."; - if(name != NULL) { - error += string_printf(" (allocating buffer %s failed.)", name); + if(mem.name != NULL) { + error += string_printf(" (allocating buffer %s failed.)", mem.name); } set_error(error); @@ -318,9 +320,9 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp cl_mem_flags mem_flag; void *mem_ptr = NULL; - if(type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY) mem_flag = CL_MEM_READ_ONLY; - else if(type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -461,12 +463,12 @@ int OpenCLDeviceBase::mem_address_alignment() return OpenCLInfo::mem_address_alignment(cdDevice); } -device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type) +device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size) { cl_mem_flags mem_flag; - if(type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY) mem_flag = CL_MEM_READ_ONLY; - else if(type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -497,10 +499,10 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) device_vector<uchar> *data; if(i == const_mem_map.end()) { - data = new device_vector<uchar>(); + data = new device_vector<uchar>(this, name, MEM_READ_ONLY); data->resize(size); - mem_alloc(name, *data, MEM_READ_ONLY); + mem_alloc(*data); const_mem_map.insert(ConstMemMap::value_type(name, data)); } else { @@ -511,19 +513,16 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) mem_copy_to(*data); } -void OpenCLDeviceBase::tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) +void OpenCLDeviceBase::tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - memory_manager.alloc(name, mem); + memory_manager.alloc(mem.name, mem); /* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */ mem.device_pointer = 1; - textures[name] = Texture(&mem, interpolation, extension); + textures[mem.name] = &mem; textures_need_update = true; } @@ -537,7 +536,7 @@ void OpenCLDeviceBase::tex_free(device_memory& mem) } foreach(TexturesMap::value_type& value, textures) { - if(value.second.mem == &mem) { + if(value.second == &mem) { textures.erase(value.first); break; } @@ -658,22 +657,21 @@ void OpenCLDeviceBase::flush_texture_buffers() /* Fill in descriptors */ foreach(texture_slot_t& slot, texture_slots) { - Texture& tex = textures[slot.name]; - TextureInfo& info = texture_info[slot.slot]; MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); - info.data = desc.offset; info.cl_buffer = desc.device_buffer; if(string_startswith(slot.name, "__tex_image")) { - info.width = tex.mem->data_width; - info.height = tex.mem->data_height; - info.depth = tex.mem->data_depth; + device_memory *mem = textures[slot.name]; + + info.width = mem->data_width; + info.height = mem->data_height; + info.depth = mem->data_depth; - info.interpolation = tex.interpolation; - info.extension = tex.extension; + info.interpolation = mem->interpolation; + info.extension = mem->extension; } } @@ -1045,7 +1043,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_WRITE); + mem_alloc(task->tiles_mem); mem_copy_to(task->tiles_mem); cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 920106f92d4..3edb2442070 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -127,9 +127,9 @@ public: } KernelGlobals; /* Allocate buffer for kernel globals */ - device_memory kgbuffer; + device_memory kgbuffer(this, "kernel_globals", MEM_READ_WRITE); kgbuffer.resize(sizeof(KernelGlobals)); - mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); + mem_alloc(kgbuffer); /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { @@ -288,9 +288,9 @@ public: virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { - device_vector<uint64_t> size_buffer; + device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); size_buffer.resize(1); - device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + device->mem_alloc(size_buffer); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); |