diff options
Diffstat (limited to 'intern/cycles/device/opencl')
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.cpp | 18 | ||||
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.h | 8 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 34 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 137 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 17 |
5 files changed, 106 insertions, 108 deletions
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index b67dfef88aa..a791b374774 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -73,10 +73,10 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) return; } - device_memory *new_buffer = new device_memory; + device_only_memory<uchar> *new_buffer = + new device_only_memory<uchar>(device, "memory manager buffer"); - new_buffer->resize(total_size); - device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY); + new_buffer->alloc_to_device(total_size); size_t offset = 0; @@ -110,7 +110,6 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) offset += allocation->size; } - device->mem_free(*buffer); delete buffer; buffer = new_buffer; @@ -143,9 +142,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) clFinish(device->cqCommandQueue); } -void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device) +void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *) { - device->mem_free(*buffer); + buffer->free(); } MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() @@ -161,8 +160,13 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() return smallest; } -MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false) +MemoryManager::MemoryManager(OpenCLDeviceBase *device) +: device(device), need_update(false) { + foreach(DeviceBuffer& device_buffer, device_buffers) { + device_buffer.buffer = + new device_only_memory<uchar>(device, "memory manager buffer"); + } } void MemoryManager::free() diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h index 3714405d026..b3d861275f0 100644 --- a/intern/cycles/device/opencl/memory_manager.h +++ b/intern/cycles/device/opencl/memory_manager.h @@ -56,15 +56,17 @@ private: }; struct DeviceBuffer { - device_memory *buffer; + device_only_memory<uchar> *buffer; vector<Allocation*> allocations; size_t size; /* Size of all allocations. */ - DeviceBuffer() : buffer(new device_memory), size(0) + DeviceBuffer() + : buffer(NULL), size(0) { } - ~DeviceBuffer() { + ~DeviceBuffer() + { delete buffer; buffer = NULL; } diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index bd956e29083..55848c8112d 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -340,7 +340,7 @@ public: virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, vector<OpenCLProgram*> &programs) = 0; - void mem_alloc(const char *name, device_memory& mem, MemoryType type); + void mem_alloc(device_memory& mem); void mem_copy_to(device_memory& mem); void mem_copy_from(device_memory& mem, int y, int w, int h, int elem); void mem_zero(device_memory& mem); @@ -349,10 +349,7 @@ public: int mem_address_alignment(); void const_copy_to(const char *name, void *host, size_t size); - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType /*interpolation*/, - ExtensionType /*extension*/); + void tex_alloc(device_memory& mem); void tex_free(device_memory& mem); size_t global_size_round_up(int group_size, int global_size); @@ -440,7 +437,7 @@ protected: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task); - device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type); + device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size); void mem_free_sub_ptr(device_ptr ptr); class ArgumentWrapper { @@ -461,6 +458,11 @@ protected: } template<typename T> + ArgumentWrapper(device_only_memory<T>& argument) : size(sizeof(void*)), + pointer((void*)(&argument.device_pointer)) + { + } + template<typename T> ArgumentWrapper(T& argument) : size(sizeof(argument)), pointer(&argument) { @@ -546,25 +548,9 @@ private: friend class MemoryManager; static_assert_align(TextureInfo, 16); + device_vector<TextureInfo> texture_info; - vector<TextureInfo> texture_info; - device_memory texture_info_buffer; - - struct Texture { - Texture() {} - Texture(device_memory* mem, - InterpolationType interpolation, - ExtensionType extension) - : mem(mem), - interpolation(interpolation), - extension(extension) { - } - device_memory* mem; - InterpolationType interpolation; - ExtensionType extension; - }; - - typedef map<string, Texture> TexturesMap; + typedef map<string, device_memory*> TexturesMap; TexturesMap textures; bool textures_need_update; diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 48c32a9dc5c..5e9debc3b17 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -72,7 +72,9 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where) } OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) -: Device(info, stats, background_), memory_manager(this) +: Device(info, stats, background_), + memory_manager(this), + texture_info(this, "__texture_info", MEM_TEXTURE) { cpPlatform = NULL; cdDevice = NULL; @@ -136,11 +138,9 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou return; } - /* Allocate this right away so that texture_info_buffer is placed at offset 0 in the device memory buffers */ + /* Allocate this right away so that texture_info is placed at offset 0 in the device memory buffers */ texture_info.resize(1); - texture_info_buffer.resize(1); - texture_info_buffer.data_pointer = (device_ptr)&texture_info[0]; - memory_manager.alloc("texture_info", texture_info_buffer); + memory_manager.alloc("texture_info", texture_info); fprintf(stderr, "Device init success\n"); device_initialized = true; @@ -157,7 +157,6 @@ OpenCLDeviceBase::~OpenCLDeviceBase() ConstMemMap::iterator mt; for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { - mem_free(*(mt->second)); delete mt->second; } @@ -286,10 +285,10 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea return true; } -void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type) +void OpenCLDeviceBase::mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -307,8 +306,8 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp if(size > max_alloc_size) { string error = "Scene too complex to fit in available memory."; - if(name != NULL) { - error += string_printf(" (allocating buffer %s failed.)", name); + if(mem.name != NULL) { + error += string_printf(" (allocating buffer %s failed.)", mem.name); } set_error(error); @@ -318,9 +317,9 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp cl_mem_flags mem_flag; void *mem_ptr = NULL; - if(type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) mem_flag = CL_MEM_READ_ONLY; - else if(type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -348,17 +347,27 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp void OpenCLDeviceBase::mem_copy_to(device_memory& mem) { - /* this is blocking */ - size_t size = mem.memory_size(); - if(size != 0) { - opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, - CL_MEM_PTR(mem.device_pointer), - CL_TRUE, - 0, - size, - (void*)mem.data_pointer, - 0, - NULL, NULL)); + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else { + if(!mem.device_pointer) { + mem_alloc(mem); + } + + /* this is blocking */ + size_t size = mem.memory_size(); + if(size != 0) { + opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, + CL_MEM_PTR(mem.device_pointer), + CL_TRUE, + 0, + size, + (void*)mem.data_pointer, + 0, + NULL, NULL)); + } } } @@ -410,6 +419,10 @@ void OpenCLDeviceBase::mem_zero_kernel(device_ptr mem, size_t size) void OpenCLDeviceBase::mem_zero(device_memory& mem) { + if(!mem.device_pointer) { + mem_alloc(mem); + } + if(mem.device_pointer) { if(base_program.is_loaded()) { mem_zero_kernel(mem.device_pointer, mem.memory_size()); @@ -445,14 +458,19 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem) void OpenCLDeviceBase::mem_free(device_memory& mem) { - if(mem.device_pointer) { - if(mem.device_pointer != null_mem) { - opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); - } - mem.device_pointer = 0; + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else { + if(mem.device_pointer) { + if(mem.device_pointer != null_mem) { + opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); + } + mem.device_pointer = 0; - stats.mem_free(mem.device_size); - mem.device_size = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; + } } } @@ -461,12 +479,12 @@ int OpenCLDeviceBase::mem_address_alignment() return OpenCLInfo::mem_address_alignment(cdDevice); } -device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type) +device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size) { cl_mem_flags mem_flag; - if(type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) mem_flag = CL_MEM_READ_ONLY; - else if(type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -497,10 +515,8 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) device_vector<uchar> *data; if(i == const_mem_map.end()) { - data = new device_vector<uchar>(); - data->resize(size); - - mem_alloc(name, *data, MEM_READ_ONLY); + data = new device_vector<uchar>(this, name, MEM_READ_ONLY); + data->alloc(size); const_mem_map.insert(ConstMemMap::value_type(name, data)); } else { @@ -508,22 +524,19 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) } memcpy(data->get_data(), host, size); - mem_copy_to(*data); + data->copy_to_device(); } -void OpenCLDeviceBase::tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) +void OpenCLDeviceBase::tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - memory_manager.alloc(name, mem); + memory_manager.alloc(mem.name, mem); /* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */ mem.device_pointer = 1; - textures[name] = Texture(&mem, interpolation, extension); + textures[mem.name] = &mem; textures_need_update = true; } @@ -537,7 +550,7 @@ void OpenCLDeviceBase::tex_free(device_memory& mem) } foreach(TexturesMap::value_type& value, textures) { - if(value.second.mem == &mem) { + if(value.second == &mem) { textures.erase(value.first); break; } @@ -648,38 +661,33 @@ void OpenCLDeviceBase::flush_texture_buffers() } /* Realloc texture descriptors buffer. */ - memory_manager.free(texture_info_buffer); - + memory_manager.free(texture_info); texture_info.resize(num_slots); - texture_info_buffer.resize(num_slots * sizeof(TextureInfo)); - texture_info_buffer.data_pointer = (device_ptr)&texture_info[0]; - - memory_manager.alloc("texture_info", texture_info_buffer); + memory_manager.alloc("texture_info", texture_info); /* Fill in descriptors */ foreach(texture_slot_t& slot, texture_slots) { - Texture& tex = textures[slot.name]; - TextureInfo& info = texture_info[slot.slot]; MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); - info.data = desc.offset; info.cl_buffer = desc.device_buffer; if(string_startswith(slot.name, "__tex_image")) { - info.width = tex.mem->data_width; - info.height = tex.mem->data_height; - info.depth = tex.mem->data_depth; + device_memory *mem = textures[slot.name]; + + info.width = mem->data_width; + info.height = mem->data_height; + info.depth = mem->data_depth; - info.interpolation = tex.interpolation; - info.extension = tex.extension; + info.interpolation = mem->interpolation; + info.extension = mem->extension; } } /* Force write of descriptors. */ - memory_manager.free(texture_info_buffer); - memory_manager.alloc("texture_info", texture_info_buffer); + memory_manager.free(texture_info); + memory_manager.alloc("texture_info", texture_info); } void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) @@ -1045,8 +1053,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_WRITE); - mem_copy_to(task->tiles_mem); + task->tiles_mem.copy_to_device(); cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 920106f92d4..96139afa450 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -127,9 +127,8 @@ public: } KernelGlobals; /* Allocate buffer for kernel globals */ - device_memory kgbuffer; - kgbuffer.resize(sizeof(KernelGlobals)); - mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); + device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals"); + kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { @@ -160,7 +159,7 @@ public: task->release_tile(tile); } - mem_free(kgbuffer); + kgbuffer.free(); } } @@ -288,9 +287,9 @@ public: virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { - device_vector<uint64_t> size_buffer; - size_buffer.resize(1); - device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); + size_buffer.alloc(1); + size_buffer.zero_to_device(); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); @@ -308,9 +307,9 @@ public: device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); + size_buffer.copy_from_device(0, 1, 1); size_t size = size_buffer[0]; - device->mem_free(size_buffer); + size_buffer.free(); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", |