diff options
Diffstat (limited to 'intern/cycles/device/opencl')
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.cpp | 253 | ||||
-rw-r--r-- | intern/cycles/device/opencl/memory_manager.h | 105 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 43 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 149 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_mega.cpp | 5 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 25 |
6 files changed, 542 insertions, 38 deletions
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp new file mode 100644 index 00000000000..b67dfef88aa --- /dev/null +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -0,0 +1,253 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef WITH_OPENCL + +#include "util/util_foreach.h" + +#include "device/opencl/opencl.h" +#include "device/opencl/memory_manager.h" + +CCL_NAMESPACE_BEGIN + +void MemoryManager::DeviceBuffer::add_allocation(Allocation& allocation) +{ + allocations.push_back(&allocation); +} + +void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) +{ + bool need_realloc = false; + + /* Calculate total size and remove any freed. */ + size_t total_size = 0; + + for(int i = allocations.size()-1; i >= 0; i--) { + Allocation* allocation = allocations[i]; + + /* Remove allocations that have been freed. */ + if(!allocation->mem || allocation->mem->memory_size() == 0) { + allocation->device_buffer = NULL; + allocation->size = 0; + + allocations.erase(allocations.begin()+i); + + need_realloc = true; + + continue; + } + + /* Get actual size for allocation. */ + size_t alloc_size = align_up(allocation->mem->memory_size(), 16); + + if(allocation->size != alloc_size) { + /* Allocation is either new or resized. */ + allocation->size = alloc_size; + allocation->needs_copy_to_device = true; + + need_realloc = true; + } + + total_size += alloc_size; + } + + if(need_realloc) { + cl_ulong max_buffer_size; + clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL); + + if(total_size > max_buffer_size) { + device->set_error("Scene too complex to fit in available memory."); + return; + } + + device_memory *new_buffer = new device_memory; + + new_buffer->resize(total_size); + device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY); + + size_t offset = 0; + + foreach(Allocation* allocation, allocations) { + if(allocation->needs_copy_to_device) { + /* Copy from host to device. */ + opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue, + CL_MEM_PTR(new_buffer->device_pointer), + CL_FALSE, + offset, + allocation->mem->memory_size(), + (void*)allocation->mem->data_pointer, + 0, NULL, NULL + )); + + allocation->needs_copy_to_device = false; + } + else { + /* Fast copy from memory already on device. */ + opencl_device_assert(device, clEnqueueCopyBuffer(device->cqCommandQueue, + CL_MEM_PTR(buffer->device_pointer), + CL_MEM_PTR(new_buffer->device_pointer), + allocation->desc.offset, + offset, + allocation->mem->memory_size(), + 0, NULL, NULL + )); + } + + allocation->desc.offset = offset; + offset += allocation->size; + } + + device->mem_free(*buffer); + delete buffer; + + buffer = new_buffer; + } + else { + assert(total_size == buffer->data_size); + + size_t offset = 0; + + foreach(Allocation* allocation, allocations) { + if(allocation->needs_copy_to_device) { + /* Copy from host to device. */ + opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue, + CL_MEM_PTR(buffer->device_pointer), + CL_FALSE, + offset, + allocation->mem->memory_size(), + (void*)allocation->mem->data_pointer, + 0, NULL, NULL + )); + + allocation->needs_copy_to_device = false; + } + + offset += allocation->size; + } + } + + /* Not really necessary, but seems to improve responsiveness for some reason. */ + clFinish(device->cqCommandQueue); +} + +void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device) +{ + device->mem_free(*buffer); +} + +MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() +{ + DeviceBuffer* smallest = device_buffers; + + foreach(DeviceBuffer& device_buffer, device_buffers) { + if(device_buffer.size < smallest->size) { + smallest = &device_buffer; + } + } + + return smallest; +} + +MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false) +{ +} + +void MemoryManager::free() +{ + foreach(DeviceBuffer& device_buffer, device_buffers) { + device_buffer.free(device); + } +} + +void MemoryManager::alloc(const char *name, device_memory& mem) +{ + Allocation& allocation = allocations[name]; + + allocation.mem = &mem; + allocation.needs_copy_to_device = true; + + if(!allocation.device_buffer) { + DeviceBuffer* device_buffer = smallest_device_buffer(); + allocation.device_buffer = device_buffer; + + allocation.desc.device_buffer = device_buffer - device_buffers; + + device_buffer->add_allocation(allocation); + + device_buffer->size += mem.memory_size(); + } + + need_update = true; +} + +bool MemoryManager::free(device_memory& mem) +{ + foreach(AllocationsMap::value_type& value, allocations) { + Allocation& allocation = value.second; + if(allocation.mem == &mem) { + + allocation.device_buffer->size -= mem.memory_size(); + + allocation.mem = NULL; + allocation.needs_copy_to_device = false; + + need_update = true; + return true; + } + } + + return false; +} + +MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name) +{ + update_device_memory(); + + Allocation& allocation = allocations[name]; + return allocation.desc; +} + +void MemoryManager::update_device_memory() +{ + if(!need_update) { + return; + } + + need_update = false; + + foreach(DeviceBuffer& device_buffer, device_buffers) { + device_buffer.update_device_memory(device); + } +} + +void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg) +{ + update_device_memory(); + + foreach(DeviceBuffer& device_buffer, device_buffers) { + if(device_buffer.buffer->device_pointer) { + device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer); + } + else { + device->kernel_set_args(kernel, (*narg)++, device->null_mem); + } + } +} + +CCL_NAMESPACE_END + +#endif /* WITH_OPENCL */ + diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h new file mode 100644 index 00000000000..3714405d026 --- /dev/null +++ b/intern/cycles/device/opencl/memory_manager.h @@ -0,0 +1,105 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "device/device.h" + +#include "util/util_map.h" +#include "util/util_vector.h" +#include "util/util_string.h" + +#include "clew.h" + +CCL_NAMESPACE_BEGIN + +class OpenCLDeviceBase; + +class MemoryManager { +public: + static const int NUM_DEVICE_BUFFERS = 8; + + struct BufferDescriptor { + uint device_buffer; + cl_ulong offset; + }; + +private: + struct DeviceBuffer; + + struct Allocation { + device_memory *mem; + + DeviceBuffer *device_buffer; + size_t size; /* Size of actual allocation, may be larger than requested. */ + + BufferDescriptor desc; + + bool needs_copy_to_device; + + Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false) + { + } + }; + + struct DeviceBuffer { + device_memory *buffer; + vector<Allocation*> allocations; + size_t size; /* Size of all allocations. */ + + DeviceBuffer() : buffer(new device_memory), size(0) + { + } + + ~DeviceBuffer() { + delete buffer; + buffer = NULL; + } + + void add_allocation(Allocation& allocation); + + void update_device_memory(OpenCLDeviceBase *device); + + void free(OpenCLDeviceBase *device); + }; + + OpenCLDeviceBase *device; + + DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS]; + + typedef unordered_map<string, Allocation> AllocationsMap; + AllocationsMap allocations; + + bool need_update; + + DeviceBuffer* smallest_device_buffer(); + +public: + MemoryManager(OpenCLDeviceBase *device); + + void free(); /* Free all memory. */ + + void alloc(const char *name, device_memory& mem); + bool free(device_memory& mem); + + BufferDescriptor get_descriptor(string name); + + void update_device_memory(); + void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg); +}; + +CCL_NAMESPACE_END + diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 78ca377d933..0dae9136870 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -25,6 +25,8 @@ #include "clew.h" +#include "device/opencl/memory_manager.h" + CCL_NAMESPACE_BEGIN /* Disable workarounds, seems to be working fine on latest drivers. */ @@ -224,6 +226,18 @@ public: static string get_kernel_md5(); }; +#define opencl_device_assert(device, stmt) \ + { \ + cl_int err = stmt; \ + \ + if(err != CL_SUCCESS) { \ + string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \ + if((device)->error_msg == "") \ + (device)->error_msg = message; \ + fprintf(stderr, "%s\n", message.c_str()); \ + } \ + } (void)0 + #define opencl_assert(stmt) \ { \ cl_int err = stmt; \ @@ -344,6 +358,7 @@ public: size_t global_size_round_up(int group_size, int global_size); void enqueue_kernel(cl_kernel kernel, size_t w, size_t h, size_t max_workgroup_size = -1); void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name); + void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg); void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half); void shader(DeviceTask& task); @@ -525,6 +540,34 @@ protected: virtual string build_options_for_base_program( const DeviceRequestedFeatures& /*requested_features*/); + +private: + MemoryManager memory_manager; + friend MemoryManager; + + struct tex_info_t { + uint buffer, padding; + cl_ulong offset; + uint width, height, depth, options; + }; + static_assert_align(tex_info_t, 16); + + vector<tex_info_t> texture_descriptors; + device_memory texture_descriptors_buffer; + + struct Texture { + device_memory* mem; + InterpolationType interpolation; + ExtensionType extension; + }; + + typedef map<string, Texture> TexturesMap; + TexturesMap textures; + + bool textures_need_update; + +protected: + void flush_texture_buffers(); }; Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background); diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 509da7a0a84..63b5e004b7d 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -63,7 +63,7 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where) } OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) -: Device(info, stats, background_) +: Device(info, stats, background_), memory_manager(this) { cpPlatform = NULL; cdDevice = NULL; @@ -71,6 +71,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou cqCommandQueue = NULL; null_mem = 0; device_initialized = false; + textures_need_update = true; vector<OpenCLPlatformDevice> usable_devices; OpenCLInfo::get_usable_devices(&usable_devices); @@ -126,6 +127,12 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou return; } + /* Allocate this right away so that texture_descriptors_buffer is placed at offset 0 in the device memory buffers */ + texture_descriptors.resize(1); + texture_descriptors_buffer.resize(1); + texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0]; + memory_manager.alloc("texture_descriptors", texture_descriptors_buffer); + fprintf(stderr, "Device init success\n"); device_initialized = true; } @@ -134,6 +141,8 @@ OpenCLDeviceBase::~OpenCLDeviceBase() { task_pool.stop(); + memory_manager.free(); + if(null_mem) clReleaseMemObject(CL_MEM_PTR(null_mem)); @@ -493,29 +502,31 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) void OpenCLDeviceBase::tex_alloc(const char *name, device_memory& mem, - InterpolationType /*interpolation*/, - ExtensionType /*extension*/) + InterpolationType interpolation, + ExtensionType extension) { VLOG(1) << "Texture allocate: " << name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - mem_alloc(NULL, mem, MEM_READ_ONLY); - mem_copy_to(mem); - assert(mem_map.find(name) == mem_map.end()); - mem_map.insert(MemMap::value_type(name, mem.device_pointer)); + + memory_manager.alloc(name, mem); + + textures[name] = {&mem, interpolation, extension}; + + textures_need_update = true; } void OpenCLDeviceBase::tex_free(device_memory& mem) { - if(mem.device_pointer) { - foreach(const MemMap::value_type& value, mem_map) { - if(value.second == mem.device_pointer) { - mem_map.erase(value.first); - break; - } - } + if(memory_manager.free(mem)) { + textures_need_update = true; + } - mem_free(mem); + foreach(TexturesMap::value_type& value, textures) { + if(value.second.mem == &mem) { + textures.erase(value.first); + break; + } } } @@ -581,6 +592,104 @@ void OpenCLDeviceBase::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr)); } +void OpenCLDeviceBase::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg) +{ + flush_texture_buffers(); + + memory_manager.set_kernel_arg_buffers(kernel, narg); +} + +void OpenCLDeviceBase::flush_texture_buffers() +{ + if(!textures_need_update) { + return; + } + textures_need_update = false; + + /* Setup slots for textures. */ + int num_slots = 0; + + struct texture_slot_t { + string name; + int slot; + }; + + vector<texture_slot_t> texture_slots; + +#define KERNEL_TEX(type, ttype, name) \ + if(textures.find(#name) != textures.end()) { \ + texture_slots.push_back({#name, num_slots}); \ + } \ + num_slots++; +#include "kernel/kernel_textures.h" + + int num_data_slots = num_slots; + + foreach(TexturesMap::value_type& tex, textures) { + string name = tex.first; + + if(string_startswith(name, "__tex_image")) { + int pos = name.rfind("_"); + int id = atoi(name.data() + pos + 1); + + texture_slots.push_back({name, num_data_slots + id}); + + num_slots = max(num_slots, num_data_slots + id + 1); + } + } + + /* Realloc texture descriptors buffer. */ + memory_manager.free(texture_descriptors_buffer); + + texture_descriptors.resize(num_slots); + texture_descriptors_buffer.resize(num_slots * sizeof(tex_info_t)); + texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0]; + + memory_manager.alloc("texture_descriptors", texture_descriptors_buffer); + + /* Fill in descriptors */ + foreach(texture_slot_t& slot, texture_slots) { + Texture& tex = textures[slot.name]; + + tex_info_t& info = texture_descriptors[slot.slot]; + + MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); + + info.offset = desc.offset; + info.buffer = desc.device_buffer; + + if(string_startswith(slot.name, "__tex_image")) { + info.width = tex.mem->data_width; + info.height = tex.mem->data_height; + info.depth = tex.mem->data_depth; + + info.options = 0; + + if(tex.interpolation == INTERPOLATION_CLOSEST) { + info.options |= (1 << 0); + } + + switch(tex.extension) { + case EXTENSION_REPEAT: + info.options |= (1 << 1); + break; + case EXTENSION_EXTEND: + info.options |= (1 << 2); + break; + case EXTENSION_CLIP: + info.options |= (1 << 3); + break; + default: + break; + } + } + } + + /* Force write of descriptors. */ + memory_manager.free(texture_descriptors_buffer); + memory_manager.alloc("texture_descriptors", texture_descriptors_buffer); +} + void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) { /* cast arguments to cl types */ @@ -605,10 +714,7 @@ void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ d_rgba, d_buffer); -#define KERNEL_TEX(type, ttype, name) \ -set_kernel_arg_mem(ckFilmConvertKernel, &start_arg_index, #name); -#include "kernel/kernel_textures.h" -#undef KERNEL_TEX + set_kernel_arg_buffers(ckFilmConvertKernel, &start_arg_index); start_arg_index += kernel_set_args(ckFilmConvertKernel, start_arg_index, @@ -1030,10 +1136,7 @@ void OpenCLDeviceBase::shader(DeviceTask& task) d_output_luma); } -#define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(kernel, &start_arg_index, #name); -#include "kernel/kernel_textures.h" -#undef KERNEL_TEX + set_kernel_arg_buffers(kernel, &start_arg_index); start_arg_index += kernel_set_args(kernel, start_arg_index, diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index 06c15bcf401..ec47fdafa3d 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -82,10 +82,7 @@ public: d_buffer, d_rng_state); -#define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name); -#include "kernel/kernel_textures.h" -#undef KERNEL_TEX + set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index); start_arg_index += kernel_set_args(ckPathTraceKernel, start_arg_index, diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 76d9983e9a2..df7c064a24f 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -99,6 +99,8 @@ public: void thread_run(DeviceTask *task) { + flush_texture_buffers(); + if(task->type == DeviceTask::FILM_CONVERT) { film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); } @@ -113,10 +115,19 @@ public: */ typedef struct KernelGlobals { ccl_constant KernelData *data; + ccl_global char *buffers[8]; + + typedef struct _tex_info_t { + uint buffer, padding; + ulong offset; + uint width, height, depth, options; + } _tex_info_t; + #define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name; + _tex_info_t name; #include "kernel/kernel_textures.h" #undef KERNEL_TEX + SplitData split_data; SplitParams split_param_data; } KernelGlobals; @@ -217,11 +228,7 @@ public: *cached_memory.ray_state, *cached_memory.rng_state); -/* TODO(sergey): Avoid map lookup here. */ -#define KERNEL_TEX(type, ttype, name) \ - device->set_kernel_arg_mem(program(), &start_arg_index, #name); -#include "kernel/kernel_textures.h" -#undef KERNEL_TEX + device->set_kernel_arg_buffers(program(), &start_arg_index); start_arg_index += device->kernel_set_args(program(), @@ -352,11 +359,7 @@ public: ray_state, rtile.rng_state); -/* TODO(sergey): Avoid map lookup here. */ -#define KERNEL_TEX(type, ttype, name) \ - device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name); -#include "kernel/kernel_textures.h" -#undef KERNEL_TEX + device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index); start_arg_index += device->kernel_set_args(device->program_data_init(), |