From ae41f38f78f8c54f92cf34dd88e35948e19aed55 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 20 Oct 2017 05:08:26 +0200 Subject: Code refactor: pass device to scene, check OSL with device info. --- intern/cycles/app/cycles_standalone.cpp | 43 ++++++++++++++----------------- intern/cycles/blender/blender_session.cpp | 14 +++++----- intern/cycles/device/device.cpp | 3 +++ intern/cycles/device/device.h | 2 ++ intern/cycles/device/device_cpu.cpp | 1 + intern/cycles/device/device_network.cpp | 1 + intern/cycles/render/scene.cpp | 9 +++---- intern/cycles/render/scene.h | 2 +- 8 files changed, 40 insertions(+), 35 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 0cd249f0d84..939c6cf7eb5 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -97,27 +97,9 @@ static BufferParams& session_buffer_params() return buffer_params; } -static void session_init() -{ - options.session = new Session(options.session_params); - options.session->reset(session_buffer_params(), options.session_params.samples); - options.session->scene = options.scene; - - if(options.session_params.background && !options.quiet) - options.session->progress.set_update_callback(function_bind(&session_print_status)); -#ifdef WITH_CYCLES_STANDALONE_GUI - else - options.session->progress.set_update_callback(function_bind(&view_redraw)); -#endif - - options.session->start(); - - options.scene = NULL; -} - static void scene_init() { - options.scene = new Scene(options.scene_params, options.session_params.device); + options.scene = new Scene(options.scene_params, options.session->device); /* Read XML */ xml_read_file(options.scene, options.filepath.c_str()); @@ -136,6 +118,25 @@ static void scene_init() options.scene->camera->compute_auto_viewplane(); } +static void session_init() +{ + options.session = new Session(options.session_params); + options.session->reset(session_buffer_params(), options.session_params.samples); + + if(options.session_params.background && !options.quiet) + options.session->progress.set_update_callback(function_bind(&session_print_status)); +#ifdef WITH_CYCLES_STANDALONE_GUI + else + options.session->progress.set_update_callback(function_bind(&view_redraw)); +#endif + + options.session->start(); + + /* load scene */ + scene_init(); + options.session->scene = options.scene; +} + static void session_exit() { if(options.session) { @@ -430,7 +431,6 @@ static void options_parse(int argc, const char **argv) /* find matching device */ DeviceType device_type = Device::type_from_string(devicename.c_str()); vector& devices = Device::available_devices(); - DeviceInfo device_info; bool device_available = false; foreach(DeviceInfo& device, devices) { @@ -467,9 +467,6 @@ static void options_parse(int argc, const char **argv) /* For smoother Viewport */ options.session_params.start_resolution = 64; - - /* load scene */ - scene_init(); } CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index f1226388a62..5b71e11d61d 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -124,20 +124,22 @@ void BlenderSession::create_session() last_progress = -1.0f; start_resize_time = 0.0; + /* create session */ + session = new Session(session_params); + session->scene = scene; + session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); + session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); + session->set_pause(session_pause); + /* create scene */ - scene = new Scene(scene_params, session_params.device); + scene = new Scene(scene_params, session->device); /* setup callbacks for builtin image support */ scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7, _8); scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5); scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5); - /* create session */ - session = new Session(session_params); session->scene = scene; - session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); - session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); - session->set_pause(session_pause); /* create sync */ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress); diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index f31cacd8ec1..16c027e2cb5 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -380,10 +380,13 @@ DeviceInfo Device::get_multi_device(const vector& subdevices, int th info.has_bindless_textures = true; info.has_volume_decoupled = true; info.has_qbvh = true; + info.has_osl = true; + foreach(const DeviceInfo &device, subdevices) { info.has_bindless_textures &= device.has_bindless_textures; info.has_volume_decoupled &= device.has_volume_decoupled; info.has_qbvh &= device.has_qbvh; + info.has_osl &= device.has_osl; if(device.type == DEVICE_CPU && subdevices.size() > 1) { if(background) { diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index f400eeb3e6b..4bf88f75932 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -57,6 +57,7 @@ public: bool has_bindless_textures; /* flag for GPU and Multi device */ bool has_volume_decoupled; bool has_qbvh; + bool has_osl; bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */ int cpu_threads; vector multi_devices; @@ -72,6 +73,7 @@ public: has_bindless_textures = false; has_volume_decoupled = false; has_qbvh = false; + has_osl = false; use_split_kernel = false; } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 0ba00da16a6..b05f24659ee 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -1028,6 +1028,7 @@ void device_cpu_info(vector& devices) info.advanced_shading = true; info.has_qbvh = system_cpu_support_sse2(); info.has_volume_decoupled = true; + info.has_osl = true; devices.insert(devices.begin(), info); } diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index ced10c98dc9..3fea89a243c 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -348,6 +348,7 @@ void device_network_info(vector& devices) info.advanced_shading = true; info.has_volume_decoupled = false; info.has_qbvh = false; + info.has_osl = false; devices.push_back(info); } diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index cf89385a33d..00c32312d9f 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -40,10 +40,9 @@ CCL_NAMESPACE_BEGIN -Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) -: params(params_) +Scene::Scene(const SceneParams& params_, Device *device) +: device(device), params(params_) { - device = NULL; memset(&dscene.data, 0, sizeof(dscene.data)); camera = new Camera(); @@ -54,13 +53,13 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) mesh_manager = new MeshManager(); object_manager = new ObjectManager(); integrator = new Integrator(); - image_manager = new ImageManager(device_info_); + image_manager = new ImageManager(device->info); particle_system_manager = new ParticleSystemManager(); curve_system_manager = new CurveSystemManager(); bake_manager = new BakeManager(); /* OSL only works on the CPU */ - if(device_info_.type == DEVICE_CPU) + if(device->info.has_osl) shader_manager = ShaderManager::create(this, params.shadingsystem); else shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index d4ec7d90ff5..23b9eb06a7b 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -201,7 +201,7 @@ public: /* mutex must be locked manually by callers */ thread_mutex mutex; - Scene(const SceneParams& params, const DeviceInfo& device_info); + Scene(const SceneParams& params, Device *device); ~Scene(); void device_update(Device *device, Progress& progress); -- cgit v1.2.3 From 7ad9333fad25b9a7cabea0d659eaf724f89912c8 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 20 Oct 2017 23:31:13 +0200 Subject: Code refactor: store device/interp/extension/type in each device_memory. --- intern/cycles/device/CMakeLists.txt | 1 + intern/cycles/device/device.cpp | 14 +---- intern/cycles/device/device.h | 14 +---- intern/cycles/device/device_cpu.cpp | 48 +++++++--------- intern/cycles/device/device_cuda.cpp | 58 ++++++++++---------- intern/cycles/device/device_denoising.cpp | 68 +++++++++++------------ intern/cycles/device/device_denoising.h | 18 +++++- intern/cycles/device/device_memory.cpp | 60 ++++++++++++++++++++ intern/cycles/device/device_memory.h | 29 +++++----- intern/cycles/device/device_multi.cpp | 16 ++---- intern/cycles/device/device_network.cpp | 76 ++++++++++---------------- intern/cycles/device/device_network.h | 21 ++++++- intern/cycles/device/device_split_kernel.cpp | 18 ++++-- intern/cycles/device/opencl/memory_manager.cpp | 14 ++++- intern/cycles/device/opencl/memory_manager.h | 6 +- intern/cycles/device/opencl/opencl.h | 25 ++------- intern/cycles/device/opencl/opencl_base.cpp | 58 ++++++++++---------- intern/cycles/device/opencl/opencl_split.cpp | 8 +-- intern/cycles/kernel/kernel.h | 6 +- intern/cycles/kernel/kernels/cpu/kernel.cpp | 8 +-- intern/cycles/render/bake.cpp | 8 +-- intern/cycles/render/buffers.cpp | 23 ++++---- intern/cycles/render/image.cpp | 20 +++---- intern/cycles/render/integrator.cpp | 2 +- intern/cycles/render/light.cpp | 16 +++--- intern/cycles/render/mesh.cpp | 46 ++++++++-------- intern/cycles/render/mesh_displace.cpp | 8 +-- intern/cycles/render/object.cpp | 8 +-- intern/cycles/render/particles.cpp | 2 +- intern/cycles/render/scene.cpp | 41 +++++++++++++- intern/cycles/render/scene.h | 2 + intern/cycles/render/shader.cpp | 2 +- intern/cycles/render/svm.cpp | 2 +- intern/cycles/render/tables.cpp | 2 +- 34 files changed, 410 insertions(+), 338 deletions(-) create mode 100644 intern/cycles/device/device_memory.cpp (limited to 'intern/cycles') diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 3c632160fbd..959c0aa97c9 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -26,6 +26,7 @@ set(SRC device_cpu.cpp device_cuda.cpp device_denoising.cpp + device_memory.cpp device_multi.cpp device_opencl.cpp device_split_kernel.cpp diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 16c027e2cb5..9de10c184fb 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -87,7 +87,7 @@ Device::~Device() void Device::pixels_alloc(device_memory& mem) { - mem_alloc("pixels", mem, MEM_READ_WRITE); + mem_alloc(mem); } void Device::pixels_copy_from(device_memory& mem, int y, int w, int h) @@ -429,16 +429,4 @@ void Device::free_memory() devices.free_memory(); } - -device_sub_ptr::device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type) - : device(device) -{ - ptr = device->mem_alloc_sub_ptr(mem, offset, size, type); -} - -device_sub_ptr::~device_sub_ptr() -{ - device->mem_free_sub_ptr(ptr); -} - CCL_NAMESPACE_END diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 4bf88f75932..6bb65cde2a3 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -253,7 +253,7 @@ protected: /* used for real time display */ unsigned int vertex_buffer; - virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/, MemoryType /*type*/) + virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/) { /* Only required for devices that implement denoising. */ assert(false); @@ -282,7 +282,7 @@ public: Stats &stats; /* regular memory */ - virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0; + virtual void mem_alloc(device_memory& mem) = 0; virtual void mem_copy_to(device_memory& mem) = 0; virtual void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) = 0; @@ -295,15 +295,7 @@ public: virtual void const_copy_to(const char *name, void *host, size_t size) = 0; /* texture memory */ - virtual void tex_alloc(const char * /*name*/, - device_memory& /*mem*/, - InterpolationType interpolation = INTERPOLATION_NONE, - ExtensionType extension = EXTENSION_REPEAT) - { - (void)interpolation; /* Ignored. */ - (void)extension; /* Ignored. */ - }; - + virtual void tex_alloc(device_memory& /*mem*/) {}; virtual void tex_free(device_memory& /*mem*/) {}; /* pixel memory */ diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index b05f24659ee..60c06462d4d 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -209,6 +209,7 @@ public: CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_) : Device(info_, stats_, background_), + texture_info(this, "__texture_info"), #define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name)) REGISTER_KERNEL(path_trace), REGISTER_KERNEL(convert_to_half_float), @@ -280,15 +281,15 @@ public: { if(need_texture_info) { tex_free(texture_info); - tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT); + tex_alloc(texture_info); need_texture_info = false; } } - void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) + void mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -332,7 +333,7 @@ public: } } - virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/) + virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/) { return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset)); } @@ -342,32 +343,25 @@ public: kernel_const_copy(&kernel_globals, name, host, size); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - if(interpolation == INTERPOLATION_NONE) { + if(mem.interpolation == INTERPOLATION_NONE) { /* Data texture. */ kernel_tex_copy(&kernel_globals, - name, + mem.name, mem.data_pointer, - mem.data_width, - mem.data_height, - mem.data_depth, - interpolation, - extension); + mem.data_width); } else { /* Image Texture. */ int flat_slot = 0; - if(string_startswith(name, "__tex_image")) { - int pos = string(name).rfind("_"); - flat_slot = atoi(name + pos + 1); + if(string_startswith(mem.name, "__tex_image")) { + int pos = string(mem.name).rfind("_"); + flat_slot = atoi(mem.name + pos + 1); } else { assert(0); @@ -382,8 +376,8 @@ public: TextureInfo& info = texture_info[flat_slot]; info.data = (uint64_t)mem.data_pointer; info.cl_buffer = 0; - info.interpolation = interpolation; - info.extension = extension; + info.interpolation = mem.interpolation; + info.extension = mem.extension; info.width = mem.data_width; info.height = mem.data_height; info.depth = mem.data_depth; @@ -437,7 +431,7 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY); + mem_alloc(task->tiles_mem); TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { @@ -728,9 +722,9 @@ public: } /* allocate buffer for kernel globals */ - device_only_memory kgbuffer; + device_only_memory kgbuffer(this, "kernel_globals"); kgbuffer.resize(1); - mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); + mem_alloc(kgbuffer); KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init()); @@ -751,8 +745,8 @@ public: while(task.acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel) { - device_memory data; - split_kernel->path_trace(&task, tile, kgbuffer, data); + device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY); + split_kernel->path_trace(&task, tile, kgbuffer, void_buffer); } else { path_trace(task, tile, kg); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0f17b67c8c6..1295ec86355 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -217,7 +217,8 @@ public: } CUDADevice(DeviceInfo& info, Stats &stats, bool background_) - : Device(info, stats, background_) + : Device(info, stats, background_), + texture_info(this, "__texture_info") { first_error = true; background = background_; @@ -548,17 +549,17 @@ public: { if(info.has_bindless_textures && need_texture_info) { tex_free(texture_info); - tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT); + tex_alloc(texture_info); need_texture_info = false; } } - void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) + void mem_alloc(device_memory& mem) { CUDAContextScope scope(this); - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -619,7 +620,7 @@ public: } } - virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/) + virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/) { return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset)); } @@ -635,14 +636,11 @@ public: cuda_assert(cuMemcpyHtoD(mem, host, size)); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { CUDAContextScope scope(this); - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -650,12 +648,12 @@ public: bool has_bindless_textures = info.has_bindless_textures; /* General variables for both architectures */ - string bind_name = name; + string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; - switch(extension) { + switch(mem.extension) { case EXTENSION_REPEAT: address_mode = CU_TR_ADDRESS_MODE_WRAP; break; @@ -671,7 +669,7 @@ public: } CUfilter_mode filter_mode; - if(interpolation == INTERPOLATION_CLOSEST) { + if(mem.interpolation == INTERPOLATION_CLOSEST) { filter_mode = CU_TR_FILTER_MODE_POINT; } else { @@ -681,13 +679,13 @@ public: /* General variables for Fermi */ CUtexref texref = NULL; - if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) { + if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) { if(mem.data_depth > 1) { /* Kernel uses different bind names for 2d and 3d float textures, * so we have to adjust couple of things here. */ vector tokens; - string_split(tokens, name, "_"); + string_split(tokens, mem.name, "_"); bind_name = string_printf("__tex_image_%s_3d_%s", tokens[2].c_str(), tokens[3].c_str()); @@ -700,9 +698,9 @@ public: } } - if(interpolation == INTERPOLATION_NONE) { + if(mem.interpolation == INTERPOLATION_NONE) { /* Data Storage */ - mem_alloc(NULL, mem, MEM_READ_ONLY); + mem_alloc(mem); mem_copy_to(mem); CUdeviceptr cumem; @@ -802,9 +800,9 @@ public: if(has_bindless_textures) { /* Bindless Textures - Kepler */ int flat_slot = 0; - if(string_startswith(name, "__tex_image")) { - int pos = string(name).rfind("_"); - flat_slot = atoi(name + pos + 1); + if(string_startswith(mem.name, "__tex_image")) { + int pos = string(mem.name).rfind("_"); + flat_slot = atoi(mem.name + pos + 1); } else { assert(0); @@ -843,8 +841,8 @@ public: TextureInfo& info = texture_info[flat_slot]; info.data = (uint64_t)tex; info.cl_buffer = 0; - info.interpolation = interpolation; - info.extension = extension; + info.interpolation = mem.interpolation; + info.extension = mem.extension; info.width = mem.data_width; info.height = mem.data_height; info.depth = mem.data_depth; @@ -869,7 +867,7 @@ public: } /* Fermi and Kepler */ - tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE); + tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE); } void tex_free(device_memory& mem) @@ -900,7 +898,7 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY); + mem_alloc(task->tiles_mem); TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { @@ -1297,7 +1295,7 @@ public: cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); /* Allocate work tile. */ - device_vector work_tiles; + device_vector work_tiles(this, "work_tiles", MEM_READ_ONLY); work_tiles.resize(1); WorkTile *wtile = work_tiles.get_data(); @@ -1308,7 +1306,7 @@ public: wtile->offset = rtile.offset; wtile->stride = rtile.stride; wtile->buffer = (float*)cuda_device_ptr(rtile.buffer); - mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY); + mem_alloc(work_tiles); CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); @@ -1730,7 +1728,7 @@ public: while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel()) { - device_memory void_buffer; + device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY); split_kernel->path_trace(task, tile, void_buffer, void_buffer); } else { @@ -1885,9 +1883,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory { CUDAContextScope scope(device); - device_vector size_buffer; + device_vector size_buffer(device, "size_buffer", MEM_READ_WRITE); size_buffer.resize(1); - device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + device->mem_alloc(size_buffer); uint threads = num_threads; CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 619cc1d171e..2c3bfefd8b0 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -76,21 +76,21 @@ bool DenoisingTask::run_denoising() buffer.h = rect.w - rect.y; buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float))); buffer.mem.resize(buffer.pass_stride * buffer.passes); - device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE); + device->mem_alloc(buffer.mem); device_ptr null_ptr = (device_ptr) 0; /* Prefilter shadow feature. */ { - device_sub_ptr unfiltered_a (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr unfiltered_b (device, buffer.mem, 1*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr sample_var (device, buffer.mem, 2*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr sample_var_var (device, buffer.mem, 3*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr buffer_var (device, buffer.mem, 5*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr filtered_var (device, buffer.mem, 6*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_1(device, buffer.mem, 7*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_2(device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_3(device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride); + device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); nlm_state.temporary_1_ptr = *nlm_temporary_1; nlm_state.temporary_2_ptr = *nlm_temporary_2; @@ -123,17 +123,17 @@ bool DenoisingTask::run_denoising() functions.non_local_means(filtered_b, filtered_a, residual_var, final_b); /* Combine the two double-filtered halves to a final shadow feature. */ - device_sub_ptr shadow_pass(device, buffer.mem, 4*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride); functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect); } /* Prefilter general features. */ { - device_sub_ptr unfiltered (device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr variance (device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_1(device, buffer.mem, 10*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_2(device, buffer.mem, 11*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr nlm_temporary_3(device, buffer.mem, 12*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride); nlm_state.temporary_1_ptr = *nlm_temporary_1; nlm_state.temporary_2_ptr = *nlm_temporary_2; @@ -143,7 +143,7 @@ bool DenoisingTask::run_denoising() int variance_from[] = { 3, 4, 5, 13, 9, 10, 11}; int pass_to[] = { 1, 2, 3, 0, 5, 6, 7}; for(int pass = 0; pass < 7; pass++) { - device_sub_ptr feature_pass(device, buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride); /* Get the unfiltered pass and its variance from the RenderBuffers. */ functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance); /* Smooth the pass and store the result in the denoising buffers. */ @@ -160,20 +160,20 @@ bool DenoisingTask::run_denoising() int variance_to[] = {11, 12, 13}; int num_color_passes = 3; - device_only_memory temp_color; + device_only_memory temp_color(device, "Denoising temporary color"); temp_color.resize(3*buffer.pass_stride); - device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE); + device->mem_alloc(temp_color); for(int pass = 0; pass < num_color_passes; pass++) { - device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride); functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass); } { - device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride); + device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride); + device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride); functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); } @@ -184,24 +184,24 @@ bool DenoisingTask::run_denoising() storage.h = filter_area.w; storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE); storage.rank.resize(storage.w*storage.h); - device->mem_alloc("Denoising Transform", storage.transform, MEM_READ_WRITE); - device->mem_alloc("Denoising Rank", storage.rank, MEM_READ_WRITE); + device->mem_alloc(storage.transform); + device->mem_alloc(storage.rank); functions.construct_transform(); - device_only_memory temporary_1; - device_only_memory temporary_2; + device_only_memory temporary_1(device, "Denoising NLM temporary 1"); + device_only_memory temporary_2(device, "Denoising NLM temporary 2"); temporary_1.resize(buffer.w*buffer.h); temporary_2.resize(buffer.w*buffer.h); - device->mem_alloc("Denoising NLM temporary 1", temporary_1, MEM_READ_WRITE); - device->mem_alloc("Denoising NLM temporary 2", temporary_2, MEM_READ_WRITE); + device->mem_alloc(temporary_1); + device->mem_alloc(temporary_2); reconstruction_state.temporary_1_ptr = temporary_1.device_pointer; reconstruction_state.temporary_2_ptr = temporary_2.device_pointer; storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE); storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE); - device->mem_alloc("Denoising XtWX", storage.XtWX, MEM_READ_WRITE); - device->mem_alloc("Denoising XtWY", storage.XtWY, MEM_READ_WRITE); + device->mem_alloc(storage.XtWX); + device->mem_alloc(storage.XtWY); reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x; @@ -213,8 +213,8 @@ bool DenoisingTask::run_denoising() reconstruction_state.source_h = rect.w-rect.y; { - device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); - device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); + device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride); + device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride); functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index def7b72f67d..606f7422ac8 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -123,9 +123,21 @@ public: device_only_memory XtWY; int w; int h; + + Storage(Device *device) + : transform(device, "denoising transform"), + rank(device, "denoising rank"), + XtWX(device, "denoising XtWX"), + XtWY(device, "denoising XtWY") + {} } storage; - DenoisingTask(Device *device) : device(device) {} + DenoisingTask(Device *device) + : tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE), + storage(device), + buffer(device), + device(device) + {} void init_from_devicetask(const DeviceTask &task); @@ -137,6 +149,10 @@ public: int w; int h; device_only_memory mem; + + DenoiseBuffers(Device *device) + : mem(device, "denoising pixel buffer") + {} } buffer; protected: diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp new file mode 100644 index 00000000000..98fa638ef8e --- /dev/null +++ b/intern/cycles/device/device_memory.cpp @@ -0,0 +1,60 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/device.h" +#include "device/device_memory.h" + +CCL_NAMESPACE_BEGIN + +/* Device Memory */ + +device_memory::device_memory(Device *device, const char *name, MemoryType type) +: data_type(device_type_traits::data_type), + data_elements(device_type_traits::num_elements), + data_pointer(0), + data_size(0), + device_size(0), + data_width(0), + data_height(0), + data_depth(0), + type(type), + name(name), + interpolation(INTERPOLATION_NONE), + extension(EXTENSION_REPEAT), + device(device), + device_pointer(0) +{ +} + +device_memory::~device_memory() +{ +} + +/* Device Sub Ptr */ + +device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size) +: device(mem.device) +{ + ptr = device->mem_alloc_sub_ptr(mem, offset, size); +} + +device_sub_ptr::~device_sub_ptr() +{ + device->mem_free_sub_ptr(ptr); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index eeeca61496e..3dfecde59d8 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -30,6 +30,7 @@ #include "util/util_debug.h" #include "util/util_half.h" +#include "util/util_texture.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -190,23 +191,17 @@ public: size_t data_width; size_t data_height; size_t data_depth; + MemoryType type; + const char *name; + InterpolationType interpolation; + ExtensionType extension; /* device pointer */ + Device *device; device_ptr device_pointer; - device_memory() - { - data_type = device_type_traits::data_type; - data_elements = device_type_traits::num_elements; - data_pointer = 0; - data_size = 0; - device_size = 0; - data_width = 0; - data_height = 0; - data_depth = 0; - device_pointer = 0; - } - virtual ~device_memory() { assert(!device_pointer); } + device_memory(Device *device, const char *name, MemoryType type); + virtual ~device_memory(); void resize(size_t size) { @@ -224,7 +219,8 @@ template class device_only_memory : public device_memory { public: - device_only_memory() + device_only_memory(Device *device, const char *name) + : device_memory(device, name, MEM_READ_WRITE) { data_type = device_type_traits::data_type; data_elements = max(device_type_traits::num_elements, 1); @@ -241,7 +237,8 @@ public: template class device_vector : public device_memory { public: - device_vector() + device_vector(Device *device, const char *name, MemoryType type = MEM_READ_ONLY) + : device_memory(device, name, type) { data_type = device_type_traits::data_type; data_elements = device_type_traits::num_elements; @@ -317,7 +314,7 @@ private: class device_sub_ptr { public: - device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type); + device_sub_ptr(device_memory& mem, int offset, int size); ~device_sub_ptr(); /* No copying. */ device_sub_ptr& operator = (const device_sub_ptr&); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index b17b972b06f..7f7fbc0d1d3 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -106,11 +106,11 @@ public: return true; } - void mem_alloc(const char *name, device_memory& mem, MemoryType type) + void mem_alloc(device_memory& mem) { foreach(SubDevice& sub, devices) { mem.device_pointer = 0; - sub.device->mem_alloc(name, mem, type); + sub.device->mem_alloc(mem); sub.ptr_map[unique_ptr] = mem.device_pointer; } @@ -179,19 +179,15 @@ public: sub.device->const_copy_to(name, host, size); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType - interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; foreach(SubDevice& sub, devices) { mem.device_pointer = 0; - sub.device->tex_alloc(name, mem, interpolation, extension); + sub.device->tex_alloc(mem); sub.ptr_map[unique_ptr] = mem.device_pointer; } @@ -314,7 +310,7 @@ public: tiles[i].buffers->copy_from_device(); device_ptr original_ptr = mem.device_pointer; mem.device_pointer = 0; - sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE); + sub_device->mem_alloc(mem); sub_device->mem_copy_to(mem); tiles[i].buffer = mem.device_pointer; mem.device_pointer = original_ptr; diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index 3fea89a243c..bdc88b6acae 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -87,10 +87,10 @@ public: snd.write(); } - void mem_alloc(const char *name, device_memory& mem, MemoryType type) + void mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -100,9 +100,7 @@ public: mem.device_pointer = ++mem_counter; RPCSend snd(socket, &error_func, "mem_alloc"); - snd.add(mem); - snd.add(type); snd.write(); } @@ -174,12 +172,9 @@ public: snd.write_buffer(host, size); } - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) + void tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -188,13 +183,7 @@ public: mem.device_pointer = ++mem_counter; RPCSend snd(socket, &error_func, "tex_alloc"); - - string name_string(name); - - snd.add(name_string); snd.add(mem); - snd.add(interpolation); - snd.add(extension); snd.write(); snd.write_buffer((void*)mem.data_pointer, mem.memory_size()); } @@ -470,16 +459,12 @@ protected: void process(RPCReceive& rcv, thread_scoped_lock &lock) { if(rcv.name == "mem_alloc") { - MemoryType type; - network_device_memory mem; - device_ptr client_pointer; - - rcv.read(mem); - rcv.read(type); - + string name; + network_device_memory mem(device); + rcv.read(mem, name); lock.unlock(); - client_pointer = mem.device_pointer; + device_ptr client_pointer = mem.device_pointer; /* create a memory buffer for the device buffer */ size_t data_size = mem.memory_size(); @@ -491,15 +476,15 @@ protected: mem.data_pointer = 0; /* perform the allocation on the actual device */ - device->mem_alloc(NULL, mem, type); + device->mem_alloc(mem); /* store a mapping to/from client_pointer and real device pointer */ pointer_mapping_insert(client_pointer, mem.device_pointer); } else if(rcv.name == "mem_copy_to") { - network_device_memory mem; - - rcv.read(mem); + string name; + network_device_memory mem(device); + rcv.read(mem, name); lock.unlock(); device_ptr client_pointer = mem.device_pointer; @@ -521,10 +506,11 @@ protected: device->mem_copy_to(mem); } else if(rcv.name == "mem_copy_from") { - network_device_memory mem; + string name; + network_device_memory mem(device); int y, w, h, elem; - rcv.read(mem); + rcv.read(mem, name); rcv.read(y); rcv.read(w); rcv.read(h); @@ -547,9 +533,9 @@ protected: lock.unlock(); } else if(rcv.name == "mem_zero") { - network_device_memory mem; - - rcv.read(mem); + string name; + network_device_memory mem(device); + rcv.read(mem, name); lock.unlock(); device_ptr client_pointer = mem.device_pointer; @@ -562,13 +548,13 @@ protected: device->mem_zero(mem); } else if(rcv.name == "mem_free") { - network_device_memory mem; - device_ptr client_pointer; + string name; + network_device_memory mem(device); - rcv.read(mem); + rcv.read(mem, name); lock.unlock(); - client_pointer = mem.device_pointer; + device_ptr client_pointer = mem.device_pointer; mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer); @@ -588,16 +574,11 @@ protected: device->const_copy_to(name_string.c_str(), &host_vector[0], size); } else if(rcv.name == "tex_alloc") { - network_device_memory mem; string name; - InterpolationType interpolation; - ExtensionType extension_type; + network_device_memory mem(device); device_ptr client_pointer; - rcv.read(name); - rcv.read(mem); - rcv.read(interpolation); - rcv.read(extension_type); + rcv.read(mem, name); lock.unlock(); client_pointer = mem.device_pointer; @@ -613,15 +594,16 @@ protected: rcv.read_buffer((uint8_t*)mem.data_pointer, data_size); - device->tex_alloc(name.c_str(), mem, interpolation, extension_type); + device->tex_alloc(mem); pointer_mapping_insert(client_pointer, mem.device_pointer); } else if(rcv.name == "tex_free") { - network_device_memory mem; + string name; + network_device_memory mem(device); device_ptr client_pointer; - rcv.read(mem); + rcv.read(mem, name); lock.unlock(); client_pointer = mem.device_pointer; diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index 3d3bd99dfe7..8a53290f421 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -38,6 +38,7 @@ #include "util/util_foreach.h" #include "util/util_list.h" #include "util/util_map.h" +#include "util/util_param.h" #include "util/util_string.h" CCL_NAMESPACE_BEGIN @@ -68,8 +69,15 @@ typedef boost::archive::binary_iarchive i_archive; class network_device_memory : public device_memory { public: - network_device_memory() {} - ~network_device_memory() { device_pointer = 0; }; + network_device_memory(Device *device) + : device_memory(device, "", MEM_READ_ONLY) + { + } + + ~network_device_memory() + { + device_pointer = 0; + }; vector local_data; }; @@ -119,6 +127,9 @@ public: { archive & mem.data_type & mem.data_elements & mem.data_size; archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; + archive & mem.type & string(mem.name); + archive & mem.interpolation & mem.extension; + archive & mem.device_pointer; } template void add(const T& data) @@ -258,11 +269,15 @@ public: delete archive_stream; } - void read(network_device_memory& mem) + void read(network_device_memory& mem, string& name) { *archive & mem.data_type & mem.data_elements & mem.data_size; *archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; + *archive & mem.type & name; + *archive & mem.interpolation & mem.extension; + *archive & mem.device_pointer; + mem.name = name.c_str(); mem.data_pointer = 0; } diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index d2b3a89fa98..5283bd60bd5 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -26,7 +26,13 @@ CCL_NAMESPACE_BEGIN static const double alpha = 0.1; /* alpha for rolling average */ -DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) +DeviceSplitKernel::DeviceSplitKernel(Device *device) +: device(device), + split_data(device, "split_data", MEM_READ_WRITE), + ray_state(device, "ray_state", MEM_READ_WRITE), + queue_index(device, "queue_index"), + use_queues_flag(device, "use_queues_flag"), + work_pool_wgs(device, "work_pool_wgs") { current_max_closure = -1; first_tile = true; @@ -170,19 +176,19 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, /* Allocate work_pool_wgs memory. */ work_pool_wgs.resize(max_work_groups); - device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE); + device->mem_alloc(work_pool_wgs); queue_index.resize(NUM_QUEUES); - device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE); + device->mem_alloc(queue_index); use_queues_flag.resize(1); - device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE); + device->mem_alloc(use_queues_flag); ray_state.resize(num_global_elements); - device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE); + device->mem_alloc(ray_state); split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); - device->mem_alloc("split_data", split_data, MEM_READ_WRITE); + device->mem_alloc(split_data); } #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index b67dfef88aa..6deed4e3f0d 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -73,10 +73,12 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) return; } - device_memory *new_buffer = new device_memory; + device_memory *new_buffer = new device_memory(device, + "memory manager buffer", + MEM_READ_ONLY); new_buffer->resize(total_size); - device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY); + device->mem_alloc(*new_buffer); size_t offset = 0; @@ -161,8 +163,14 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() return smallest; } -MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false) +MemoryManager::MemoryManager(OpenCLDeviceBase *device) +: device(device), need_update(false) { + foreach(DeviceBuffer& device_buffer, device_buffers) { + device_buffer.buffer = new device_memory(device, + "memory manager buffer", + MEM_READ_ONLY); + } } void MemoryManager::free() diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h index 3714405d026..7ef74a79834 100644 --- a/intern/cycles/device/opencl/memory_manager.h +++ b/intern/cycles/device/opencl/memory_manager.h @@ -60,11 +60,13 @@ private: vector allocations; size_t size; /* Size of all allocations. */ - DeviceBuffer() : buffer(new device_memory), size(0) + DeviceBuffer() + : buffer(NULL), size(0) { } - ~DeviceBuffer() { + ~DeviceBuffer() + { delete buffer; buffer = NULL; } diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index bd956e29083..1dd4ad7df7f 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -340,7 +340,7 @@ public: virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, vector &programs) = 0; - void mem_alloc(const char *name, device_memory& mem, MemoryType type); + void mem_alloc(device_memory& mem); void mem_copy_to(device_memory& mem); void mem_copy_from(device_memory& mem, int y, int w, int h, int elem); void mem_zero(device_memory& mem); @@ -349,10 +349,7 @@ public: int mem_address_alignment(); void const_copy_to(const char *name, void *host, size_t size); - void tex_alloc(const char *name, - device_memory& mem, - InterpolationType /*interpolation*/, - ExtensionType /*extension*/); + void tex_alloc(device_memory& mem); void tex_free(device_memory& mem); size_t global_size_round_up(int group_size, int global_size); @@ -440,7 +437,7 @@ protected: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task); - device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type); + device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size); void mem_free_sub_ptr(device_ptr ptr); class ArgumentWrapper { @@ -550,21 +547,7 @@ private: vector texture_info; device_memory texture_info_buffer; - struct Texture { - Texture() {} - Texture(device_memory* mem, - InterpolationType interpolation, - ExtensionType extension) - : mem(mem), - interpolation(interpolation), - extension(extension) { - } - device_memory* mem; - InterpolationType interpolation; - ExtensionType extension; - }; - - typedef map TexturesMap; + typedef map TexturesMap; TexturesMap textures; bool textures_need_update; diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 48c32a9dc5c..89ab1a43e68 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -72,7 +72,9 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where) } OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) -: Device(info, stats, background_), memory_manager(this) +: Device(info, stats, background_), + memory_manager(this), + texture_info_buffer(this, "__texture_info", MEM_READ_ONLY) { cpPlatform = NULL; cdDevice = NULL; @@ -286,10 +288,10 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea return true; } -void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type) +void OpenCLDeviceBase::mem_alloc(device_memory& mem) { - if(name) { - VLOG(1) << "Buffer allocate: " << name << ", " + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; } @@ -307,8 +309,8 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp if(size > max_alloc_size) { string error = "Scene too complex to fit in available memory."; - if(name != NULL) { - error += string_printf(" (allocating buffer %s failed.)", name); + if(mem.name != NULL) { + error += string_printf(" (allocating buffer %s failed.)", mem.name); } set_error(error); @@ -318,9 +320,9 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp cl_mem_flags mem_flag; void *mem_ptr = NULL; - if(type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY) mem_flag = CL_MEM_READ_ONLY; - else if(type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -461,12 +463,12 @@ int OpenCLDeviceBase::mem_address_alignment() return OpenCLInfo::mem_address_alignment(cdDevice); } -device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type) +device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size) { cl_mem_flags mem_flag; - if(type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY) mem_flag = CL_MEM_READ_ONLY; - else if(type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -497,10 +499,10 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) device_vector *data; if(i == const_mem_map.end()) { - data = new device_vector(); + data = new device_vector(this, name, MEM_READ_ONLY); data->resize(size); - mem_alloc(name, *data, MEM_READ_ONLY); + mem_alloc(*data); const_mem_map.insert(ConstMemMap::value_type(name, data)); } else { @@ -511,19 +513,16 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) mem_copy_to(*data); } -void OpenCLDeviceBase::tex_alloc(const char *name, - device_memory& mem, - InterpolationType interpolation, - ExtensionType extension) +void OpenCLDeviceBase::tex_alloc(device_memory& mem) { - VLOG(1) << "Texture allocate: " << name << ", " + VLOG(1) << "Texture allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - memory_manager.alloc(name, mem); + memory_manager.alloc(mem.name, mem); /* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */ mem.device_pointer = 1; - textures[name] = Texture(&mem, interpolation, extension); + textures[mem.name] = &mem; textures_need_update = true; } @@ -537,7 +536,7 @@ void OpenCLDeviceBase::tex_free(device_memory& mem) } foreach(TexturesMap::value_type& value, textures) { - if(value.second.mem == &mem) { + if(value.second == &mem) { textures.erase(value.first); break; } @@ -658,22 +657,21 @@ void OpenCLDeviceBase::flush_texture_buffers() /* Fill in descriptors */ foreach(texture_slot_t& slot, texture_slots) { - Texture& tex = textures[slot.name]; - TextureInfo& info = texture_info[slot.slot]; MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); - info.data = desc.offset; info.cl_buffer = desc.device_buffer; if(string_startswith(slot.name, "__tex_image")) { - info.width = tex.mem->data_width; - info.height = tex.mem->data_height; - info.depth = tex.mem->data_depth; + device_memory *mem = textures[slot.name]; + + info.width = mem->data_width; + info.height = mem->data_height; + info.depth = mem->data_depth; - info.interpolation = tex.interpolation; - info.extension = tex.extension; + info.interpolation = mem->interpolation; + info.extension = mem->extension; } } @@ -1045,7 +1043,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_WRITE); + mem_alloc(task->tiles_mem); mem_copy_to(task->tiles_mem); cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 920106f92d4..3edb2442070 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -127,9 +127,9 @@ public: } KernelGlobals; /* Allocate buffer for kernel globals */ - device_memory kgbuffer; + device_memory kgbuffer(this, "kernel_globals", MEM_READ_WRITE); kgbuffer.resize(sizeof(KernelGlobals)); - mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); + mem_alloc(kgbuffer); /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { @@ -288,9 +288,9 @@ public: virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { - device_vector size_buffer; + device_vector size_buffer(device, "size_buffer", MEM_READ_WRITE); size_buffer.resize(1); - device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + device->mem_alloc(size_buffer); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 84a988f1dbc..abd67879690 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -41,11 +41,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, - size_t width, - size_t height, - size_t depth, - InterpolationType interpolation=INTERPOLATION_LINEAR, - ExtensionType extension = EXTENSION_REPEAT); + size_t size); #define KERNEL_ARCH cpu #include "kernel/kernels/cpu/kernel_cpu.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index 7679ab4f111..0ea5b1999aa 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -75,11 +75,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, - size_t width, - size_t height, - size_t depth, - InterpolationType interpolation, - ExtensionType extension) + size_t size) { if(0) { } @@ -87,7 +83,7 @@ void kernel_tex_copy(KernelGlobals *kg, #define KERNEL_TEX(type, tname) \ else if(strcmp(name, #tname) == 0) { \ kg->tname.data = (type*)mem; \ - kg->tname.width = width; \ + kg->tname.width = size; \ } #define KERNEL_IMAGE_TEX(type, tname) #include "kernel/kernel_textures.h" diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 2bedf3668f7..66615bf336c 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -150,7 +150,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); /* setup input for device task */ - device_vector d_input; + device_vector d_input(device, "bake_input", MEM_READ_ONLY); uint4 *d_input_data = d_input.resize(shader_size * 2); size_t d_input_size = 0; @@ -165,15 +165,15 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre } /* run device task */ - device_vector d_output; + device_vector d_output(device, "bake_output", MEM_READ_WRITE); d_output.resize(shader_size); /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc("bake_input", d_input, MEM_READ_ONLY); + device->mem_alloc(d_input); device->mem_copy_to(d_input); - device->mem_alloc("bake_output", d_output, MEM_READ_WRITE); + device->mem_alloc(d_output); device->mem_zero(d_output); DeviceTask task(DeviceTask::SHADER); diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index b7477ffadd0..2342dd52d86 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -114,9 +114,10 @@ RenderTile::RenderTile() /* Render Buffers */ -RenderBuffers::RenderBuffers(Device *device_) +RenderBuffers::RenderBuffers(Device *device) +: buffer(device, "RenderBuffers", MEM_READ_WRITE), + device(device) { - device = device_; } RenderBuffers::~RenderBuffers() @@ -138,10 +139,10 @@ void RenderBuffers::reset(Device *device, BufferParams& params_) /* free existing buffers */ device_free(); - + /* allocate buffer */ buffer.resize(params.width*params.height*params.get_passes_size()); - device->mem_alloc("render_buffer", buffer, MEM_READ_WRITE); + device->mem_alloc(buffer); device->mem_zero(buffer); } @@ -396,13 +397,15 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int /* Display Buffer */ -DisplayBuffer::DisplayBuffer(Device *device_, bool linear) +DisplayBuffer::DisplayBuffer(Device *device, bool linear) +: draw_width(0), + draw_height(0), + transparent(true), /* todo: determine from background */ + half_float(linear), + rgba_byte(device, "display buffer byte", MEM_WRITE_ONLY), + rgba_half(device, "display buffer half", MEM_WRITE_ONLY), + device(device) { - device = device_; - draw_width = 0; - draw_height = 0; - transparent = true; /* todo: determine from background */ - half_float = linear; } DisplayBuffer::~DisplayBuffer() diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index c9fbd237010..e7f5ff002b7 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -729,7 +729,7 @@ void ImageManager::device_load_image(Device *device, /* Create new texture. */ if(type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector *tex_img = new device_vector(); + device_vector *tex_img = new device_vector(device, name.c_str()); if(!file_load_image(img, type, @@ -748,7 +748,7 @@ void ImageManager::device_load_image(Device *device, img->mem = tex_img; } else if(type == IMAGE_DATA_TYPE_FLOAT) { - device_vector *tex_img = new device_vector(); + device_vector *tex_img = new device_vector(device, name.c_str()); if(!file_load_image(img, type, @@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device, img->mem = tex_img; } else if(type == IMAGE_DATA_TYPE_BYTE4) { - device_vector *tex_img = new device_vector(); + device_vector *tex_img = new device_vector(device, name.c_str()); if(!file_load_image(img, type, @@ -783,7 +783,7 @@ void ImageManager::device_load_image(Device *device, img->mem = tex_img; } else if(type == IMAGE_DATA_TYPE_BYTE) { - device_vector *tex_img = new device_vector(); + device_vector *tex_img = new device_vector(device, name.c_str()); if(!file_load_image(img, type, @@ -798,7 +798,7 @@ void ImageManager::device_load_image(Device *device, img->mem = tex_img; } else if(type == IMAGE_DATA_TYPE_HALF4) { - device_vector *tex_img = new device_vector(); + device_vector *tex_img = new device_vector(device, name.c_str()); if(!file_load_image(img, type, @@ -816,7 +816,7 @@ void ImageManager::device_load_image(Device *device, img->mem = tex_img; } else if(type == IMAGE_DATA_TYPE_HALF) { - device_vector *tex_img = new device_vector(); + device_vector *tex_img = new device_vector(device, name.c_str()); if(!file_load_image(img, type, @@ -833,11 +833,11 @@ void ImageManager::device_load_image(Device *device, /* Copy to device. */ if(img->mem) { + img->mem->interpolation = img->interpolation; + img->mem->extension = img->extension; + thread_scoped_lock device_lock(device_mutex); - device->tex_alloc(name.c_str(), - *img->mem, - img->interpolation, - img->extension); + device->tex_alloc(*img->mem); } diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index b268478e6d3..b128f18db08 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -195,7 +195,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); - device->tex_alloc("__sobol_directions", dscene->sobol_directions); + device->tex_alloc(dscene->sobol_directions); /* Clamping. */ bool use_sample_clamp = (sample_clamp_direct != 0.0f || diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index bb73ebd7e41..9664e1310d5 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -36,8 +36,8 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res int width = res; int height = res; - device_vector d_input; - device_vector d_output; + device_vector d_input(device, "background_input", MEM_READ_ONLY); + device_vector d_output(device, "background_output", MEM_WRITE_ONLY); uint4 *d_input_data = d_input.resize(width*height); @@ -57,9 +57,9 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY); + device->mem_alloc(d_input); device->mem_copy_to(d_input); - device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY); + device->mem_alloc(d_output); device->mem_zero(d_output); DeviceTask main_task(DeviceTask::SHADER); @@ -451,7 +451,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights)/(float)num_lights; /* CDF */ - device->tex_alloc("__light_distribution", dscene->light_distribution); + device->tex_alloc(dscene->light_distribution); /* Portals */ if(num_portals > 0) { @@ -611,8 +611,8 @@ void LightManager::device_update_background(Device *device, VLOG(2) << "Background MIS build time " << time_dt() - time_start << "\n"; /* update device */ - device->tex_alloc("__light_background_marginal_cdf", dscene->light_background_marginal_cdf); - device->tex_alloc("__light_background_conditional_cdf", dscene->light_background_conditional_cdf); + device->tex_alloc(dscene->light_background_marginal_cdf); + device->tex_alloc(dscene->light_background_conditional_cdf); } void LightManager::device_update_points(Device *device, @@ -813,7 +813,7 @@ void LightManager::device_update_points(Device *device, VLOG(1) << "Number of lights without contribution: " << num_scene_lights - light_index; - device->tex_alloc("__light_data", dscene->light_data); + device->tex_alloc(dscene->light_data); } void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 69c21fc3cb3..685272b80c1 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -1359,7 +1359,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce /* copy to device */ dscene->data.bvh.attributes_map_stride = attr_map_stride; - device->tex_alloc("__attributes_map", dscene->attributes_map); + device->tex_alloc(dscene->attributes_map); } static void update_attribute_element_size(Mesh *mesh, @@ -1617,13 +1617,13 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, progress.set_status("Updating Mesh", "Copying Attributes to device"); if(dscene->attributes_float.size()) { - device->tex_alloc("__attributes_float", dscene->attributes_float); + device->tex_alloc(dscene->attributes_float); } if(dscene->attributes_float3.size()) { - device->tex_alloc("__attributes_float3", dscene->attributes_float3); + device->tex_alloc(dscene->attributes_float3); } if(dscene->attributes_uchar4.size()) { - device->tex_alloc("__attributes_uchar4", dscene->attributes_uchar4); + device->tex_alloc(dscene->attributes_uchar4); } } @@ -1754,11 +1754,11 @@ void MeshManager::device_update_mesh(Device *device, /* vertex coordinates */ progress.set_status("Updating Mesh", "Copying Mesh to device"); - device->tex_alloc("__tri_shader", dscene->tri_shader); - device->tex_alloc("__tri_vnormal", dscene->tri_vnormal); - device->tex_alloc("__tri_vindex", dscene->tri_vindex); - device->tex_alloc("__tri_patch", dscene->tri_patch); - device->tex_alloc("__tri_patch_uv", dscene->tri_patch_uv); + device->tex_alloc(dscene->tri_shader); + device->tex_alloc(dscene->tri_vnormal); + device->tex_alloc(dscene->tri_vindex); + device->tex_alloc(dscene->tri_patch); + device->tex_alloc(dscene->tri_patch_uv); } if(curve_size != 0) { @@ -1772,8 +1772,8 @@ void MeshManager::device_update_mesh(Device *device, if(progress.get_cancel()) return; } - device->tex_alloc("__curve_keys", dscene->curve_keys); - device->tex_alloc("__curves", dscene->curves); + device->tex_alloc(dscene->curve_keys); + device->tex_alloc(dscene->curves); } if(patch_size != 0) { @@ -1791,7 +1791,7 @@ void MeshManager::device_update_mesh(Device *device, if(progress.get_cancel()) return; } - device->tex_alloc("__patches", dscene->patches); + device->tex_alloc(dscene->patches); } if(for_displacement) { @@ -1805,7 +1805,7 @@ void MeshManager::device_update_mesh(Device *device, prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]); } } - device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts); + device->tex_alloc(dscene->prim_tri_verts); } } @@ -1841,43 +1841,43 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * if(pack.nodes.size()) { dscene->bvh_nodes.steal_data(pack.nodes); - device->tex_alloc("__bvh_nodes", dscene->bvh_nodes); + device->tex_alloc(dscene->bvh_nodes); } if(pack.leaf_nodes.size()) { dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes); - device->tex_alloc("__bvh_leaf_nodes", dscene->bvh_leaf_nodes); + device->tex_alloc(dscene->bvh_leaf_nodes); } if(pack.object_node.size()) { dscene->object_node.steal_data(pack.object_node); - device->tex_alloc("__object_node", dscene->object_node); + device->tex_alloc(dscene->object_node); } if(pack.prim_tri_index.size()) { dscene->prim_tri_index.steal_data(pack.prim_tri_index); - device->tex_alloc("__prim_tri_index", dscene->prim_tri_index); + device->tex_alloc(dscene->prim_tri_index); } if(pack.prim_tri_verts.size()) { dscene->prim_tri_verts.steal_data(pack.prim_tri_verts); - device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts); + device->tex_alloc(dscene->prim_tri_verts); } if(pack.prim_type.size()) { dscene->prim_type.steal_data(pack.prim_type); - device->tex_alloc("__prim_type", dscene->prim_type); + device->tex_alloc(dscene->prim_type); } if(pack.prim_visibility.size()) { dscene->prim_visibility.steal_data(pack.prim_visibility); - device->tex_alloc("__prim_visibility", dscene->prim_visibility); + device->tex_alloc(dscene->prim_visibility); } if(pack.prim_index.size()) { dscene->prim_index.steal_data(pack.prim_index); - device->tex_alloc("__prim_index", dscene->prim_index); + device->tex_alloc(dscene->prim_index); } if(pack.prim_object.size()) { dscene->prim_object.steal_data(pack.prim_object); - device->tex_alloc("__prim_object", dscene->prim_object); + device->tex_alloc(dscene->prim_object); } if(pack.prim_time.size()) { dscene->prim_time.steal_data(pack.prim_time); - device->tex_alloc("__prim_time", dscene->prim_time); + device->tex_alloc(dscene->prim_time); } dscene->data.bvh.root = pack.root_index; diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index 350a56bf185..c06cf86ea9c 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -64,7 +64,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me /* setup input for device task */ const size_t num_verts = mesh->verts.size(); vector done(num_verts, false); - device_vector d_input; + device_vector d_input(device, "displace_input", MEM_READ_ONLY); uint4 *d_input_data = d_input.resize(num_verts); size_t d_input_size = 0; @@ -115,15 +115,15 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me return false; /* run device task */ - device_vector d_output; + device_vector d_output(device, "displace_output", MEM_WRITE_ONLY); d_output.resize(d_input_size); /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc("displace_input", d_input, MEM_READ_ONLY); + device->mem_alloc(d_input); device->mem_copy_to(d_input); - device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY); + device->mem_alloc(d_output); device->mem_zero(d_output); DeviceTask task(DeviceTask::SHADER); diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 12690090066..daa872239ce 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -534,9 +534,9 @@ void ObjectManager::device_update_transforms(Device *device, } } - device->tex_alloc("__objects", dscene->objects); + device->tex_alloc(dscene->objects); if(state.need_motion == Scene::MOTION_PASS) { - device->tex_alloc("__objects_vector", dscene->objects_vector); + device->tex_alloc(dscene->objects_vector); } dscene->data.bvh.have_motion = state.have_motion; @@ -638,7 +638,7 @@ void ObjectManager::device_update_flags(Device *device, } /* allocate object flag */ - device->tex_alloc("__object_flag", dscene->object_flag); + device->tex_alloc(dscene->object_flag); } void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene) @@ -672,7 +672,7 @@ void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene if(update) { device->tex_free(dscene->objects); - device->tex_alloc("__objects", dscene->objects); + device->tex_alloc(dscene->objects); } } diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index a51822a08be..a84ca51f274 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -91,7 +91,7 @@ void ParticleSystemManager::device_update_particles(Device *device, DeviceScene } } - device->tex_alloc("__particles", dscene->particles); + device->tex_alloc(dscene->particles); } void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 00c32312d9f..e362a35471d 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -40,8 +40,47 @@ CCL_NAMESPACE_BEGIN +DeviceScene::DeviceScene(Device *device) +: bvh_nodes(device, "__bvh_nodes"), + bvh_leaf_nodes(device, "__bvh_leaf_nodes"), + object_node(device, "__object_node"), + prim_tri_index(device, "__prim_tri_index"), + prim_tri_verts(device, "__prim_tri_verts"), + prim_type(device, "__prim_type"), + prim_visibility(device, "__prim_visibility"), + prim_index(device, "__prim_index"), + prim_object(device, "__prim_object"), + prim_time(device, "__prim_time"), + tri_shader(device, "__tri_shader"), + tri_vnormal(device, "__tri_vnormal"), + tri_vindex(device, "__tri_vindex"), + tri_patch(device, "__tri_patch"), + tri_patch_uv(device, "__tri_patch_uv"), + curves(device, "__curves"), + curve_keys(device, "__curve_keys"), + patches(device, "__patches"), + objects(device, "__objects"), + objects_vector(device, "__objects_vector"), + attributes_map(device, "__attributes_map"), + attributes_float(device, "__attributes_float"), + attributes_float3(device, "__attributes_float3"), + attributes_uchar4(device, "__attributes_uchar4"), + light_distribution(device, "__light_distribution"), + light_data(device, "__light_data"), + light_background_marginal_cdf(device, "__light_background_marginal_cdf"), + light_background_conditional_cdf(device, "__light_background_conditional_cdf"), + particles(device, "__particles"), + svm_nodes(device, "__svm_nodes"), + shader_flag(device, "__shader_flag"), + object_flag(device, "__object_flag"), + lookup_table(device, "__lookup_table"), + sobol_directions(device, "__sobol_directions") +{ + memset(&data, 0, sizeof(data)); +} + Scene::Scene(const SceneParams& params_, Device *device) -: device(device), params(params_) +: device(device), dscene(device), params(params_) { memset(&dscene.data, 0, sizeof(dscene.data)); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 23b9eb06a7b..204c38e5963 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -114,6 +114,8 @@ public: device_vector sobol_directions; KernelData data; + + DeviceScene(Device *device); }; /* Scene Parameters */ diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 3992ada2e85..a77df55e520 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -479,7 +479,7 @@ void ShaderManager::device_update_common(Device *device, has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; } - device->tex_alloc("__shader_flag", dscene->shader_flag); + device->tex_alloc(dscene->shader_flag); /* lookup tables */ KernelTables *ktables = &dscene->data.tables; diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index 278a8a87b20..cf0dc97ef3f 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -130,7 +130,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene } dscene->svm_nodes.steal_data(svm_nodes); - device->tex_alloc("__svm_nodes", dscene->svm_nodes); + device->tex_alloc(dscene->svm_nodes); for(i = 0; i < scene->shaders.size(); i++) { Shader *shader = scene->shaders[i]; diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index c08c83cfe11..9d04778abc6 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -45,7 +45,7 @@ void LookupTables::device_update(Device *device, DeviceScene *dscene) device->tex_free(dscene->lookup_table); if(lookup_tables.size() > 0) - device->tex_alloc("__lookup_table", dscene->lookup_table); + device->tex_alloc(dscene->lookup_table); need_update = false; } -- cgit v1.2.3 From aa8b4c5d8124c0379eeee9eacd1a0887a573d7d7 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Mon, 23 Oct 2017 19:32:59 +0200 Subject: Code refactor: use device_only_memory and device_vector in more places. --- intern/cycles/device/device_cpu.cpp | 2 +- intern/cycles/device/device_cuda.cpp | 2 +- intern/cycles/device/device_split_kernel.cpp | 2 +- intern/cycles/device/device_split_kernel.h | 2 +- intern/cycles/device/opencl/memory_manager.cpp | 10 ++++------ intern/cycles/device/opencl/memory_manager.h | 2 +- intern/cycles/device/opencl/opencl.h | 9 ++++++--- intern/cycles/device/opencl/opencl_base.cpp | 18 ++++++------------ intern/cycles/device/opencl/opencl_split.cpp | 4 ++-- 9 files changed, 23 insertions(+), 28 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 60c06462d4d..b4398f21014 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -745,7 +745,7 @@ public: while(task.acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel) { - device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY); + device_only_memory void_buffer(this, "void_buffer"); split_kernel->path_trace(&task, tile, kgbuffer, void_buffer); } else { diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 1295ec86355..be606a92434 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1728,7 +1728,7 @@ public: while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel()) { - device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY); + device_only_memory void_buffer(this, "void_buffer"); split_kernel->path_trace(task, tile, void_buffer, void_buffer); } else { diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 5283bd60bd5..6c8befa89be 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -28,7 +28,7 @@ static const double alpha = 0.1; /* alpha for rolling average */ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device), - split_data(device, "split_data", MEM_READ_WRITE), + split_data(device, "split_data"), ray_state(device, "ray_state", MEM_READ_WRITE), queue_index(device, "queue_index"), use_queues_flag(device, "use_queues_flag"), diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index 9c42cb58520..0647c664447 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -79,7 +79,7 @@ private: * kernel will be available to another kernel via this global * memory. */ - device_memory split_data; + device_only_memory split_data; device_vector ray_state; device_only_memory queue_index; /* Array of size num_queues that tracks the size of each queue. */ diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index 6deed4e3f0d..e48367b8987 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -73,9 +73,8 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) return; } - device_memory *new_buffer = new device_memory(device, - "memory manager buffer", - MEM_READ_ONLY); + device_only_memory *new_buffer = + new device_only_memory(device, "memory manager buffer"); new_buffer->resize(total_size); device->mem_alloc(*new_buffer); @@ -167,9 +166,8 @@ MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false) { foreach(DeviceBuffer& device_buffer, device_buffers) { - device_buffer.buffer = new device_memory(device, - "memory manager buffer", - MEM_READ_ONLY); + device_buffer.buffer = + new device_only_memory(device, "memory manager buffer"); } } diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h index 7ef74a79834..b3d861275f0 100644 --- a/intern/cycles/device/opencl/memory_manager.h +++ b/intern/cycles/device/opencl/memory_manager.h @@ -56,7 +56,7 @@ private: }; struct DeviceBuffer { - device_memory *buffer; + device_only_memory *buffer; vector allocations; size_t size; /* Size of all allocations. */ diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 1dd4ad7df7f..55848c8112d 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -457,6 +457,11 @@ protected: { } + template + ArgumentWrapper(device_only_memory& argument) : size(sizeof(void*)), + pointer((void*)(&argument.device_pointer)) + { + } template ArgumentWrapper(T& argument) : size(sizeof(argument)), pointer(&argument) @@ -543,9 +548,7 @@ private: friend class MemoryManager; static_assert_align(TextureInfo, 16); - - vector texture_info; - device_memory texture_info_buffer; + device_vector texture_info; typedef map TexturesMap; TexturesMap textures; diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 89ab1a43e68..90f461b4c98 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -138,11 +138,9 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou return; } - /* Allocate this right away so that texture_info_buffer is placed at offset 0 in the device memory buffers */ + /* Allocate this right away so that texture_info is placed at offset 0 in the device memory buffers */ texture_info.resize(1); - texture_info_buffer.resize(1); - texture_info_buffer.data_pointer = (device_ptr)&texture_info[0]; - memory_manager.alloc("texture_info", texture_info_buffer); + memory_manager.alloc("texture_info", texture_info); fprintf(stderr, "Device init success\n"); device_initialized = true; @@ -647,13 +645,9 @@ void OpenCLDeviceBase::flush_texture_buffers() } /* Realloc texture descriptors buffer. */ - memory_manager.free(texture_info_buffer); - + memory_manager.free(texture_info); texture_info.resize(num_slots); - texture_info_buffer.resize(num_slots * sizeof(TextureInfo)); - texture_info_buffer.data_pointer = (device_ptr)&texture_info[0]; - - memory_manager.alloc("texture_info", texture_info_buffer); + memory_manager.alloc("texture_info", texture_info); /* Fill in descriptors */ foreach(texture_slot_t& slot, texture_slots) { @@ -676,8 +670,8 @@ void OpenCLDeviceBase::flush_texture_buffers() } /* Force write of descriptors. */ - memory_manager.free(texture_info_buffer); - memory_manager.alloc("texture_info", texture_info_buffer); + memory_manager.free(texture_info); + memory_manager.alloc("texture_info", texture_info); } void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 3edb2442070..c966ebe0c5e 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -127,8 +127,8 @@ public: } KernelGlobals; /* Allocate buffer for kernel globals */ - device_memory kgbuffer(this, "kernel_globals", MEM_READ_WRITE); - kgbuffer.resize(sizeof(KernelGlobals)); + device_only_memory kgbuffer(this, "kernel_globals"); + kgbuffer.resize(1); mem_alloc(kgbuffer); /* Keep rendering tiles until done. */ -- cgit v1.2.3 From 070a668d04844610059aaedc80c49e9038fd1779 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 21 Oct 2017 01:09:59 +0200 Subject: Code refactor: move more memory allocation logic into device API. * Remove tex_* and pixels_* functions, replace by mem_*. * Add MEM_TEXTURE and MEM_PIXELS as memory types recognized by devices. * No longer create device_memory and call mem_* directly, always go through device_only_memory, device_vector and device_pixels. --- intern/cycles/device/device.cpp | 22 +-- intern/cycles/device/device.h | 32 ++- intern/cycles/device/device_cpu.cpp | 76 ++++--- intern/cycles/device/device_cuda.cpp | 263 ++++++++++++++----------- intern/cycles/device/device_denoising.cpp | 44 ++--- intern/cycles/device/device_memory.cpp | 62 ++++++ intern/cycles/device/device_memory.h | 240 ++++++++++++++++------ intern/cycles/device/device_multi.cpp | 161 ++++++--------- intern/cycles/device/device_network.cpp | 139 +++++-------- intern/cycles/device/device_network.h | 5 + intern/cycles/device/device_split_kernel.cpp | 37 ++-- intern/cycles/device/opencl/memory_manager.cpp | 8 +- intern/cycles/device/opencl/opencl_base.cpp | 75 ++++--- intern/cycles/device/opencl/opencl_split.cpp | 13 +- intern/cycles/render/bake.cpp | 22 +-- intern/cycles/render/buffers.cpp | 91 +++------ intern/cycles/render/buffers.h | 26 +-- intern/cycles/render/image.cpp | 82 +++++--- intern/cycles/render/integrator.cpp | 9 +- intern/cycles/render/light.cpp | 58 +++--- intern/cycles/render/mesh.cpp | 143 ++++++-------- intern/cycles/render/mesh_displace.cpp | 22 +-- intern/cycles/render/object.cpp | 32 ++- intern/cycles/render/particles.cpp | 11 +- intern/cycles/render/scene.cpp | 68 +++---- intern/cycles/render/session.cpp | 12 +- intern/cycles/render/shader.cpp | 12 +- intern/cycles/render/svm.cpp | 5 +- intern/cycles/render/tables.cpp | 11 +- intern/cycles/util/util_vector.h | 8 + 30 files changed, 934 insertions(+), 855 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 9de10c184fb..41fbe7ce81b 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -85,28 +85,12 @@ Device::~Device() } } -void Device::pixels_alloc(device_memory& mem) -{ - mem_alloc(mem); -} - -void Device::pixels_copy_from(device_memory& mem, int y, int w, int h) -{ - if(mem.data_type == TYPE_HALF) - mem_copy_from(mem, y, w, h, sizeof(half4)); - else - mem_copy_from(mem, y, w, h, sizeof(uchar4)); -} - -void Device::pixels_free(device_memory& mem) -{ - mem_free(mem); -} - void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { - pixels_copy_from(rgba, y, w, h); + assert(mem.type == MEM_PIXELS); + + mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1)); if(transparent) { glEnable(GL_BLEND); diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 6bb65cde2a3..316bf70a5c3 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -281,28 +281,12 @@ public: /* statistics */ Stats &stats; - /* regular memory */ - virtual void mem_alloc(device_memory& mem) = 0; - virtual void mem_copy_to(device_memory& mem) = 0; - virtual void mem_copy_from(device_memory& mem, - int y, int w, int h, int elem) = 0; - virtual void mem_zero(device_memory& mem) = 0; - virtual void mem_free(device_memory& mem) = 0; - + /* memory alignment */ virtual int mem_address_alignment() { return 16; } /* constant memory */ virtual void const_copy_to(const char *name, void *host, size_t size) = 0; - /* texture memory */ - virtual void tex_alloc(device_memory& /*mem*/) {}; - virtual void tex_free(device_memory& /*mem*/) {}; - - /* pixel memory */ - virtual void pixels_alloc(device_memory& mem); - virtual void pixels_copy_from(device_memory& mem, int y, int w, int h); - virtual void pixels_free(device_memory& mem); - /* open shading language, only for CPU device */ virtual void *osl_memory() { return NULL; } @@ -349,6 +333,20 @@ public: static void tag_update(); static void free_memory(); + +protected: + /* Memory allocation, only accessed through device_memory. */ + friend class MultiDevice; + friend class DeviceServer; + friend class device_memory; + + virtual void mem_alloc(device_memory& mem) = 0; + virtual void mem_copy_to(device_memory& mem) = 0; + virtual void mem_copy_from(device_memory& mem, + int y, int w, int h, int elem) = 0; + virtual void mem_zero(device_memory& mem) = 0; + virtual void mem_free(device_memory& mem) = 0; + private: /* Indicted whether device types and devices lists were initialized. */ static bool need_types_update, need_devices_update; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index b4398f21014..32ab18fe164 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -209,7 +209,7 @@ public: CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_) : Device(info_, stats_, background_), - texture_info(this, "__texture_info"), + texture_info(this, "__texture_info", MEM_TEXTURE), #define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name)) REGISTER_KERNEL(path_trace), REGISTER_KERNEL(convert_to_half_float), @@ -269,7 +269,7 @@ public: ~CPUDevice() { task_pool.stop(); - tex_free(texture_info); + texture_info.free(); } virtual bool show_samples() const @@ -280,33 +280,50 @@ public: void load_texture_info() { if(need_texture_info) { - tex_free(texture_info); - tex_alloc(texture_info); + texture_info.copy_to_device(); need_texture_info = false; } } void mem_alloc(device_memory& mem) { - if(mem.name) { - VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + if(mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); } + else { + if(mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + } - mem.device_pointer = mem.data_pointer; + mem.device_pointer = mem.data_pointer; - if(!mem.device_pointer) { - mem.device_pointer = (device_ptr)malloc(mem.memory_size()); - } + if(!mem.device_pointer) { + mem.device_pointer = (device_ptr)malloc(mem.memory_size()); + } - mem.device_size = mem.memory_size(); - stats.mem_alloc(mem.device_size); + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); + } } - void mem_copy_to(device_memory& /*mem*/) + void mem_copy_to(device_memory& mem) { - /* no-op */ + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else if(mem.type == MEM_PIXELS) { + assert(!"mem_copy_to not supported for pixels."); + } + else { + if(!mem.device_pointer) { + mem_alloc(mem); + } + + /* copy is no-op */ + } } void mem_copy_from(device_memory& /*mem*/, @@ -318,12 +335,21 @@ public: void mem_zero(device_memory& mem) { - memset((void*)mem.device_pointer, 0, mem.memory_size()); + if(!mem.device_pointer) { + mem_alloc(mem); + } + + if(mem.device_pointer) { + memset((void*)mem.device_pointer, 0, mem.memory_size()); + } } void mem_free(device_memory& mem) { - if(mem.device_pointer) { + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else if(mem.device_pointer) { if(!mem.data_pointer) { free((void*)mem.device_pointer); } @@ -354,7 +380,7 @@ public: kernel_tex_copy(&kernel_globals, mem.name, mem.data_pointer, - mem.data_width); + mem.data_size); } else { /* Image Texture. */ @@ -431,13 +457,13 @@ public: bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { tiles->buffers[i] = buffers[i]; } + task->tiles_mem.copy_to_device(); + return true; } @@ -723,8 +749,7 @@ public: /* allocate buffer for kernel globals */ device_only_memory kgbuffer(this, "kernel_globals"); - kgbuffer.resize(1); - mem_alloc(kgbuffer); + kgbuffer.alloc_to_device(1); KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init()); @@ -734,8 +759,7 @@ public: requested_features.max_closure = MAX_CLOSURE; if(!split_kernel->load_kernels(requested_features)) { thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); - mem_free(kgbuffer); - + kgbuffer.free(); delete split_kernel; return; } @@ -766,7 +790,7 @@ public: thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); kg->~KernelGlobals(); - mem_free(kgbuffer); + kgbuffer.free(); delete split_kernel; } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index be606a92434..aa6386e455b 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -218,7 +218,7 @@ public: CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_), - texture_info(this, "__texture_info") + texture_info(this, "__texture_info", MEM_TEXTURE) { first_error = true; background = background_; @@ -275,7 +275,7 @@ public: delete split_kernel; if(info.has_bindless_textures) { - tex_free(texture_info); + texture_info.free(); } cuda_assert(cuCtxDestroy(cuContext)); @@ -548,20 +548,19 @@ public: void load_texture_info() { if(info.has_bindless_textures && need_texture_info) { - tex_free(texture_info); - tex_alloc(texture_info); + texture_info.copy_to_device(); need_texture_info = false; } } - void mem_alloc(device_memory& mem) + void generic_alloc(device_memory& mem) { CUDAContextScope scope(this); if(mem.name) { VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; } CUdeviceptr device_pointer; @@ -572,31 +571,88 @@ public: stats.mem_alloc(size); } + void generic_copy_to(device_memory& mem) + { + if(mem.device_pointer) { + CUDAContextScope scope(this); + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + } + } + + void generic_free(device_memory& mem) + { + if(mem.device_pointer) { + CUDAContextScope scope(this); + + cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); + + mem.device_pointer = 0; + + stats.mem_free(mem.device_size); + mem.device_size = 0; + } + } + + void mem_alloc(device_memory& mem) + { + if(mem.type == MEM_PIXELS && !background) { + pixels_alloc(mem); + } + else if(mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); + } + else { + generic_alloc(mem); + } + } + void mem_copy_to(device_memory& mem) { - CUDAContextScope scope(this); + if(mem.type == MEM_PIXELS) { + assert(!"mem_copy_to not supported for pixels."); + } + else if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else { + if(!mem.device_pointer) { + generic_alloc(mem); + } - if(mem.device_pointer) - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); + generic_copy_to(mem); + } } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { - CUDAContextScope scope(this); - size_t offset = elem*y*w; - size_t size = elem*w*h; - - if(mem.device_pointer) { - cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, - (CUdeviceptr)(mem.device_pointer + offset), size)); + if(mem.type == MEM_PIXELS && !background) { + pixels_copy_from(mem, y, w, h); + } + else if(mem.type == MEM_TEXTURE) { + assert(!"mem_copy_from not supported for textures."); } else { - memset((char*)mem.data_pointer + offset, 0, size); + CUDAContextScope scope(this); + size_t offset = elem*y*w; + size_t size = elem*w*h; + + if(mem.device_pointer) { + cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, + (CUdeviceptr)(mem.device_pointer + offset), size)); + } + else { + memset((char*)mem.data_pointer + offset, 0, size); + } } } void mem_zero(device_memory& mem) { + if(!mem.device_pointer) { + mem_alloc(mem); + } + if(mem.data_pointer) { memset((void*)mem.data_pointer, 0, mem.memory_size()); } @@ -609,14 +665,14 @@ public: void mem_free(device_memory& mem) { - if(mem.device_pointer) { - CUDAContextScope scope(this); - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); - - mem.device_pointer = 0; - - stats.mem_free(mem.device_size); - mem.device_size = 0; + if(mem.type == MEM_PIXELS && !background) { + pixels_free(mem); + } + else if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else { + generic_free(mem); } } @@ -700,8 +756,8 @@ public: if(mem.interpolation == INTERPOLATION_NONE) { /* Data Storage */ - mem_alloc(mem); - mem_copy_to(mem); + generic_alloc(mem); + generic_copy_to(mem); CUdeviceptr cumem; size_t cubytes; @@ -891,21 +947,19 @@ public: } else { tex_interp_map.erase(tex_interp_map.find(mem.device_pointer)); - mem_free(mem); + generic_free(mem); } } } bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer; for(int i = 0; i < 9; i++) { tiles->buffers[i] = buffers[i]; } - mem_copy_to(task->tiles_mem); + task->tiles_mem.copy_to_device(); return !have_error(); } @@ -1272,7 +1326,7 @@ public: task.unmap_neighbor_tiles(rtiles, this); } - void path_trace(DeviceTask& task, RenderTile& rtile) + void path_trace(DeviceTask& task, RenderTile& rtile, device_vector& work_tiles) { if(have_error()) return; @@ -1295,8 +1349,7 @@ public: cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); /* Allocate work tile. */ - device_vector work_tiles(this, "work_tiles", MEM_READ_ONLY); - work_tiles.resize(1); + work_tiles.alloc(1); WorkTile *wtile = work_tiles.get_data(); wtile->x = rtile.x; @@ -1306,9 +1359,6 @@ public: wtile->offset = rtile.offset; wtile->stride = rtile.stride; wtile->buffer = (float*)cuda_device_ptr(rtile.buffer); - mem_alloc(work_tiles); - - CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); /* Prepare work size. More step samples render faster, but for now we * remain conservative for GPUs connected to a display to avoid driver @@ -1329,8 +1379,9 @@ public: /* Setup and copy work tile to device. */ wtile->start_sample = sample; wtile->num_samples = min(step_samples, end_sample - sample);; - mem_copy_to(work_tiles); + work_tiles.copy_to_device(); + CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); uint total_work_size = wtile->w * wtile->h * wtile->num_samples; uint num_blocks = divide_up(total_work_size, num_threads_per_block); @@ -1354,8 +1405,6 @@ public: break; } } - - mem_free(work_tiles); } void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) @@ -1508,104 +1557,90 @@ public: void pixels_alloc(device_memory& mem) { - if(!background) { - PixelMem pmem; - - pmem.w = mem.data_width; - pmem.h = mem.data_height; + PixelMem pmem; - CUDAContextScope scope(this); + pmem.w = mem.data_width; + pmem.h = mem.data_height; - glGenBuffers(1, &pmem.cuPBO); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - if(mem.data_type == TYPE_HALF) - glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW); - else - glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW); + CUDAContextScope scope(this); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glGenBuffers(1, &pmem.cuPBO); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + if(mem.data_type == TYPE_HALF) + glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW); + else + glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW); - glGenTextures(1, &pmem.cuTexId); - glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); - if(mem.data_type == TYPE_HALF) - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); - else - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glBindTexture(GL_TEXTURE_2D, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); + glGenTextures(1, &pmem.cuTexId); + glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); + if(mem.data_type == TYPE_HALF) + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); + else + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); - if(result == CUDA_SUCCESS) { - mem.device_pointer = pmem.cuTexId; - pixel_mem_map[mem.device_pointer] = pmem; + CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); - mem.device_size = mem.memory_size(); - stats.mem_alloc(mem.device_size); + if(result == CUDA_SUCCESS) { + mem.device_pointer = pmem.cuTexId; + pixel_mem_map[mem.device_pointer] = pmem; - return; - } - else { - /* failed to register buffer, fallback to no interop */ - glDeleteBuffers(1, &pmem.cuPBO); - glDeleteTextures(1, &pmem.cuTexId); + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); - background = true; - } + return; } + else { + /* failed to register buffer, fallback to no interop */ + glDeleteBuffers(1, &pmem.cuPBO); + glDeleteTextures(1, &pmem.cuTexId); - Device::pixels_alloc(mem); + background = true; + } } void pixels_copy_from(device_memory& mem, int y, int w, int h) { - if(!background) { - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - - CUDAContextScope scope(this); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); - size_t offset = sizeof(uchar)*4*y*w; - memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h); - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + PixelMem pmem = pixel_mem_map[mem.device_pointer]; - return; - } + CUDAContextScope scope(this); - Device::pixels_copy_from(mem, y, w, h); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); + size_t offset = sizeof(uchar)*4*y*w; + memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } void pixels_free(device_memory& mem) { if(mem.device_pointer) { - if(!background) { - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - - CUDAContextScope scope(this); - - cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); - glDeleteBuffers(1, &pmem.cuPBO); - glDeleteTextures(1, &pmem.cuTexId); + PixelMem pmem = pixel_mem_map[mem.device_pointer]; - pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); - mem.device_pointer = 0; + CUDAContextScope scope(this); - stats.mem_free(mem.device_size); - mem.device_size = 0; + cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); + glDeleteBuffers(1, &pmem.cuPBO); + glDeleteTextures(1, &pmem.cuTexId); - return; - } + pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); + mem.device_pointer = 0; - Device::pixels_free(mem); + stats.mem_free(mem.device_size); + mem.device_size = 0; } } void draw_pixels(device_memory& mem, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { + assert(mem.type == MEM_PIXELS); + if(!background) { PixelMem pmem = pixel_mem_map[mem.device_pointer]; float *vpointer; @@ -1724,6 +1759,8 @@ public: } } + device_vector work_tiles(this, "work_tiles", MEM_READ_ONLY); + /* keep rendering tiles until done */ while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { @@ -1732,7 +1769,7 @@ public: split_kernel->path_trace(task, tile, void_buffer, void_buffer); } else { - path_trace(*task, tile); + path_trace(*task, tile, work_tiles); } } else if(tile.task == RenderTile::DENOISE) { @@ -1750,6 +1787,8 @@ public: break; } } + + work_tiles.free(); } else if(task->type == DeviceTask::SHADER) { shader(*task); @@ -1884,8 +1923,8 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory CUDAContextScope scope(device); device_vector size_buffer(device, "size_buffer", MEM_READ_WRITE); - size_buffer.resize(1); - device->mem_alloc(size_buffer); + size_buffer.alloc(1); + size_buffer.zero_to_device(); uint threads = num_threads; CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); @@ -1908,9 +1947,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory 1, 1, 1, 0, 0, (void**)&args, 0)); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); + size_buffer.copy_from_device(0, 1, 1); size_t size = size_buffer[0]; - device->mem_free(size_buffer); + size_buffer.free(); return size; } diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 2c3bfefd8b0..2d39721e3d3 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -44,7 +44,7 @@ void DenoisingTask::init_from_devicetask(const DeviceTask &task) void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles) { - tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int)); + tiles = (TilesInfo*) tiles_mem.alloc(sizeof(TilesInfo)/sizeof(int)); device_ptr buffers[9]; for(int i = 0; i < 9; i++) { @@ -75,8 +75,7 @@ bool DenoisingTask::run_denoising() buffer.w = align_up(rect.z - rect.x, 4); buffer.h = rect.w - rect.y; buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float))); - buffer.mem.resize(buffer.pass_stride * buffer.passes); - device->mem_alloc(buffer.mem); + buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes); device_ptr null_ptr = (device_ptr) 0; @@ -161,8 +160,7 @@ bool DenoisingTask::run_denoising() int num_color_passes = 3; device_only_memory temp_color(device, "Denoising temporary color"); - temp_color.resize(3*buffer.pass_stride); - device->mem_alloc(temp_color); + temp_color.alloc_to_device(3*buffer.pass_stride); for(int pass = 0; pass < num_color_passes; pass++) { device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride); @@ -177,31 +175,25 @@ bool DenoisingTask::run_denoising() functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); } - device->mem_free(temp_color); + temp_color.free(); } storage.w = filter_area.z; storage.h = filter_area.w; - storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE); - storage.rank.resize(storage.w*storage.h); - device->mem_alloc(storage.transform); - device->mem_alloc(storage.rank); + storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE); + storage.rank.alloc_to_device(storage.w*storage.h); functions.construct_transform(); device_only_memory temporary_1(device, "Denoising NLM temporary 1"); device_only_memory temporary_2(device, "Denoising NLM temporary 2"); - temporary_1.resize(buffer.w*buffer.h); - temporary_2.resize(buffer.w*buffer.h); - device->mem_alloc(temporary_1); - device->mem_alloc(temporary_2); + temporary_1.alloc_to_device(buffer.w*buffer.h); + temporary_2.alloc_to_device(buffer.w*buffer.h); reconstruction_state.temporary_1_ptr = temporary_1.device_pointer; reconstruction_state.temporary_2_ptr = temporary_2.device_pointer; - storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE); - storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE); - device->mem_alloc(storage.XtWX); - device->mem_alloc(storage.XtWY); + storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE); + storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE); reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x; @@ -218,14 +210,14 @@ bool DenoisingTask::run_denoising() functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } - device->mem_free(storage.XtWX); - device->mem_free(storage.XtWY); - device->mem_free(storage.transform); - device->mem_free(storage.rank); - device->mem_free(temporary_1); - device->mem_free(temporary_2); - device->mem_free(buffer.mem); - device->mem_free(tiles_mem); + storage.XtWX.free(); + storage.XtWY.free(); + storage.transform.free(); + storage.rank.free(); + temporary_1.free(); + temporary_2.free(); + buffer.mem.free(); + tiles_mem.free(); return true; } diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 98fa638ef8e..9f4f60e7531 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -43,6 +43,68 @@ device_memory::~device_memory() { } +device_ptr device_memory::host_alloc(size_t size) +{ + if(!size) { + return 0; + } + + size_t alignment = device->mem_address_alignment(); + device_ptr ptr = (device_ptr)util_aligned_malloc(size, alignment); + + if(ptr) { + util_guarded_mem_alloc(size); + } + else { + throw std::bad_alloc(); + } + + return ptr; +} + +void device_memory::host_free(device_ptr ptr, size_t size) +{ + if(ptr) { + util_guarded_mem_free(size); + util_aligned_free((void*)ptr); + } +} + +void device_memory::device_alloc() +{ + assert(!device_pointer && type != MEM_TEXTURE); + device->mem_alloc(*this); +} + +void device_memory::device_free() +{ + if(device_pointer) { + device->mem_free(*this); + } +} + +void device_memory::device_copy_to() +{ + assert(type != MEM_PIXELS && type != MEM_WRITE_ONLY); + if(data_size) { + device->mem_copy_to(*this); + } +} + +void device_memory::device_copy_from(int y, int w, int h, int elem) +{ + assert(type != MEM_TEXTURE && type != MEM_READ_ONLY); + device->mem_copy_from(*this, y, w, h, elem); +} + +void device_memory::device_zero() +{ + assert(type != MEM_PIXELS && type != MEM_WRITE_ONLY); + if(data_size) { + device->mem_zero(*this); + } +} + /* Device Sub Ptr */ device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size) diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 3dfecde59d8..7bf8bdc1cea 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -19,14 +19,7 @@ /* Device Memory * - * This file defines data types that can be used in device memory arrays, and - * a device_vector type to store such arrays. - * - * device_vector contains an STL vector, metadata about the data type, - * dimensions, elements, and a device pointer. For the CPU device this is just - * a pointer to the STL vector data, as no copying needs to take place. For - * other devices this is a pointer to device memory, where we will copy memory - * to and from. */ + * Data types for allocating, copying and freeing device memory. */ #include "util/util_debug.h" #include "util/util_half.h" @@ -41,7 +34,9 @@ class Device; enum MemoryType { MEM_READ_ONLY, MEM_WRITE_ONLY, - MEM_READ_WRITE + MEM_READ_WRITE, + MEM_TEXTURE, + MEM_PIXELS }; /* Supported Data Types */ @@ -172,7 +167,10 @@ template<> struct device_type_traits { static const int num_elements = 1; }; -/* Device Memory */ +/* Device Memory + * + * Base class for all device memory. This should not be allocated directly, + * instead the appropriate subclass can be used. */ class device_memory { @@ -182,7 +180,7 @@ public: return elements*data_elements*datatype_size(data_type); } - /* data information */ + /* Data information. */ DataType data_type; int data_elements; device_ptr data_pointer; @@ -196,25 +194,39 @@ public: InterpolationType interpolation; ExtensionType extension; - /* device pointer */ + /* Device pointer. */ Device *device; device_ptr device_pointer; - device_memory(Device *device, const char *name, MemoryType type); virtual ~device_memory(); - void resize(size_t size) - { - data_size = size; - data_width = size; - } - protected: - /* no copying */ + /* Only create through subclasses. */ + device_memory(Device *device, const char *name, MemoryType type); + + /* No copying allowed. */ device_memory(const device_memory&); device_memory& operator = (const device_memory&); + + /* Host allocation on the device. All data_pointer memory should be + * allocated with these functions, for devices that support using + * the same pointer for host and device. */ + device_ptr host_alloc(size_t size); + void host_free(device_ptr ptr, size_t size); + + /* Device memory allocation and copying. */ + void device_alloc(); + void device_free(); + void device_copy_to(); + void device_copy_from(int y, int w, int h, int elem); + void device_zero(); }; +/* Device Only Memory + * + * Working memory only needed by the device, with no corresponding allocation + * on the host. Only used internally in the device implementations. */ + template class device_only_memory : public device_memory { @@ -226,18 +238,43 @@ public: data_elements = max(device_type_traits::num_elements, 1); } - void resize(size_t num) + virtual ~device_only_memory() + { + free(); + } + + void alloc_to_device(size_t num) + { + data_size = num*sizeof(T); + device_alloc(); + } + + void free() + { + device_free(); + } + + void zero_to_device() { - device_memory::resize(num*sizeof(T)); + device_zero(); } }; -/* Device Vector */ +/* Device Vector + * + * Data vector to exchange data between host and device. Memory will be + * allocated on the host first with alloc() and resize, and then filled + * in and copied to the device with copy_to_device(). Or alternatively + * allocated and set to zero on the device with zero_to_device(). + * + * When using memory type MEM_TEXTURE, a pointer to this memory will be + * automatically attached to kernel globals, using the provided name + * matching an entry in kernel_textures.h. */ template class device_vector : public device_memory { public: - device_vector(Device *device, const char *name, MemoryType type = MEM_READ_ONLY) + device_vector(Device *device, const char *name, MemoryType type) : device_memory(device, name, type) { data_type = device_type_traits::data_type; @@ -246,84 +283,175 @@ public: assert(data_elements > 0); } - virtual ~device_vector() {} + virtual ~device_vector() + { + free(); + } - /* vector functions */ - T *resize(size_t width, size_t height = 0, size_t depth = 0) + /* Host memory allocation. */ + T *alloc(size_t width, size_t height = 0, size_t depth = 0) { - data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth); - if(data.resize(data_size) == NULL) { - clear(); - return NULL; + size_t new_size = size(width, height, depth); + + if(new_size != data_size) { + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + data_pointer = host_alloc(sizeof(T)*new_size); } + + data_size = new_size; data_width = width; data_height = height; data_depth = depth; - if(data_size == 0) { - data_pointer = 0; - return NULL; + assert(device_ptr == 0); + + return get_data(); + } + + /* Host memory resize. Only use this if the original data needs to be + * preserved, it is faster to call alloc() if it can be discarded. */ + T *resize(size_t width, size_t height = 0, size_t depth = 0) + { + size_t new_size = size(width, height, depth); + + if(new_size != data_size) { + device_ptr new_ptr = host_alloc(sizeof(T)*new_size); + + if(new_size && data_size) { + size_t min_size = ((new_size < data_size)? new_size: data_size); + memcpy((T*)new_ptr, (T*)data_pointer, sizeof(T)*min_size); + } + + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + data_pointer = new_ptr; } - data_pointer = (device_ptr)&data[0]; - return &data[0]; + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + assert(device_ptr == 0); + + return get_data(); } + /* Take over data from an existing array. */ void steal_data(array& from) { - data.steal_data(from); - data_size = data.size(); - data_pointer = (data_size)? (device_ptr)&data[0]: 0; - data_width = data_size; + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + + data_size = from.size(); + data_width = 0; data_height = 0; data_depth = 0; + data_pointer = (device_ptr)from.steal_pointer(); + assert(device_pointer == 0); } - void clear() + /* Free device and host memory. */ + void free() { - data.clear(); - data_pointer = 0; + device_free(); + host_free(data_pointer, sizeof(T)*data_size); + + data_size = 0; data_width = 0; data_height = 0; data_depth = 0; - data_size = 0; - device_pointer = 0; + data_pointer = 0; + assert(device_pointer == 0); } size_t size() { - return data.size(); + return data_size; } T* get_data() { - return &data[0]; + return (T*)data_pointer; } T& operator[](size_t i) { - return data[i]; + assert(i < data_size); + return get_data()[i]; } -private: - array data; + void copy_to_device() + { + device_copy_to(); + } + + void copy_from_device(int y, int w, int h) + { + device_copy_from(y, w, h, sizeof(T)); + } + + void zero_to_device() + { + device_zero(); + } + +protected: + size_t size(size_t width, size_t height, size_t depth) + { + return width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth); + } }; -/* A device_sub_ptr is a pointer into another existing memory. - * Therefore, it is not allocated separately, but just created from the already allocated base memory. - * It is freed automatically when it goes out of scope, which should happen before the base memory is freed. - * Note that some devices require the offset and size of the sub_ptr to be properly aligned. */ +/* Pixel Memory + * + * Device memory to efficiently draw as pixels to the screen in interactive + * rendering. Only copying pixels from the device is supported, not copying to. */ + +template class device_pixels : public device_vector +{ +public: + device_pixels(Device *device, const char *name) + : device_vector(device, name, MEM_PIXELS) + { + } + + void alloc_to_device(size_t width, size_t height, size_t depth = 0) + { + device_vector::alloc(width, height, depth); + device_memory::device_alloc(); + } + + T *copy_from_device(int y, int w, int h) + { + device_memory::device_copy_from(y, w, h, sizeof(T)); + return device_vector::get_data(); + } +}; + +/* Device Sub Memory + * + * Pointer into existing memory. It is not allocated separately, but created + * from an already allocated base memory. It is freed automatically when it + * goes out of scope, which should happen before base memory is freed. + * + * Note: some devices require offset and size of the sub_ptr to be properly + * aligned to device->mem_address_alingment(). */ + class device_sub_ptr { public: device_sub_ptr(device_memory& mem, int offset, int size); ~device_sub_ptr(); - /* No copying. */ - device_sub_ptr& operator = (const device_sub_ptr&); device_ptr operator*() const { return ptr; } + protected: + /* No copying. */ + device_sub_ptr& operator = (const device_sub_ptr&); + Device *device; device_ptr ptr; }; diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 7f7fbc0d1d3..0a6dd90c86d 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -43,10 +43,10 @@ public: }; list devices; - device_ptr unique_ptr; + device_ptr unique_key; MultiDevice(DeviceInfo& info, Stats &stats, bool background_) - : Device(info, stats, background_), unique_ptr(1) + : Device(info, stats, background_), unique_key(1) { Device *device; @@ -108,68 +108,87 @@ public: void mem_alloc(device_memory& mem) { + device_ptr key = unique_key++; + foreach(SubDevice& sub, devices) { + mem.device = sub.device; mem.device_pointer = 0; + sub.device->mem_alloc(mem); - sub.ptr_map[unique_ptr] = mem.device_pointer; + sub.ptr_map[key] = mem.device_pointer; } - mem.device_pointer = unique_ptr++; - stats.mem_alloc(mem.device_size); + mem.device = this; + mem.device_pointer = key; } void mem_copy_to(device_memory& mem) { - device_ptr tmp = mem.device_pointer; + device_ptr existing_key = mem.device_pointer; + device_ptr key = (existing_key)? existing_key: unique_key++; foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0; + sub.device->mem_copy_to(mem); + sub.ptr_map[key] = mem.device_pointer; } - mem.device_pointer = tmp; + mem.device = this; + mem.device_pointer = key; } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { - device_ptr tmp = mem.device_pointer; + device_ptr key = mem.device_pointer; int i = 0, sub_h = h/devices.size(); foreach(SubDevice& sub, devices) { int sy = y + i*sub_h; int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h; - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + sub.device->mem_copy_from(mem, sy, w, sh, elem); i++; } - mem.device_pointer = tmp; + mem.device = this; + mem.device_pointer = key; } void mem_zero(device_memory& mem) { - device_ptr tmp = mem.device_pointer; + device_ptr existing_key = mem.device_pointer; + device_ptr key = (existing_key)? existing_key: unique_key++; foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0; + sub.device->mem_zero(mem); + sub.ptr_map[key] = mem.device_pointer; } - mem.device_pointer = tmp; + mem.device = this; + mem.device_pointer = key; } void mem_free(device_memory& mem) { - device_ptr tmp = mem.device_pointer; - stats.mem_free(mem.device_size); + device_ptr key = mem.device_pointer; foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + sub.device->mem_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(tmp)); + sub.ptr_map.erase(sub.ptr_map.find(key)); } + mem.device = this; mem.device_pointer = 0; } @@ -179,81 +198,10 @@ public: sub.device->const_copy_to(name, host, size); } - void tex_alloc(device_memory& mem) - { - VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; - - foreach(SubDevice& sub, devices) { - mem.device_pointer = 0; - sub.device->tex_alloc(mem); - sub.ptr_map[unique_ptr] = mem.device_pointer; - } - - mem.device_pointer = unique_ptr++; - stats.mem_alloc(mem.device_size); - } - - void tex_free(device_memory& mem) - { - device_ptr tmp = mem.device_pointer; - stats.mem_free(mem.device_size); - - foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; - sub.device->tex_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(tmp)); - } - - mem.device_pointer = 0; - } - - void pixels_alloc(device_memory& mem) - { - foreach(SubDevice& sub, devices) { - mem.device_pointer = 0; - sub.device->pixels_alloc(mem); - sub.ptr_map[unique_ptr] = mem.device_pointer; - } - - mem.device_pointer = unique_ptr++; - } - - void pixels_free(device_memory& mem) - { - device_ptr tmp = mem.device_pointer; - - foreach(SubDevice& sub, devices) { - mem.device_pointer = sub.ptr_map[tmp]; - sub.device->pixels_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(tmp)); - } - - mem.device_pointer = 0; - } - - void pixels_copy_from(device_memory& mem, int y, int w, int h) - { - device_ptr tmp = mem.device_pointer; - int i = 0, sub_h = h/devices.size(); - - foreach(SubDevice& sub, devices) { - int sy = y + i*sub_h; - int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h; - - mem.device_pointer = sub.ptr_map[tmp]; - sub.device->pixels_copy_from(mem, sy, w, sh); - i++; - } - - mem.device_pointer = tmp; - } - void draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { - device_ptr tmp = rgba.device_pointer; + device_ptr key = rgba.device_pointer; int i = 0, sub_h = h/devices.size(); int sub_height = height/devices.size(); @@ -264,12 +212,12 @@ public: int sdy = dy + i*sub_height; /* adjust math for w/width */ - rgba.device_pointer = sub.ptr_map[tmp]; + rgba.device_pointer = sub.ptr_map[key]; sub.device->draw_pixels(rgba, sy, w, sh, dx, sdy, width, sheight, transparent, draw_params); i++; } - rgba.device_pointer = tmp; + rgba.device_pointer = key; } void map_tile(Device *sub_device, RenderTile& tile) @@ -304,15 +252,21 @@ public: * to the current device now, for the duration of the denoising task. * Note that this temporarily modifies the RenderBuffers and calls * the device, so this function is not thread safe. */ - if(tiles[i].buffers->device != sub_device) { - device_vector &mem = tiles[i].buffers->buffer; - + device_vector &mem = tiles[i].buffers->buffer; + if(mem.device != sub_device) { tiles[i].buffers->copy_from_device(); + + Device *original_device = mem.device; device_ptr original_ptr = mem.device_pointer; + + mem.device = sub_device; mem.device_pointer = 0; + sub_device->mem_alloc(mem); sub_device->mem_copy_to(mem); tiles[i].buffer = mem.device_pointer; + + mem.device = original_device; mem.device_pointer = original_ptr; } } @@ -324,25 +278,30 @@ public: if(!tiles[i].buffers) { continue; } - if(tiles[i].buffers->device != sub_device) { - device_vector &mem = tiles[i].buffers->buffer; + device_vector &mem = tiles[i].buffers->buffer; + if(mem.device != sub_device) { + Device *original_device = mem.device; device_ptr original_ptr = mem.device_pointer; + size_t original_size = mem.device_size; + + mem.device = sub_device; mem.device_pointer = tiles[i].buffer; /* Copy denoised tile to the host. */ if(i == 4) { - tiles[i].buffers->copy_from_device(sub_device); + tiles[i].buffers->copy_from_device(); } - size_t mem_size = mem.device_size; sub_device->mem_free(mem); + + mem.device = original_device; mem.device_pointer = original_ptr; - mem.device_size = mem_size; + mem.device_size = original_size; /* Copy denoised tile to the original device. */ if(i == 4) { - tiles[i].buffers->device->mem_copy_to(mem); + mem.copy_to_device(); } } } diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index bdc88b6acae..fa231c817e6 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -172,36 +172,6 @@ public: snd.write_buffer(host, size); } - void tex_alloc(device_memory& mem) - { - VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; - - thread_scoped_lock lock(rpc_lock); - - mem.device_pointer = ++mem_counter; - - RPCSend snd(socket, &error_func, "tex_alloc"); - snd.add(mem); - snd.write(); - snd.write_buffer((void*)mem.data_pointer, mem.memory_size()); - } - - void tex_free(device_memory& mem) - { - if(mem.device_pointer) { - thread_scoped_lock lock(rpc_lock); - - RPCSend snd(socket, &error_func, "tex_free"); - - snd.add(mem); - snd.write(); - - mem.device_pointer = 0; - } - } - bool load_kernels(const DeviceRequestedFeatures& requested_features) { if(error_func.have_error()) @@ -310,7 +280,7 @@ public: snd.write(); } - int get_split_task_count(DeviceTask& task) + int get_split_task_count(DeviceTask&) { return 1; } @@ -464,21 +434,17 @@ protected: rcv.read(mem, name); lock.unlock(); + /* Allocate host side data buffer. */ + size_t data_size = mem.memory_size(); device_ptr client_pointer = mem.device_pointer; - /* create a memory buffer for the device buffer */ - size_t data_size = mem.memory_size(); DataVector &data_v = data_vector_insert(client_pointer, data_size); + mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0; - if(data_size) - mem.data_pointer = (device_ptr)&(data_v[0]); - else - mem.data_pointer = 0; - - /* perform the allocation on the actual device */ + /* Perform the allocation on the actual device. */ device->mem_alloc(mem); - /* store a mapping to/from client_pointer and real device pointer */ + /* Store a mapping to/from client_pointer and real device pointer. */ pointer_mapping_insert(client_pointer, mem.device_pointer); } else if(rcv.name == "mem_copy_to") { @@ -487,23 +453,33 @@ protected: rcv.read(mem, name); lock.unlock(); + size_t data_size = mem.memory_size(); device_ptr client_pointer = mem.device_pointer; - DataVector &data_v = data_vector_find(client_pointer); - - size_t data_size = mem.memory_size(); + if(client_pointer) { + /* Lookup existing host side data buffer. */ + DataVector &data_v = data_vector_find(client_pointer); + mem.data_pointer = (device_ptr)&data_v[0]; - /* get pointer to memory buffer for device buffer */ - mem.data_pointer = (device_ptr)&data_v[0]; + /* Translate the client pointer to a real device pointer. */ + mem.device_pointer = device_ptr_from_client_pointer(client_pointer); + } + else { + /* Allocate host side data buffer. */ + DataVector &data_v = data_vector_insert(client_pointer, data_size); + mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0; + } - /* copy data from network into memory buffer */ + /* Copy data from network into memory buffer. */ rcv.read_buffer((uint8_t*)mem.data_pointer, data_size); - /* translate the client pointer to a real device pointer */ - mem.device_pointer = device_ptr_from_client_pointer(client_pointer); - - /* copy the data from the memory buffer to the device buffer */ + /* Copy the data from the memory buffer to the device buffer. */ device->mem_copy_to(mem); + + if(!client_pointer) { + /* Store a mapping to/from client_pointer and real device pointer. */ + pointer_mapping_insert(client_pointer, mem.device_pointer); + } } else if(rcv.name == "mem_copy_from") { string name; @@ -538,14 +514,30 @@ protected: rcv.read(mem, name); lock.unlock(); + size_t data_size = mem.memory_size(); device_ptr client_pointer = mem.device_pointer; - mem.device_pointer = device_ptr_from_client_pointer(client_pointer); - DataVector &data_v = data_vector_find(client_pointer); + if(client_pointer) { + /* Lookup existing host side data buffer. */ + DataVector &data_v = data_vector_find(client_pointer); + mem.data_pointer = (device_ptr)&data_v[0]; - mem.data_pointer = (device_ptr)&(data_v[0]); + /* Translate the client pointer to a real device pointer. */ + mem.device_pointer = device_ptr_from_client_pointer(client_pointer); + } + else { + /* Allocate host side data buffer. */ + DataVector &data_v = data_vector_insert(client_pointer, data_size); + mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0; + } + /* Zero memory. */ device->mem_zero(mem); + + if(!client_pointer) { + /* Store a mapping to/from client_pointer and real device pointer. */ + pointer_mapping_insert(client_pointer, mem.device_pointer); + } } else if(rcv.name == "mem_free") { string name; @@ -573,45 +565,6 @@ protected: device->const_copy_to(name_string.c_str(), &host_vector[0], size); } - else if(rcv.name == "tex_alloc") { - string name; - network_device_memory mem(device); - device_ptr client_pointer; - - rcv.read(mem, name); - lock.unlock(); - - client_pointer = mem.device_pointer; - - size_t data_size = mem.memory_size(); - - DataVector &data_v = data_vector_insert(client_pointer, data_size); - - if(data_size) - mem.data_pointer = (device_ptr)&(data_v[0]); - else - mem.data_pointer = 0; - - rcv.read_buffer((uint8_t*)mem.data_pointer, data_size); - - device->tex_alloc(mem); - - pointer_mapping_insert(client_pointer, mem.device_pointer); - } - else if(rcv.name == "tex_free") { - string name; - network_device_memory mem(device); - device_ptr client_pointer; - - rcv.read(mem, name); - lock.unlock(); - - client_pointer = mem.device_pointer; - - mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer); - - device->tex_free(mem); - } else if(rcv.name == "load_kernels") { DeviceRequestedFeatures requested_features; rcv.read(requested_features.experimental); @@ -696,7 +649,7 @@ protected: } } - bool task_acquire_tile(Device *device, RenderTile& tile) + bool task_acquire_tile(Device *, RenderTile& tile) { thread_scoped_lock acquire_lock(acquire_mutex); diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index 8a53290f421..a38d962c0af 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -279,6 +279,11 @@ public: mem.name = name.c_str(); mem.data_pointer = 0; + + /* Can't transfer OpenGL texture over network. */ + if(mem.type == MEM_PIXELS) { + mem.type = MEM_WRITE_ONLY; + } } template void read(T& data) diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 6c8befa89be..f2839a8b1b9 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -61,11 +61,11 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) DeviceSplitKernel::~DeviceSplitKernel() { - device->mem_free(split_data); - device->mem_free(ray_state); - device->mem_free(use_queues_flag); - device->mem_free(queue_index); - device->mem_free(work_pool_wgs); + split_data.free(); + ray_state.free(); + use_queues_flag.free(); + queue_index.free(); + work_pool_wgs.free(); delete kernel_path_init; delete kernel_scene_intersect; @@ -175,20 +175,11 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, unsigned int max_work_groups = num_global_elements / work_pool_size + 1; /* Allocate work_pool_wgs memory. */ - work_pool_wgs.resize(max_work_groups); - device->mem_alloc(work_pool_wgs); - - queue_index.resize(NUM_QUEUES); - device->mem_alloc(queue_index); - - use_queues_flag.resize(1); - device->mem_alloc(use_queues_flag); - - ray_state.resize(num_global_elements); - device->mem_alloc(ray_state); - - split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); - device->mem_alloc(split_data); + work_pool_wgs.alloc_to_device(max_work_groups); + queue_index.alloc_to_device(NUM_QUEUES); + use_queues_flag.alloc_to_device(1); + split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); + ray_state.alloc(num_global_elements); } #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ @@ -225,9 +216,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, /* reset state memory here as global size for data_init * kernel might not be large enough to do in kernel */ - device->mem_zero(work_pool_wgs); - device->mem_zero(split_data); - device->mem_zero(ray_state); + work_pool_wgs.zero_to_device(); + split_data.zero_to_device(); + ray_state.zero_to_device(); if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), subtile, @@ -284,7 +275,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, } /* Decide if we should exit path-iteration in host. */ - device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1); + ray_state.copy_from_device(0, global_size[0] * global_size[1], 1); activeRaysAvailable = false; diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index e48367b8987..a791b374774 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -76,8 +76,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) device_only_memory *new_buffer = new device_only_memory(device, "memory manager buffer"); - new_buffer->resize(total_size); - device->mem_alloc(*new_buffer); + new_buffer->alloc_to_device(total_size); size_t offset = 0; @@ -111,7 +110,6 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) offset += allocation->size; } - device->mem_free(*buffer); delete buffer; buffer = new_buffer; @@ -144,9 +142,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device) clFinish(device->cqCommandQueue); } -void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device) +void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *) { - device->mem_free(*buffer); + buffer->free(); } MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer() diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 90f461b4c98..5e9debc3b17 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -74,7 +74,7 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where) OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_), memory_manager(this), - texture_info_buffer(this, "__texture_info", MEM_READ_ONLY) + texture_info(this, "__texture_info", MEM_TEXTURE) { cpPlatform = NULL; cdDevice = NULL; @@ -157,7 +157,6 @@ OpenCLDeviceBase::~OpenCLDeviceBase() ConstMemMap::iterator mt; for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { - mem_free(*(mt->second)); delete mt->second; } @@ -318,9 +317,9 @@ void OpenCLDeviceBase::mem_alloc(device_memory& mem) cl_mem_flags mem_flag; void *mem_ptr = NULL; - if(mem.type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) mem_flag = CL_MEM_READ_ONLY; - else if(mem.type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -348,17 +347,27 @@ void OpenCLDeviceBase::mem_alloc(device_memory& mem) void OpenCLDeviceBase::mem_copy_to(device_memory& mem) { - /* this is blocking */ - size_t size = mem.memory_size(); - if(size != 0) { - opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, - CL_MEM_PTR(mem.device_pointer), - CL_TRUE, - 0, - size, - (void*)mem.data_pointer, - 0, - NULL, NULL)); + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + tex_alloc(mem); + } + else { + if(!mem.device_pointer) { + mem_alloc(mem); + } + + /* this is blocking */ + size_t size = mem.memory_size(); + if(size != 0) { + opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, + CL_MEM_PTR(mem.device_pointer), + CL_TRUE, + 0, + size, + (void*)mem.data_pointer, + 0, + NULL, NULL)); + } } } @@ -410,6 +419,10 @@ void OpenCLDeviceBase::mem_zero_kernel(device_ptr mem, size_t size) void OpenCLDeviceBase::mem_zero(device_memory& mem) { + if(!mem.device_pointer) { + mem_alloc(mem); + } + if(mem.device_pointer) { if(base_program.is_loaded()) { mem_zero_kernel(mem.device_pointer, mem.memory_size()); @@ -445,14 +458,19 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem) void OpenCLDeviceBase::mem_free(device_memory& mem) { - if(mem.device_pointer) { - if(mem.device_pointer != null_mem) { - opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); - } - mem.device_pointer = 0; + if(mem.type == MEM_TEXTURE) { + tex_free(mem); + } + else { + if(mem.device_pointer) { + if(mem.device_pointer != null_mem) { + opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); + } + mem.device_pointer = 0; - stats.mem_free(mem.device_size); - mem.device_size = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; + } } } @@ -464,9 +482,9 @@ int OpenCLDeviceBase::mem_address_alignment() device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size) { cl_mem_flags mem_flag; - if(mem.type == MEM_READ_ONLY) + if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) mem_flag = CL_MEM_READ_ONLY; - else if(mem.type == MEM_WRITE_ONLY) + else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS) mem_flag = CL_MEM_WRITE_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -498,9 +516,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) if(i == const_mem_map.end()) { data = new device_vector(this, name, MEM_READ_ONLY); - data->resize(size); - - mem_alloc(*data); + data->alloc(size); const_mem_map.insert(ConstMemMap::value_type(name, data)); } else { @@ -508,7 +524,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) } memcpy(data->get_data(), host, size); - mem_copy_to(*data); + data->copy_to_device(); } void OpenCLDeviceBase::tex_alloc(device_memory& mem) @@ -1037,8 +1053,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, DenoisingTask *task) { - mem_alloc(task->tiles_mem); - mem_copy_to(task->tiles_mem); + task->tiles_mem.copy_to_device(); cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index c966ebe0c5e..96139afa450 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -128,8 +128,7 @@ public: /* Allocate buffer for kernel globals */ device_only_memory kgbuffer(this, "kernel_globals"); - kgbuffer.resize(1); - mem_alloc(kgbuffer); + kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { @@ -160,7 +159,7 @@ public: task->release_tile(tile); } - mem_free(kgbuffer); + kgbuffer.free(); } } @@ -289,8 +288,8 @@ public: virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { device_vector size_buffer(device, "size_buffer", MEM_READ_WRITE); - size_buffer.resize(1); - device->mem_alloc(size_buffer); + size_buffer.alloc(1); + size_buffer.zero_to_device(); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); @@ -308,9 +307,9 @@ public: device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); + size_buffer.copy_from_device(0, 1, 1); size_t size = size_buffer[0]; - device->mem_free(size_buffer); + size_buffer.free(); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 66615bf336c..99f68b6aa00 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -151,7 +151,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre /* setup input for device task */ device_vector d_input(device, "bake_input", MEM_READ_ONLY); - uint4 *d_input_data = d_input.resize(shader_size * 2); + uint4 *d_input_data = d_input.alloc(shader_size * 2); size_t d_input_size = 0; for(size_t i = shader_offset; i < (shader_offset + shader_size); i++) { @@ -166,16 +166,13 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre /* run device task */ device_vector d_output(device, "bake_output", MEM_READ_WRITE); - d_output.resize(shader_size); + d_output.alloc(shader_size); + d_output.zero_to_device(); + d_input.copy_to_device(); /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input); - device->mem_copy_to(d_input); - device->mem_alloc(d_output); - device->mem_zero(d_output); - DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; task.shader_output = d_output.device_pointer; @@ -192,15 +189,14 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre device->task_wait(); if(progress.get_cancel()) { - device->mem_free(d_input); - device->mem_free(d_output); + d_input.free(); + d_output.free(); m_is_baking = false; return false; } - device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); - device->mem_free(d_input); - device->mem_free(d_output); + d_output.copy_from_device(0, 1, d_output.size()); + d_input.free(); /* read result */ int k = 0; @@ -218,6 +214,8 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre } } } + + d_output.free(); } m_is_baking = false; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 2342dd52d86..01f853dda71 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -115,54 +115,35 @@ RenderTile::RenderTile() /* Render Buffers */ RenderBuffers::RenderBuffers(Device *device) -: buffer(device, "RenderBuffers", MEM_READ_WRITE), - device(device) +: buffer(device, "RenderBuffers", MEM_READ_WRITE) { } RenderBuffers::~RenderBuffers() { - device_free(); + buffer.free(); } -void RenderBuffers::device_free() -{ - if(buffer.device_pointer) { - device->mem_free(buffer); - buffer.clear(); - } -} - -void RenderBuffers::reset(Device *device, BufferParams& params_) +void RenderBuffers::reset(BufferParams& params_) { params = params_; - /* free existing buffers */ - device_free(); - - /* allocate buffer */ - buffer.resize(params.width*params.height*params.get_passes_size()); - device->mem_alloc(buffer); - device->mem_zero(buffer); + /* re-allocate buffer */ + buffer.alloc(params.width*params.height*params.get_passes_size()); + buffer.zero_to_device(); } -void RenderBuffers::zero(Device *device) +void RenderBuffers::zero() { - if(buffer.device_pointer) { - device->mem_zero(buffer); - } + buffer.zero_to_device(); } -bool RenderBuffers::copy_from_device(Device *from_device) +bool RenderBuffers::copy_from_device() { if(!buffer.device_pointer) return false; - if(!from_device) { - from_device = device; - } - - from_device->mem_copy_from(buffer, 0, params.width, params.height, params.get_passes_size()*sizeof(float)); + buffer.copy_from_device(0, params.width * params.get_passes_size(), params.height); return true; } @@ -402,47 +383,30 @@ DisplayBuffer::DisplayBuffer(Device *device, bool linear) draw_height(0), transparent(true), /* todo: determine from background */ half_float(linear), - rgba_byte(device, "display buffer byte", MEM_WRITE_ONLY), - rgba_half(device, "display buffer half", MEM_WRITE_ONLY), - device(device) + rgba_byte(device, "display buffer byte"), + rgba_half(device, "display buffer half") { } DisplayBuffer::~DisplayBuffer() { - device_free(); -} - -void DisplayBuffer::device_free() -{ - if(rgba_byte.device_pointer) { - device->pixels_free(rgba_byte); - rgba_byte.clear(); - } - if(rgba_half.device_pointer) { - device->pixels_free(rgba_half); - rgba_half.clear(); - } + rgba_byte.free(); + rgba_half.free(); } -void DisplayBuffer::reset(Device *device, BufferParams& params_) +void DisplayBuffer::reset(BufferParams& params_) { draw_width = 0; draw_height = 0; params = params_; - /* free existing buffers */ - device_free(); - /* allocate display pixels */ if(half_float) { - rgba_half.resize(params.width, params.height); - device->pixels_alloc(rgba_half); + rgba_half.alloc_to_device(params.width, params.height); } else { - rgba_byte.resize(params.width, params.height); - device->pixels_alloc(rgba_byte); + rgba_byte.alloc_to_device(params.width, params.height); } } @@ -457,7 +421,8 @@ void DisplayBuffer::draw_set(int width, int height) void DisplayBuffer::draw(Device *device, const DeviceDrawParams& draw_params) { if(draw_width != 0 && draw_height != 0) { - device_memory& rgba = rgba_data(); + device_memory& rgba = (half_float)? (device_memory&)rgba_half: + (device_memory&)rgba_byte; device->draw_pixels(rgba, 0, draw_width, draw_height, params.full_x, params.full_y, params.width, params.height, transparent, draw_params); } @@ -468,7 +433,7 @@ bool DisplayBuffer::draw_ready() return (draw_width != 0 && draw_height != 0); } -void DisplayBuffer::write(Device *device, const string& filename) +void DisplayBuffer::write(const string& filename) { int w = draw_width; int h = draw_height; @@ -480,21 +445,19 @@ void DisplayBuffer::write(Device *device, const string& filename) return; /* read buffer from device */ - device_memory& rgba = rgba_data(); - device->pixels_copy_from(rgba, 0, w, h); + uchar4 *pixels = rgba_byte.copy_from_device(0, w, h); /* write image */ ImageOutput *out = ImageOutput::create(filename); ImageSpec spec(w, h, 4, TypeDesc::UINT8); - int scanlinesize = w*4*sizeof(uchar); out->open(filename, spec); /* conversion for different top/bottom convention */ out->write_image(TypeDesc::UINT8, - (uchar*)rgba.data_pointer + (h-1)*scanlinesize, + (uchar*)(pixels + (h-1)*w), AutoStride, - -scanlinesize, + -w*sizeof(uchar4), AutoStride); out->close(); @@ -502,13 +465,5 @@ void DisplayBuffer::write(Device *device, const string& filename) delete out; } -device_memory& DisplayBuffer::rgba_data() -{ - if(half_float) - return rgba_half; - else - return rgba_byte; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 2780fc8a68d..8563d6674ec 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -75,20 +75,15 @@ public: /* float buffer */ device_vector buffer; - Device *device; - explicit RenderBuffers(Device *device); ~RenderBuffers(); - void reset(Device *device, BufferParams& params); - void zero(Device *device); + void reset(BufferParams& params); + void zero(); - bool copy_from_device(Device *from_device = NULL); + bool copy_from_device(); bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels); bool get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels); - -protected: - void device_free(); }; /* Display Buffer @@ -109,25 +104,18 @@ public: /* use half float? */ bool half_float; /* byte buffer for converted result */ - device_vector rgba_byte; - device_vector rgba_half; + device_pixels rgba_byte; + device_pixels rgba_half; DisplayBuffer(Device *device, bool linear = false); ~DisplayBuffer(); - void reset(Device *device, BufferParams& params); - void write(Device *device, const string& filename); + void reset(BufferParams& params); + void write(const string& filename); void draw_set(int width, int height); void draw(Device *device, const DeviceDrawParams& draw_params); bool draw_ready(); - - device_memory& rgba_data(); - -protected: - void device_free(); - - Device *device; }; /* Render Tile diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index e7f5ff002b7..625901ff258 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -532,7 +532,8 @@ bool ImageManager::file_load_image(Image *img, pixels = &pixels_storage[0]; } else { - pixels = (StorageType*)tex_img.resize(width, height, depth); + thread_scoped_lock device_lock(device_mutex); + pixels = (StorageType*)tex_img.alloc(width, height, depth); } if(pixels == NULL) { /* Could be that we've run out of memory. */ @@ -686,9 +687,16 @@ bool ImageManager::file_load_image(Image *img, scale_factor, &scaled_pixels, &scaled_width, &scaled_height, &scaled_depth); - StorageType *texture_pixels = (StorageType*)tex_img.resize(scaled_width, - scaled_height, - scaled_depth); + + StorageType *texture_pixels; + + { + thread_scoped_lock device_lock(device_mutex); + texture_pixels = (StorageType*)tex_img.alloc(scaled_width, + scaled_height, + scaled_depth); + } + memcpy(texture_pixels, &scaled_pixels[0], scaled_pixels.size() * sizeof(StorageType)); @@ -722,14 +730,14 @@ void ImageManager::device_load_image(Device *device, /* Free previous texture in slot. */ if(img->mem) { thread_scoped_lock device_lock(device_mutex); - device->tex_free(*img->mem); delete img->mem; img->mem = NULL; } /* Create new texture. */ if(type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector *tex_img = new device_vector(device, name.c_str()); + device_vector *tex_img + = new device_vector(device, name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -737,7 +745,7 @@ void ImageManager::device_load_image(Device *device, *tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ - float *pixels = (float*)tex_img->resize(1, 1); + float *pixels = (float*)tex_img->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; pixels[1] = TEX_IMAGE_MISSING_G; @@ -746,9 +754,15 @@ void ImageManager::device_load_image(Device *device, } img->mem = tex_img; + img->mem->interpolation = img->interpolation; + img->mem->extension = img->extension; + + thread_scoped_lock device_lock(device_mutex); + tex_img->copy_to_device(); } else if(type == IMAGE_DATA_TYPE_FLOAT) { - device_vector *tex_img = new device_vector(device, name.c_str()); + device_vector *tex_img + = new device_vector(device, name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -756,15 +770,21 @@ void ImageManager::device_load_image(Device *device, *tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ - float *pixels = (float*)tex_img->resize(1, 1); + float *pixels = (float*)tex_img->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; } img->mem = tex_img; + img->mem->interpolation = img->interpolation; + img->mem->extension = img->extension; + + thread_scoped_lock device_lock(device_mutex); + tex_img->copy_to_device(); } else if(type == IMAGE_DATA_TYPE_BYTE4) { - device_vector *tex_img = new device_vector(device, name.c_str()); + device_vector *tex_img + = new device_vector(device, name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -772,7 +792,7 @@ void ImageManager::device_load_image(Device *device, *tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ - uchar *pixels = (uchar*)tex_img->resize(1, 1); + uchar *pixels = (uchar*)tex_img->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 255); pixels[1] = (TEX_IMAGE_MISSING_G * 255); @@ -781,31 +801,43 @@ void ImageManager::device_load_image(Device *device, } img->mem = tex_img; + img->mem->interpolation = img->interpolation; + img->mem->extension = img->extension; + + thread_scoped_lock device_lock(device_mutex); + tex_img->copy_to_device(); } else if(type == IMAGE_DATA_TYPE_BYTE) { - device_vector *tex_img = new device_vector(device, name.c_str()); + device_vector *tex_img + = new device_vector(device, name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, texture_limit, *tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ - uchar *pixels = (uchar*)tex_img->resize(1, 1); + uchar *pixels = (uchar*)tex_img->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 255); } img->mem = tex_img; + img->mem->interpolation = img->interpolation; + img->mem->extension = img->extension; + + thread_scoped_lock device_lock(device_mutex); + tex_img->copy_to_device(); } else if(type == IMAGE_DATA_TYPE_HALF4) { - device_vector *tex_img = new device_vector(device, name.c_str()); + device_vector *tex_img + = new device_vector(device, name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, texture_limit, *tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ - half *pixels = (half*)tex_img->resize(1, 1); + half *pixels = (half*)tex_img->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; pixels[1] = TEX_IMAGE_MISSING_G; @@ -814,37 +846,38 @@ void ImageManager::device_load_image(Device *device, } img->mem = tex_img; + img->mem->interpolation = img->interpolation; + img->mem->extension = img->extension; + + thread_scoped_lock device_lock(device_mutex); + tex_img->copy_to_device(); } else if(type == IMAGE_DATA_TYPE_HALF) { - device_vector *tex_img = new device_vector(device, name.c_str()); + device_vector *tex_img + = new device_vector(device, name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, texture_limit, *tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ - half *pixels = (half*)tex_img->resize(1, 1); + half *pixels = (half*)tex_img->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; } img->mem = tex_img; - } - - /* Copy to device. */ - if(img->mem) { img->mem->interpolation = img->interpolation; img->mem->extension = img->extension; thread_scoped_lock device_lock(device_mutex); - device->tex_alloc(*img->mem); + tex_img->copy_to_device(); } - img->need_load = false; } -void ImageManager::device_free_image(Device *device, ImageDataType type, int slot) +void ImageManager::device_free_image(Device *, ImageDataType type, int slot) { Image *img = images[type][slot]; @@ -858,7 +891,6 @@ void ImageManager::device_free_image(Device *device, ImageDataType type, int slo if(img->mem) { thread_scoped_lock device_lock(device_mutex); - device->tex_free(*img->mem); delete img->mem; } diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index b128f18db08..33c3dac9e81 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -191,11 +191,11 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene int dimensions = PRNG_BASE_NUM + max_samples*PRNG_BOUNCE_NUM; dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS); - uint *directions = dscene->sobol_directions.resize(SOBOL_BITS*dimensions); + uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS*dimensions); sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); - device->tex_alloc(dscene->sobol_directions); + dscene->sobol_directions.copy_to_device(); /* Clamping. */ bool use_sample_clamp = (sample_clamp_direct != 0.0f || @@ -208,10 +208,9 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene need_update = false; } -void Integrator::device_free(Device *device, DeviceScene *dscene) +void Integrator::device_free(Device *, DeviceScene *dscene) { - device->tex_free(dscene->sobol_directions); - dscene->sobol_directions.clear(); + dscene->sobol_directions.free(); } bool Integrator::modified(const Integrator& integrator) diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 9664e1310d5..b3804f34963 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -39,7 +39,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res device_vector d_input(device, "background_input", MEM_READ_ONLY); device_vector d_output(device, "background_output", MEM_WRITE_ONLY); - uint4 *d_input_data = d_input.resize(width*height); + uint4 *d_input_data = d_input.alloc(width*height); for(int y = 0; y < height; y++) { for(int x = 0; x < width; x++) { @@ -52,16 +52,12 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res } /* compute on device */ - d_output.resize(width*height); - memset((void*)d_output.data_pointer, 0, d_output.memory_size()); + d_output.alloc(width*height); + d_output.zero_to_device(); + d_input.copy_to_device(); device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input); - device->mem_copy_to(d_input); - device->mem_alloc(d_output); - device->mem_zero(d_output); - DeviceTask main_task(DeviceTask::SHADER); main_task.shader_input = d_input.device_pointer; main_task.shader_output = d_output.device_pointer; @@ -78,13 +74,10 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res foreach(DeviceTask& task, split_tasks) { device->task_add(task); device->task_wait(); - device->mem_copy_from(d_output, task.shader_x, 1, task.shader_w, sizeof(float4)); + d_output.copy_from_device(task.shader_x, 1, task.shader_w); } - device->mem_free(d_input); - device->mem_free(d_output); - - d_input.clear(); + d_input.free(); float4 *d_output_data = reinterpret_cast(d_output.data_pointer); @@ -97,6 +90,8 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res pixels[y*width + x].z = d_output_data[y*width + x].z; } } + + d_output.free(); } /* Light */ @@ -246,7 +241,7 @@ bool LightManager::object_usable_as_light(Object *object) { return false; } -void LightManager::device_update_distribution(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) +void LightManager::device_update_distribution(Device *, DeviceScene *dscene, Scene *scene, Progress& progress) { progress.set_status("Updating Lights", "Computing distribution"); @@ -292,7 +287,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen VLOG(1) << "Total " << num_distribution << " of light distribution primitives."; /* emission area */ - float4 *distribution = dscene->light_distribution.resize(num_distribution + 1); + float4 *distribution = dscene->light_distribution.alloc(num_distribution + 1); float totarea = 0.0f; /* triangles */ @@ -451,7 +446,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights)/(float)num_lights; /* CDF */ - device->tex_alloc(dscene->light_distribution); + dscene->light_distribution.copy_to_device(); /* Portals */ if(num_portals > 0) { @@ -466,7 +461,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen } } else { - dscene->light_distribution.clear(); + dscene->light_distribution.free(); kintegrator->num_distribution = 0; kintegrator->num_all_lights = 0; @@ -561,8 +556,8 @@ void LightManager::device_update_background(Device *device, /* build row distributions and column distribution for the infinite area environment light */ int cdf_count = res + 1; - float2 *marg_cdf = dscene->light_background_marginal_cdf.resize(cdf_count); - float2 *cond_cdf = dscene->light_background_conditional_cdf.resize(cdf_count * cdf_count); + float2 *marg_cdf = dscene->light_background_marginal_cdf.alloc(cdf_count); + float2 *cond_cdf = dscene->light_background_conditional_cdf.alloc(cdf_count * cdf_count); double time_start = time_dt(); if(res < 512) { @@ -611,11 +606,11 @@ void LightManager::device_update_background(Device *device, VLOG(2) << "Background MIS build time " << time_dt() - time_start << "\n"; /* update device */ - device->tex_alloc(dscene->light_background_marginal_cdf); - device->tex_alloc(dscene->light_background_conditional_cdf); + dscene->light_background_marginal_cdf.copy_to_device(); + dscene->light_background_conditional_cdf.copy_to_device(); } -void LightManager::device_update_points(Device *device, +void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *scene) { @@ -628,7 +623,7 @@ void LightManager::device_update_points(Device *device, } } - float4 *light_data = dscene->light_data.resize(num_lights*LIGHT_SIZE); + float4 *light_data = dscene->light_data.alloc(num_lights*LIGHT_SIZE); if(num_lights == 0) { VLOG(1) << "No effective light, ignoring points update."; @@ -813,7 +808,7 @@ void LightManager::device_update_points(Device *device, VLOG(1) << "Number of lights without contribution: " << num_scene_lights - light_index; - device->tex_alloc(dscene->light_data); + dscene->light_data.copy_to_device(); } void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) @@ -846,17 +841,12 @@ void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *sce need_update = false; } -void LightManager::device_free(Device *device, DeviceScene *dscene) +void LightManager::device_free(Device *, DeviceScene *dscene) { - device->tex_free(dscene->light_distribution); - device->tex_free(dscene->light_data); - device->tex_free(dscene->light_background_marginal_cdf); - device->tex_free(dscene->light_background_conditional_cdf); - - dscene->light_distribution.clear(); - dscene->light_data.clear(); - dscene->light_background_marginal_cdf.clear(); - dscene->light_background_conditional_cdf.clear(); + dscene->light_distribution.free(); + dscene->light_data.free(); + dscene->light_background_marginal_cdf.free(); + dscene->light_background_conditional_cdf.free(); } void LightManager::tag_update(Scene * /*scene*/) diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 685272b80c1..75bdf71616f 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -1252,7 +1252,7 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector& mesh_attributes) +void MeshManager::update_svm_attributes(Device *, DeviceScene *dscene, Scene *scene, vector& mesh_attributes) { /* for SVM, the attributes_map table is used to lookup the offset of an * attribute, based on a unique shader attribute id. */ @@ -1267,7 +1267,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce return; /* create attribute map */ - uint4 *attr_map = dscene->attributes_map.resize(attr_map_stride*scene->objects.size()); + uint4 *attr_map = dscene->attributes_map.alloc(attr_map_stride*scene->objects.size()); memset(attr_map, 0, dscene->attributes_map.size()*sizeof(uint)); for(size_t i = 0; i < scene->objects.size(); i++) { @@ -1359,7 +1359,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce /* copy to device */ dscene->data.bvh.attributes_map_stride = attr_map_stride; - device->tex_alloc(dscene->attributes_map); + dscene->attributes_map.copy_to_device(); } static void update_attribute_element_size(Mesh *mesh, @@ -1554,9 +1554,9 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, } } - dscene->attributes_float.resize(attr_float_size); - dscene->attributes_float3.resize(attr_float3_size); - dscene->attributes_uchar4.resize(attr_uchar4_size); + dscene->attributes_float.alloc(attr_float_size); + dscene->attributes_float3.alloc(attr_float3_size); + dscene->attributes_uchar4.alloc(attr_uchar4_size); size_t attr_float_offset = 0; size_t attr_float3_offset = 0; @@ -1617,13 +1617,13 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, progress.set_status("Updating Mesh", "Copying Attributes to device"); if(dscene->attributes_float.size()) { - device->tex_alloc(dscene->attributes_float); + dscene->attributes_float.copy_to_device(); } if(dscene->attributes_float3.size()) { - device->tex_alloc(dscene->attributes_float3); + dscene->attributes_float3.copy_to_device(); } if(dscene->attributes_uchar4.size()) { - device->tex_alloc(dscene->attributes_uchar4); + dscene->attributes_uchar4.copy_to_device(); } } @@ -1671,7 +1671,7 @@ void MeshManager::mesh_calc_offset(Scene *scene) } } -void MeshManager::device_update_mesh(Device *device, +void MeshManager::device_update_mesh(Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, @@ -1732,11 +1732,11 @@ void MeshManager::device_update_mesh(Device *device, /* normals */ progress.set_status("Updating Mesh", "Computing normals"); - uint *tri_shader = dscene->tri_shader.resize(tri_size); - float4 *vnormal = dscene->tri_vnormal.resize(vert_size); - uint4 *tri_vindex = dscene->tri_vindex.resize(tri_size); - uint *tri_patch = dscene->tri_patch.resize(tri_size); - float2 *tri_patch_uv = dscene->tri_patch_uv.resize(vert_size); + uint *tri_shader = dscene->tri_shader.alloc(tri_size); + float4 *vnormal = dscene->tri_vnormal.alloc(vert_size); + uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size); + uint *tri_patch = dscene->tri_patch.alloc(tri_size); + float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size); foreach(Mesh *mesh, scene->meshes) { mesh->pack_normals(scene, @@ -1754,32 +1754,32 @@ void MeshManager::device_update_mesh(Device *device, /* vertex coordinates */ progress.set_status("Updating Mesh", "Copying Mesh to device"); - device->tex_alloc(dscene->tri_shader); - device->tex_alloc(dscene->tri_vnormal); - device->tex_alloc(dscene->tri_vindex); - device->tex_alloc(dscene->tri_patch); - device->tex_alloc(dscene->tri_patch_uv); + dscene->tri_shader.copy_to_device(); + dscene->tri_vnormal.copy_to_device(); + dscene->tri_vindex.copy_to_device(); + dscene->tri_patch.copy_to_device(); + dscene->tri_patch_uv.copy_to_device(); } if(curve_size != 0) { progress.set_status("Updating Mesh", "Copying Strands to device"); - float4 *curve_keys = dscene->curve_keys.resize(curve_key_size); - float4 *curves = dscene->curves.resize(curve_size); + float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size); + float4 *curves = dscene->curves.alloc(curve_size); foreach(Mesh *mesh, scene->meshes) { mesh->pack_curves(scene, &curve_keys[mesh->curvekey_offset], &curves[mesh->curve_offset], mesh->curvekey_offset); if(progress.get_cancel()) return; } - device->tex_alloc(dscene->curve_keys); - device->tex_alloc(dscene->curves); + dscene->curve_keys.copy_to_device(); + dscene->curves.copy_to_device(); } if(patch_size != 0) { progress.set_status("Updating Mesh", "Copying Patches to device"); - uint *patch_data = dscene->patches.resize(patch_size); + uint *patch_data = dscene->patches.alloc(patch_size); foreach(Mesh *mesh, scene->meshes) { mesh->pack_patches(&patch_data[mesh->patch_offset], mesh->vert_offset, mesh->face_offset, mesh->corner_offset); @@ -1791,11 +1791,11 @@ void MeshManager::device_update_mesh(Device *device, if(progress.get_cancel()) return; } - device->tex_alloc(dscene->patches); + dscene->patches.copy_to_device(); } if(for_displacement) { - float4 *prim_tri_verts = dscene->prim_tri_verts.resize(tri_size * 3); + float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3); foreach(Mesh *mesh, scene->meshes) { for(size_t i = 0; i < mesh->num_triangles(); ++i) { Mesh::Triangle t = mesh->get_triangle(i); @@ -1805,7 +1805,7 @@ void MeshManager::device_update_mesh(Device *device, prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]); } } - device->tex_alloc(dscene->prim_tri_verts); + dscene->prim_tri_verts.copy_to_device(); } } @@ -1841,43 +1841,43 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * if(pack.nodes.size()) { dscene->bvh_nodes.steal_data(pack.nodes); - device->tex_alloc(dscene->bvh_nodes); + dscene->bvh_nodes.copy_to_device(); } if(pack.leaf_nodes.size()) { dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes); - device->tex_alloc(dscene->bvh_leaf_nodes); + dscene->bvh_leaf_nodes.copy_to_device(); } if(pack.object_node.size()) { dscene->object_node.steal_data(pack.object_node); - device->tex_alloc(dscene->object_node); + dscene->object_node.copy_to_device(); } if(pack.prim_tri_index.size()) { dscene->prim_tri_index.steal_data(pack.prim_tri_index); - device->tex_alloc(dscene->prim_tri_index); + dscene->prim_tri_index.copy_to_device(); } if(pack.prim_tri_verts.size()) { dscene->prim_tri_verts.steal_data(pack.prim_tri_verts); - device->tex_alloc(dscene->prim_tri_verts); + dscene->prim_tri_verts.copy_to_device(); } if(pack.prim_type.size()) { dscene->prim_type.steal_data(pack.prim_type); - device->tex_alloc(dscene->prim_type); + dscene->prim_type.copy_to_device(); } if(pack.prim_visibility.size()) { dscene->prim_visibility.steal_data(pack.prim_visibility); - device->tex_alloc(dscene->prim_visibility); + dscene->prim_visibility.copy_to_device(); } if(pack.prim_index.size()) { dscene->prim_index.steal_data(pack.prim_index); - device->tex_alloc(dscene->prim_index); + dscene->prim_index.copy_to_device(); } if(pack.prim_object.size()) { dscene->prim_object.steal_data(pack.prim_object); - device->tex_alloc(dscene->prim_object); + dscene->prim_object.copy_to_device(); } if(pack.prim_time.size()) { dscene->prim_time.steal_data(pack.prim_time); - device->tex_alloc(dscene->prim_time); + dscene->prim_time.copy_to_device(); } dscene->data.bvh.root = pack.root_index; @@ -2142,51 +2142,28 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen void MeshManager::device_free(Device *device, DeviceScene *dscene) { - device->tex_free(dscene->bvh_nodes); - device->tex_free(dscene->bvh_leaf_nodes); - device->tex_free(dscene->object_node); - device->tex_free(dscene->prim_tri_verts); - device->tex_free(dscene->prim_tri_index); - device->tex_free(dscene->prim_type); - device->tex_free(dscene->prim_visibility); - device->tex_free(dscene->prim_index); - device->tex_free(dscene->prim_object); - device->tex_free(dscene->prim_time); - device->tex_free(dscene->tri_shader); - device->tex_free(dscene->tri_vnormal); - device->tex_free(dscene->tri_vindex); - device->tex_free(dscene->tri_patch); - device->tex_free(dscene->tri_patch_uv); - device->tex_free(dscene->curves); - device->tex_free(dscene->curve_keys); - device->tex_free(dscene->patches); - device->tex_free(dscene->attributes_map); - device->tex_free(dscene->attributes_float); - device->tex_free(dscene->attributes_float3); - device->tex_free(dscene->attributes_uchar4); - - dscene->bvh_nodes.clear(); - dscene->bvh_leaf_nodes.clear(); - dscene->object_node.clear(); - dscene->prim_tri_verts.clear(); - dscene->prim_tri_index.clear(); - dscene->prim_type.clear(); - dscene->prim_visibility.clear(); - dscene->prim_index.clear(); - dscene->prim_object.clear(); - dscene->prim_time.clear(); - dscene->tri_shader.clear(); - dscene->tri_vnormal.clear(); - dscene->tri_vindex.clear(); - dscene->tri_patch.clear(); - dscene->tri_patch_uv.clear(); - dscene->curves.clear(); - dscene->curve_keys.clear(); - dscene->patches.clear(); - dscene->attributes_map.clear(); - dscene->attributes_float.clear(); - dscene->attributes_float3.clear(); - dscene->attributes_uchar4.clear(); + dscene->bvh_nodes.free(); + dscene->bvh_leaf_nodes.free(); + dscene->object_node.free(); + dscene->prim_tri_verts.free(); + dscene->prim_tri_index.free(); + dscene->prim_type.free(); + dscene->prim_visibility.free(); + dscene->prim_index.free(); + dscene->prim_object.free(); + dscene->prim_time.free(); + dscene->tri_shader.free(); + dscene->tri_vnormal.free(); + dscene->tri_vindex.free(); + dscene->tri_patch.free(); + dscene->tri_patch_uv.free(); + dscene->curves.free(); + dscene->curve_keys.free(); + dscene->patches.free(); + dscene->attributes_map.free(); + dscene->attributes_float.free(); + dscene->attributes_float3.free(); + dscene->attributes_uchar4.free(); #ifdef WITH_OSL OSLGlobals *og = (OSLGlobals*)device->osl_memory(); diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index c06cf86ea9c..ab3ae40d931 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -65,7 +65,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me const size_t num_verts = mesh->verts.size(); vector done(num_verts, false); device_vector d_input(device, "displace_input", MEM_READ_ONLY); - uint4 *d_input_data = d_input.resize(num_verts); + uint4 *d_input_data = d_input.alloc(num_verts); size_t d_input_size = 0; size_t num_triangles = mesh->num_triangles(); @@ -116,16 +116,13 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me /* run device task */ device_vector d_output(device, "displace_output", MEM_WRITE_ONLY); - d_output.resize(d_input_size); + d_output.alloc(d_input_size); + d_output.zero_to_device(); + d_input.copy_to_device(); /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input); - device->mem_copy_to(d_input); - device->mem_alloc(d_output); - device->mem_zero(d_output); - DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; task.shader_output = d_output.device_pointer; @@ -139,14 +136,13 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me device->task_wait(); if(progress.get_cancel()) { - device->mem_free(d_input); - device->mem_free(d_output); + d_input.free(); + d_output.free(); return false; } - device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); - device->mem_free(d_input); - device->mem_free(d_output); + d_output.copy_from_device(0, 1, d_output.size()); + d_input.free(); /* read result */ done.clear(); @@ -183,6 +179,8 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me } } + d_output.free(); + /* for displacement method both, we only need to recompute the face * normals, as bump mapping in the shader will already alter the * vertex normal, so we start from the non-displaced vertex normals diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index daa872239ce..57e44861e40 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -488,9 +488,9 @@ void ObjectManager::device_update_transforms(Device *device, state.queue_start_object = 0; state.object_flag = object_flag; - state.objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size()); + state.objects = dscene->objects.alloc(OBJECT_SIZE*scene->objects.size()); if(state.need_motion == Scene::MOTION_PASS) { - state.objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size()); + state.objects_vector = dscene->objects_vector.alloc(OBJECT_VECTOR_SIZE*scene->objects.size()); } else { state.objects_vector = NULL; @@ -534,9 +534,9 @@ void ObjectManager::device_update_transforms(Device *device, } } - device->tex_alloc(dscene->objects); + dscene->objects.copy_to_device(); if(state.need_motion == Scene::MOTION_PASS) { - device->tex_alloc(dscene->objects_vector); + dscene->objects_vector.copy_to_device(); } dscene->data.bvh.have_motion = state.have_motion; @@ -557,7 +557,7 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc return; /* object info flag */ - uint *object_flag = dscene->object_flag.resize(scene->objects.size()); + uint *object_flag = dscene->object_flag.alloc(scene->objects.size()); /* set object transform matrices, before applying static transforms */ progress.set_status("Updating Objects", "Copying Transformations to device"); @@ -573,7 +573,7 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc } } -void ObjectManager::device_update_flags(Device *device, +void ObjectManager::device_update_flags(Device *, DeviceScene *dscene, Scene *scene, Progress& /*progress*/, @@ -638,10 +638,10 @@ void ObjectManager::device_update_flags(Device *device, } /* allocate object flag */ - device->tex_alloc(dscene->object_flag); + dscene->object_flag.copy_to_device(); } -void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene) +void ObjectManager::device_update_patch_map_offsets(Device *, DeviceScene *dscene, Scene *scene) { if(scene->objects.size() == 0) { return; @@ -671,21 +671,15 @@ void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene } if(update) { - device->tex_free(dscene->objects); - device->tex_alloc(dscene->objects); + dscene->objects.copy_to_device(); } } -void ObjectManager::device_free(Device *device, DeviceScene *dscene) +void ObjectManager::device_free(Device *, DeviceScene *dscene) { - device->tex_free(dscene->objects); - dscene->objects.clear(); - - device->tex_free(dscene->objects_vector); - dscene->objects_vector.clear(); - - device->tex_free(dscene->object_flag); - dscene->object_flag.clear(); + dscene->objects.free(); + dscene->objects_vector.free(); + dscene->object_flag.free(); } void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress) diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index a84ca51f274..06ff45b09bd 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -52,7 +52,7 @@ ParticleSystemManager::~ParticleSystemManager() { } -void ParticleSystemManager::device_update_particles(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) +void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscene, Scene *scene, Progress& progress) { /* count particles. * adds one dummy particle at the beginning to avoid invalid lookups, @@ -61,7 +61,7 @@ void ParticleSystemManager::device_update_particles(Device *device, DeviceScene for(size_t j = 0; j < scene->particle_systems.size(); j++) num_particles += scene->particle_systems[j]->particles.size(); - float4 *particles = dscene->particles.resize(PARTICLE_SIZE*num_particles); + float4 *particles = dscene->particles.alloc(PARTICLE_SIZE*num_particles); /* dummy particle */ particles[0] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); @@ -91,7 +91,7 @@ void ParticleSystemManager::device_update_particles(Device *device, DeviceScene } } - device->tex_alloc(dscene->particles); + dscene->particles.copy_to_device(); } void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) @@ -112,10 +112,9 @@ void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, S need_update = false; } -void ParticleSystemManager::device_free(Device *device, DeviceScene *dscene) +void ParticleSystemManager::device_free(Device *, DeviceScene *dscene) { - device->tex_free(dscene->particles); - dscene->particles.clear(); + dscene->particles.free(); } void ParticleSystemManager::tag_update(Scene * /*scene*/) diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index e362a35471d..260a325206c 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -41,40 +41,40 @@ CCL_NAMESPACE_BEGIN DeviceScene::DeviceScene(Device *device) -: bvh_nodes(device, "__bvh_nodes"), - bvh_leaf_nodes(device, "__bvh_leaf_nodes"), - object_node(device, "__object_node"), - prim_tri_index(device, "__prim_tri_index"), - prim_tri_verts(device, "__prim_tri_verts"), - prim_type(device, "__prim_type"), - prim_visibility(device, "__prim_visibility"), - prim_index(device, "__prim_index"), - prim_object(device, "__prim_object"), - prim_time(device, "__prim_time"), - tri_shader(device, "__tri_shader"), - tri_vnormal(device, "__tri_vnormal"), - tri_vindex(device, "__tri_vindex"), - tri_patch(device, "__tri_patch"), - tri_patch_uv(device, "__tri_patch_uv"), - curves(device, "__curves"), - curve_keys(device, "__curve_keys"), - patches(device, "__patches"), - objects(device, "__objects"), - objects_vector(device, "__objects_vector"), - attributes_map(device, "__attributes_map"), - attributes_float(device, "__attributes_float"), - attributes_float3(device, "__attributes_float3"), - attributes_uchar4(device, "__attributes_uchar4"), - light_distribution(device, "__light_distribution"), - light_data(device, "__light_data"), - light_background_marginal_cdf(device, "__light_background_marginal_cdf"), - light_background_conditional_cdf(device, "__light_background_conditional_cdf"), - particles(device, "__particles"), - svm_nodes(device, "__svm_nodes"), - shader_flag(device, "__shader_flag"), - object_flag(device, "__object_flag"), - lookup_table(device, "__lookup_table"), - sobol_directions(device, "__sobol_directions") +: bvh_nodes(device, "__bvh_nodes", MEM_TEXTURE), + bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_TEXTURE), + object_node(device, "__object_node", MEM_TEXTURE), + prim_tri_index(device, "__prim_tri_index", MEM_TEXTURE), + prim_tri_verts(device, "__prim_tri_verts", MEM_TEXTURE), + prim_type(device, "__prim_type", MEM_TEXTURE), + prim_visibility(device, "__prim_visibility", MEM_TEXTURE), + prim_index(device, "__prim_index", MEM_TEXTURE), + prim_object(device, "__prim_object", MEM_TEXTURE), + prim_time(device, "__prim_time", MEM_TEXTURE), + tri_shader(device, "__tri_shader", MEM_TEXTURE), + tri_vnormal(device, "__tri_vnormal", MEM_TEXTURE), + tri_vindex(device, "__tri_vindex", MEM_TEXTURE), + tri_patch(device, "__tri_patch", MEM_TEXTURE), + tri_patch_uv(device, "__tri_patch_uv", MEM_TEXTURE), + curves(device, "__curves", MEM_TEXTURE), + curve_keys(device, "__curve_keys", MEM_TEXTURE), + patches(device, "__patches", MEM_TEXTURE), + objects(device, "__objects", MEM_TEXTURE), + objects_vector(device, "__objects_vector", MEM_TEXTURE), + attributes_map(device, "__attributes_map", MEM_TEXTURE), + attributes_float(device, "__attributes_float", MEM_TEXTURE), + attributes_float3(device, "__attributes_float3", MEM_TEXTURE), + attributes_uchar4(device, "__attributes_uchar4", MEM_TEXTURE), + light_distribution(device, "__light_distribution", MEM_TEXTURE), + light_data(device, "__light_data", MEM_TEXTURE), + light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_TEXTURE), + light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE), + particles(device, "__particles", MEM_TEXTURE), + svm_nodes(device, "__svm_nodes", MEM_TEXTURE), + shader_flag(device, "__shader_flag", MEM_TEXTURE), + object_flag(device, "__object_flag", MEM_TEXTURE), + lookup_table(device, "__lookup_table", MEM_TEXTURE), + sobol_directions(device, "__sobol_directions", MEM_TEXTURE) { memset(&data, 0, sizeof(data)); } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 4642dcfa9a1..74cfd02e1a4 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -106,11 +106,11 @@ Session::~Session() delete display; display = new DisplayBuffer(device, false); - display->reset(device, buffers->params); + display->reset(buffers->params); tonemap(params.samples); progress.set_status("Writing Image", params.output_path); - display->write(device, params.output_path); + display->write(params.output_path); } /* clean up */ @@ -399,7 +399,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) /* allocate buffers */ tile->buffers = new RenderBuffers(tile_device); - tile->buffers->reset(tile_device, buffer_params); + tile->buffers->reset(buffer_params); } tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); @@ -756,9 +756,9 @@ void Session::reset_(BufferParams& buffer_params, int samples) { if(buffers && buffer_params.modified(tile_manager.params)) { gpu_draw_ready = false; - buffers->reset(device, buffer_params); + buffers->reset(buffer_params); if(display) { - display->reset(device, buffer_params); + display->reset(buffer_params); } } @@ -923,7 +923,7 @@ void Session::render() { /* Clear buffers. */ if(buffers && tile_manager.state.sample == tile_manager.range_start_sample) { - buffers->zero(device); + buffers->zero(); } /* Add path trace task. */ diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index a77df55e520..70f6d5bab47 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -416,14 +416,13 @@ void ShaderManager::device_update_common(Device *device, Scene *scene, Progress& /*progress*/) { - device->tex_free(dscene->shader_flag); - dscene->shader_flag.clear(); + dscene->shader_flag.free(); if(scene->shaders.size() == 0) return; uint shader_flag_size = scene->shaders.size()*SHADER_SIZE; - uint *shader_flag = dscene->shader_flag.resize(shader_flag_size); + uint *shader_flag = dscene->shader_flag.alloc(shader_flag_size); uint i = 0; bool has_volumes = false; bool has_transparent_shadow = false; @@ -479,7 +478,7 @@ void ShaderManager::device_update_common(Device *device, has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; } - device->tex_alloc(dscene->shader_flag); + dscene->shader_flag.copy_to_device(); /* lookup tables */ KernelTables *ktables = &dscene->data.tables; @@ -504,12 +503,11 @@ void ShaderManager::device_update_common(Device *device, kintegrator->transparent_shadows = has_transparent_shadow; } -void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene) +void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *scene) { scene->lookup_tables->remove_table(&beckmann_table_offset); - device->tex_free(dscene->shader_flag); - dscene->shader_flag.clear(); + dscene->shader_flag.free(); } void ShaderManager::add_default(Scene *scene) diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index cf0dc97ef3f..db53e366d1e 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -130,7 +130,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene } dscene->svm_nodes.steal_data(svm_nodes); - device->tex_alloc(dscene->svm_nodes); + dscene->svm_nodes.copy_to_device(); for(i = 0; i < scene->shaders.size(); i++) { Shader *shader = scene->shaders[i]; @@ -150,8 +150,7 @@ void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s { device_free_common(device, dscene, scene); - device->tex_free(dscene->svm_nodes); - dscene->svm_nodes.clear(); + dscene->svm_nodes.free(); } /* Graph Compiler */ diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index 9d04778abc6..5cda977b7f1 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -35,25 +35,22 @@ LookupTables::~LookupTables() assert(lookup_tables.size() == 0); } -void LookupTables::device_update(Device *device, DeviceScene *dscene) +void LookupTables::device_update(Device *, DeviceScene *dscene) { if(!need_update) return; VLOG(1) << "Total " << lookup_tables.size() << " lookup tables."; - device->tex_free(dscene->lookup_table); - if(lookup_tables.size() > 0) - device->tex_alloc(dscene->lookup_table); + dscene->lookup_table.copy_to_device(); need_update = false; } -void LookupTables::device_free(Device *device, DeviceScene *dscene) +void LookupTables::device_free(Device *, DeviceScene *dscene) { - device->tex_free(dscene->lookup_table); - dscene->lookup_table.clear(); + dscene->lookup_table.free(); } static size_t round_up_to_multiple(size_t size, size_t chunk) diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 9e74505b14a..ca6b56c9c7e 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -177,6 +177,14 @@ public: } } + T *steal_pointer() + { + T *ptr = data_; + data_ = NULL; + clear(); + return ptr; + } + T* resize(size_t newsize) { if(newsize == 0) { -- cgit v1.2.3