From 26bea849cfa1d020150e0862002d7d5463f07817 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 12 Mar 2020 15:22:18 +0100 Subject: Cleanup: add device_texture for images, distinct from other global memory There was too much image texture specific stuff in device_memory, and too much code duplication between devices. --- intern/cycles/device/cuda/device_cuda.h | 8 +- intern/cycles/device/cuda/device_cuda_impl.cpp | 84 +++++++------- intern/cycles/device/device_cpu.cpp | 86 ++++++++------- intern/cycles/device/device_memory.cpp | 94 +++++++++++++++- intern/cycles/device/device_memory.h | 42 ++++++- intern/cycles/device/opencl/device_opencl.h | 6 +- intern/cycles/device/opencl/device_opencl_impl.cpp | 76 ++++++++----- intern/cycles/kernel/kernel.h | 2 +- intern/cycles/kernel/kernels/cpu/kernel.cpp | 2 +- intern/cycles/render/image.cpp | 122 +++++---------------- intern/cycles/render/image.h | 8 +- intern/cycles/render/scene.cpp | 76 ++++++------- release/datafiles/locale | 2 +- release/scripts/addons | 2 +- release/scripts/addons_contrib | 2 +- source/tools | 2 +- 16 files changed, 352 insertions(+), 262 deletions(-) diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h index 6a0b39434aa..3e397da895b 100644 --- a/intern/cycles/device/cuda/device_cuda.h +++ b/intern/cycles/device/cuda/device_cuda.h @@ -155,9 +155,13 @@ class CUDADevice : public Device { virtual void const_copy_to(const char *name, void *host, size_t size); - void tex_alloc(device_memory &mem); + void global_alloc(device_memory &mem); - void tex_free(device_memory &mem); + void global_free(device_memory &mem); + + void tex_alloc(device_texture &mem); + + void tex_free(device_texture &mem); bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index 4df1ca2097a..a4e7bc2f64a 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -185,7 +185,7 @@ void CUDADevice::cuda_error_message(const string &message) } CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) - : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_TEXTURE) + : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL) { first_error = true; background = background_; @@ -684,7 +684,8 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture) device_memory &mem = *pair.first; CUDAMem *cmem = &pair.second; - bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); + bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && + (&mem != &texture_info); bool is_image = is_texture && (mem.data_height > 1); /* Can't move this type of memory. */ @@ -724,8 +725,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture) device_ptr prev_pointer = max_mem->device_pointer; size_t prev_size = max_mem->device_size; - tex_free(*max_mem); - tex_alloc(*max_mem); + mem_copy_to(*max_mem); size = (max_size >= size) ? 0 : size - max_size; max_mem->device_pointer = prev_pointer; @@ -759,7 +759,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ * If there is not enough room for working memory, we will try to move * textures to host memory, assuming the performance impact would have * been worse for working memory. */ - bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); + bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info); bool is_image = is_texture && (mem.data_height > 1); size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom; @@ -922,6 +922,9 @@ void CUDADevice::mem_alloc(device_memory &mem) else if (mem.type == MEM_TEXTURE) { assert(!"mem_alloc not supported for textures."); } + else if (mem.type == MEM_GLOBAL) { + assert(!"mem_alloc not supported for global memory."); + } else { generic_alloc(mem); } @@ -932,9 +935,13 @@ void CUDADevice::mem_copy_to(device_memory &mem) if (mem.type == MEM_PIXELS) { assert(!"mem_copy_to not supported for pixels."); } + else if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } else if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); } else { if (!mem.device_pointer) { @@ -950,7 +957,7 @@ void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem if (mem.type == MEM_PIXELS && !background) { pixels_copy_from(mem, y, w, h); } - else if (mem.type == MEM_TEXTURE) { + else if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) { assert(!"mem_copy_from not supported for textures."); } else if (mem.host_pointer) { @@ -993,8 +1000,11 @@ void CUDADevice::mem_free(device_memory &mem) if (mem.type == MEM_PIXELS && !background) { pixels_free(mem); } + else if (mem.type == MEM_GLOBAL) { + global_free(mem); + } else if (mem.type == MEM_TEXTURE) { - tex_free(mem); + tex_free((device_texture &)mem); } else { generic_free(mem); @@ -1017,7 +1027,25 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size) cuda_assert(cuMemcpyHtoD(mem, host, size)); } -void CUDADevice::tex_alloc(device_memory &mem) +void CUDADevice::global_alloc(device_memory &mem) +{ + CUDAContextScope scope(this); + + generic_alloc(mem); + generic_copy_to(mem); + + const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer)); +} + +void CUDADevice::global_free(device_memory &mem) +{ + if (mem.device_pointer) { + CUDAContextScope scope(this); + generic_free(mem); + } +} + +void CUDADevice::tex_alloc(device_texture &mem) { CUDAContextScope scope(this); @@ -1027,7 +1055,7 @@ void CUDADevice::tex_alloc(device_memory &mem) size_t size = mem.memory_size(); CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; - switch (mem.extension) { + switch (mem.info.extension) { case EXTENSION_REPEAT: address_mode = CU_TR_ADDRESS_MODE_WRAP; break; @@ -1043,22 +1071,13 @@ void CUDADevice::tex_alloc(device_memory &mem) } CUfilter_mode filter_mode; - if (mem.interpolation == INTERPOLATION_CLOSEST) { + if (mem.info.interpolation == INTERPOLATION_CLOSEST) { filter_mode = CU_TR_FILTER_MODE_POINT; } else { filter_mode = CU_TR_FILTER_MODE_LINEAR; } - /* Data Storage */ - if (mem.interpolation == INTERPOLATION_NONE) { - generic_alloc(mem); - generic_copy_to(mem); - - const_copy_to(bind_name.c_str(), &mem.device_pointer, sizeof(mem.device_pointer)); - return; - } - /* Image Texture Storage */ CUarray_format_enum format; switch (mem.data_type) { @@ -1169,15 +1188,6 @@ void CUDADevice::tex_alloc(device_memory &mem) } /* Kepler+, bindless textures. */ - int slot = 0; - if (string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - CUDA_RESOURCE_DESC resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -1214,6 +1224,7 @@ void CUDADevice::tex_alloc(device_memory &mem) cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); /* Resize once */ + const uint slot = mem.slot; if (slot >= texture_info.size()) { /* Allocate some slots in advance, to reduce amount * of re-allocations. */ @@ -1221,19 +1232,12 @@ void CUDADevice::tex_alloc(device_memory &mem) } /* Set Mapping and tag that we need to (re-)upload to device */ - TextureInfo &info = texture_info[slot]; - info.data = (uint64_t)cmem->texobject; - info.data_type = mem.image_data_type; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; + texture_info[slot] = mem.info; + texture_info[slot].data = (uint64_t)cmem->texobject; need_texture_info = true; } -void CUDADevice::tex_free(device_memory &mem) +void CUDADevice::tex_free(device_texture &mem) { if (mem.device_pointer) { CUDAContextScope scope(this); diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 56569a5ee3d..57e8523e02a 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -264,7 +264,7 @@ class CPUDevice : public Device { CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) : Device(info_, stats_, profiler_, background_), - texture_info(this, "__texture_info", MEM_TEXTURE), + texture_info(this, "__texture_info", MEM_GLOBAL), #define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name)) REGISTER_KERNEL(path_trace), REGISTER_KERNEL(convert_to_half_float), @@ -372,6 +372,9 @@ class CPUDevice : public Device { if (mem.type == MEM_TEXTURE) { assert(!"mem_alloc not supported for textures."); } + else if (mem.type == MEM_GLOBAL) { + assert(!"mem_alloc not supported for global memory."); + } else { if (mem.name) { VLOG(1) << "Buffer allocate: " << mem.name << ", " @@ -396,9 +399,13 @@ class CPUDevice : public Device { void mem_copy_to(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); } else if (mem.type == MEM_PIXELS) { assert(!"mem_copy_to not supported for pixels."); @@ -430,8 +437,11 @@ class CPUDevice : public Device { void mem_free(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); } else if (mem.device_pointer) { if (mem.type == MEM_DEVICE_ONLY) { @@ -453,52 +463,50 @@ class CPUDevice : public Device { kernel_const_copy(&kernel_globals, name, host, size); } - void tex_alloc(device_memory &mem) + void global_alloc(device_memory &mem) { - VLOG(1) << "Texture allocate: " << mem.name << ", " + VLOG(1) << "Global memory allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - if (mem.interpolation == INTERPOLATION_NONE) { - /* Data texture. */ - kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size); - } - else { - /* Image Texture. */ - int slot = 0; - if (string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - - if (slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount - * of re-allocations. */ - texture_info.resize(slot + 128); - } + kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size); - TextureInfo &info = texture_info[slot]; - info.data = (uint64_t)mem.host_pointer; - info.data_type = mem.image_data_type; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; + mem.device_pointer = (device_ptr)mem.host_pointer; + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); + } - need_texture_info = true; + void global_free(device_memory &mem) + { + if (mem.device_pointer) { + mem.device_pointer = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; } + } + + void tex_alloc(device_texture &mem) + { + VLOG(1) << "Texture allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; mem.device_pointer = (device_ptr)mem.host_pointer; mem.device_size = mem.memory_size(); stats.mem_alloc(mem.device_size); + + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount of re-allocations. */ + texture_info.resize(slot + 128); + } + + texture_info[slot] = mem.info; + texture_info[slot].data = (uint64_t)mem.host_pointer; + need_texture_info = true; } - void tex_free(device_memory &mem) + void tex_free(device_texture &mem) { if (mem.device_pointer) { mem.device_pointer = 0; diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index f22b91f3fa1..36a0247bb3d 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -31,9 +31,6 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type) data_depth(0), type(type), name(name), - image_data_type(IMAGE_DATA_NUM_TYPES), - interpolation(INTERPOLATION_NONE), - extension(EXTENSION_REPEAT), device(device), device_pointer(0), host_pointer(0), @@ -77,7 +74,7 @@ void device_memory::host_free() void device_memory::device_alloc() { - assert(!device_pointer && type != MEM_TEXTURE); + assert(!device_pointer && type != MEM_TEXTURE && type != MEM_GLOBAL); device->mem_alloc(*this); } @@ -97,7 +94,7 @@ void device_memory::device_copy_to() void device_memory::device_copy_from(int y, int w, int h, int elem) { - assert(type != MEM_TEXTURE && type != MEM_READ_ONLY); + assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL); device->mem_copy_from(*this, y, w, h, elem); } @@ -140,4 +137,91 @@ device_sub_ptr::~device_sub_ptr() device->mem_free_sub_ptr(ptr); } +/* Device Texture */ + +device_texture::device_texture(Device *device, + const char *name, + const uint slot, + ImageDataType image_data_type, + InterpolationType interpolation, + ExtensionType extension) + : device_memory(device, name, MEM_TEXTURE), slot(slot) +{ + switch (image_data_type) { + case IMAGE_DATA_TYPE_FLOAT4: + data_type = TYPE_FLOAT; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_FLOAT: + data_type = TYPE_FLOAT; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_BYTE4: + data_type = TYPE_UCHAR; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_BYTE: + data_type = TYPE_UCHAR; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_HALF4: + data_type = TYPE_HALF; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_HALF: + data_type = TYPE_HALF; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_USHORT4: + data_type = TYPE_UINT16; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_USHORT: + data_type = TYPE_UINT16; + data_elements = 1; + break; + case IMAGE_DATA_NUM_TYPES: + assert(0); + return; + } + + memset(&info, 0, sizeof(info)); + info.data_type = image_data_type; + info.interpolation = interpolation; + info.extension = extension; +} + +device_texture::~device_texture() +{ +} + +/* Host memory allocation. */ +void *device_texture::alloc(const size_t width, const size_t height, const size_t depth) +{ + const size_t new_size = size(width, height, depth); + + if (new_size != data_size) { + device_free(); + host_free(); + host_pointer = host_alloc(data_elements * datatype_size(data_type) * new_size); + assert(device_pointer == 0); + } + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + + info.width = width; + info.height = height; + info.depth = depth; + + return host_pointer; +} + +void device_texture::copy_to_device() +{ + device_copy_to(); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 617cc0c4342..1c20db900bc 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -23,6 +23,7 @@ #include "util/util_array.h" #include "util/util_half.h" +#include "util/util_string.h" #include "util/util_texture.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -31,7 +32,14 @@ CCL_NAMESPACE_BEGIN class Device; -enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS }; +enum MemoryType { + MEM_READ_ONLY, + MEM_READ_WRITE, + MEM_DEVICE_ONLY, + MEM_GLOBAL, + MEM_TEXTURE, + MEM_PIXELS +}; /* Supported Data Types */ @@ -208,9 +216,6 @@ class device_memory { size_t data_depth; MemoryType type; const char *name; - ImageDataType image_data_type; - InterpolationType interpolation; - ExtensionType extension; /* Pointers. */ Device *device; @@ -311,7 +316,7 @@ template class device_only_memory : public device_memory { * in and copied to the device with copy_to_device(). Or alternatively * allocated and set to zero on the device with zero_to_device(). * - * When using memory type MEM_TEXTURE, a pointer to this memory will be + * When using memory type MEM_GLOBAL, a pointer to this memory will be * automatically attached to kernel globals, using the provided name * matching an entry in kernel_textures.h. */ @@ -504,6 +509,33 @@ class device_sub_ptr { device_ptr ptr; }; +/* Device Texture + * + * 2D or 3D image texture memory. */ + +class device_texture : public device_memory { + public: + device_texture(Device *device, + const char *name, + const uint slot, + ImageDataType image_data_type, + InterpolationType interpolation, + ExtensionType extension); + ~device_texture(); + + void *alloc(const size_t width, const size_t height, const size_t depth = 0); + void copy_to_device(); + + uint slot; + TextureInfo info; + + protected: + size_t size(const size_t width, const size_t height, const size_t depth) + { + return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth); + } +}; + CCL_NAMESPACE_END #endif /* __DEVICE_MEMORY_H__ */ diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h index b761726b1ad..53017c34b2b 100644 --- a/intern/cycles/device/opencl/device_opencl.h +++ b/intern/cycles/device/opencl/device_opencl.h @@ -428,8 +428,10 @@ class OpenCLDevice : public Device { int mem_sub_ptr_alignment(); void const_copy_to(const char *name, void *host, size_t size); - void tex_alloc(device_memory &mem); - void tex_free(device_memory &mem); + void global_alloc(device_memory &mem); + void global_free(device_memory &mem); + void tex_alloc(device_texture &mem); + void tex_free(device_texture &mem); size_t global_size_round_up(int group_size, int global_size); void enqueue_kernel(cl_kernel kernel, diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 09d3b78dd28..6b1b0e4c369 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -613,7 +613,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b kernel_programs(this), preview_programs(this), memory_manager(this), - texture_info(this, "__texture_info", MEM_TEXTURE) + texture_info(this, "__texture_info", MEM_GLOBAL) { cpPlatform = NULL; cdDevice = NULL; @@ -945,7 +945,7 @@ void OpenCLDevice::mem_alloc(device_memory &mem) cl_mem_flags mem_flag; void *mem_ptr = NULL; - if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) + if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) mem_flag = CL_MEM_READ_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -969,9 +969,13 @@ void OpenCLDevice::mem_alloc(device_memory &mem) void OpenCLDevice::mem_copy_to(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); } else { if (!mem.device_pointer) { @@ -1077,8 +1081,11 @@ void OpenCLDevice::mem_zero(device_memory &mem) void OpenCLDevice::mem_free(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); } else { if (mem.device_pointer) { @@ -1101,7 +1108,7 @@ int OpenCLDevice::mem_sub_ptr_alignment() device_ptr OpenCLDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int size) { cl_mem_flags mem_flag; - if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) + if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) mem_flag = CL_MEM_READ_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -1141,9 +1148,9 @@ void OpenCLDevice::const_copy_to(const char *name, void *host, size_t size) data->copy_to_device(); } -void OpenCLDevice::tex_alloc(device_memory &mem) +void OpenCLDevice::global_alloc(device_memory &mem) { - VLOG(1) << "Texture allocate: " << mem.name << ", " + VLOG(1) << "Global memory allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -1155,7 +1162,7 @@ void OpenCLDevice::tex_alloc(device_memory &mem) textures_need_update = true; } -void OpenCLDevice::tex_free(device_memory &mem) +void OpenCLDevice::global_free(device_memory &mem) { if (mem.device_pointer) { mem.device_pointer = 0; @@ -1173,6 +1180,25 @@ void OpenCLDevice::tex_free(device_memory &mem) } } +void OpenCLDevice::tex_alloc(device_texture &mem) +{ + VLOG(1) << "Texture allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + + memory_manager.alloc(mem.name, mem); + /* Set the pointer to non-null to keep code that inspects its value from thinking its + * unallocated. */ + mem.device_pointer = 1; + textures[mem.name] = &mem; + textures_need_update = true; +} + +void OpenCLDevice::tex_free(device_texture &mem) +{ + global_free(mem); +} + size_t OpenCLDevice::global_size_round_up(int group_size, int global_size) { int r = global_size % group_size; @@ -1273,10 +1299,10 @@ void OpenCLDevice::flush_texture_buffers() foreach (TexturesMap::value_type &tex, textures) { string name = tex.first; + device_memory *mem = tex.second; - if (string_startswith(name, "__tex_image")) { - int pos = name.rfind("_"); - int id = atoi(name.data() + pos + 1); + if (mem->type == MEM_TEXTURE) { + const uint id = ((device_texture *)mem)->slot; texture_slots.push_back(texture_slot_t(name, num_data_slots + id)); num_slots = max(num_slots, num_data_slots + id + 1); } @@ -1289,24 +1315,20 @@ void OpenCLDevice::flush_texture_buffers() /* Fill in descriptors */ foreach (texture_slot_t &slot, texture_slots) { + device_memory *mem = textures[slot.name]; TextureInfo &info = texture_info[slot.slot]; MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); - info.data = desc.offset; - info.cl_buffer = desc.device_buffer; - - if (string_startswith(slot.name, "__tex_image")) { - device_memory *mem = textures[slot.name]; - info.data_type = mem->image_data_type; - - info.width = mem->data_width; - info.height = mem->data_height; - info.depth = mem->data_depth; - - info.interpolation = mem->interpolation; - info.extension = mem->extension; + if (mem->type == MEM_TEXTURE) { + info = ((device_texture *)mem)->info; + } + else { + memset(&info, 0, sizeof(TextureInfo)); } + + info.data = desc.offset; + info.cl_buffer = desc.device_buffer; } /* Force write of descriptors. */ diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index dfdd8843f29..3debfc76fbb 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -38,7 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg); bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); -void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size); +void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size); #define KERNEL_ARCH cpu #include "kernel/kernels/cpu/kernel_cpu.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index f2146302a27..8829a14ead5 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -72,7 +72,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s assert(0); } -void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) +void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) { if (0) { } diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 692f6683e1b..78c854c9261 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -158,7 +158,7 @@ int ImageHandle::svm_slot(const int tile_index) const return tile_slots[tile_index]; } -device_memory *ImageHandle::image_memory(const int tile_index) const +device_texture *ImageHandle::image_memory(const int tile_index) const { if (tile_index >= tile_slots.size()) { return NULL; @@ -438,10 +438,8 @@ static bool image_associate_alpha(ImageManager::Image *img) img->params.alpha_type == IMAGE_ALPHA_CHANNEL_PACKED); } -template -bool ImageManager::file_load_image(Image *img, - int texture_limit, - device_vector &tex_img) +template +bool ImageManager::file_load_image(Image *img, int texture_limit) { /* we only handle certain number of components */ if (!(img->metadata.channels >= 1 && img->metadata.channels <= 4)) { @@ -470,7 +468,7 @@ bool ImageManager::file_load_image(Image *img, } else { thread_scoped_lock device_lock(device_mutex); - pixels = (StorageType *)tex_img.alloc(width, height, depth); + pixels = (StorageType *)img->mem->alloc(width, height, depth); } if (pixels == NULL) { @@ -587,7 +585,7 @@ bool ImageManager::file_load_image(Image *img, { thread_scoped_lock device_lock(device_mutex); - texture_pixels = (StorageType *)tex_img.alloc(scaled_width, scaled_height, scaled_depth); + texture_pixels = (StorageType *)img->mem->alloc(scaled_width, scaled_height, scaled_depth); } memcpy(texture_pixels, &scaled_pixels[0], scaled_pixels.size() * sizeof(StorageType)); @@ -596,14 +594,6 @@ bool ImageManager::file_load_image(Image *img, return true; } -static void image_set_device_memory(ImageManager::Image *img, device_memory *mem) -{ - img->mem = mem; - mem->image_data_type = img->metadata.type; - mem->interpolation = img->params.interpolation; - mem->extension = img->params.extension; -} - void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Progress *progress) { if (progress->get_cancel()) { @@ -619,7 +609,7 @@ void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Pro load_image_metadata(img); ImageDataType type = img->metadata.type; - /* Slot assignment */ + /* Name for debugging. */ img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), slot); /* Free previous texture in slot. */ @@ -629,154 +619,98 @@ void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Pro img->mem = NULL; } + img->mem = new device_texture( + device, img->mem_name.c_str(), slot, type, img->params.interpolation, img->params.extension); + /* Create new texture. */ if (type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - float *pixels = (float *)tex_img->alloc(1, 1); + float *pixels = (float *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; pixels[1] = TEX_IMAGE_MISSING_G; pixels[2] = TEX_IMAGE_MISSING_B; pixels[3] = TEX_IMAGE_MISSING_A; } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_FLOAT) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - float *pixels = (float *)tex_img->alloc(1, 1); + float *pixels = (float *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_BYTE4) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uchar *pixels = (uchar *)tex_img->alloc(1, 1); + uchar *pixels = (uchar *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 255); pixels[1] = (TEX_IMAGE_MISSING_G * 255); pixels[2] = (TEX_IMAGE_MISSING_B * 255); pixels[3] = (TEX_IMAGE_MISSING_A * 255); } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_BYTE) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uchar *pixels = (uchar *)tex_img->alloc(1, 1); + uchar *pixels = (uchar *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 255); } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_HALF4) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - half *pixels = (half *)tex_img->alloc(1, 1); + half *pixels = (half *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; pixels[1] = TEX_IMAGE_MISSING_G; pixels[2] = TEX_IMAGE_MISSING_B; pixels[3] = TEX_IMAGE_MISSING_A; } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_USHORT) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1); + uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 65535); } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_USHORT4) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1); + uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 65535); pixels[1] = (TEX_IMAGE_MISSING_G * 65535); pixels[2] = (TEX_IMAGE_MISSING_B * 65535); pixels[3] = (TEX_IMAGE_MISSING_A * 65535); } - - image_set_device_memory(img, tex_img); - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_HALF) { - device_vector *tex_img = new device_vector( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image(img, texture_limit, *tex_img)) { + if (!file_load_image(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - half *pixels = (half *)tex_img->alloc(1, 1); + half *pixels = (half *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; } + } - image_set_device_memory(img, tex_img); - + { thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); + img->mem->copy_to_device(); } /* Cleanup memory in image loader. */ diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 82f3e2759c6..734bb83f774 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -142,7 +142,7 @@ class ImageHandle { ImageMetaData metadata(); int svm_slot(const int tile_index = 0) const; - device_memory *image_memory(const int tile_index = 0) const; + device_texture *image_memory(const int tile_index = 0) const; protected: vector tile_slots; @@ -191,7 +191,7 @@ class ImageManager { bool builtin; string mem_name; - device_memory *mem; + device_texture *mem; int users; thread_mutex mutex; @@ -212,8 +212,8 @@ class ImageManager { void load_image_metadata(Image *img); - template - bool file_load_image(Image *img, int texture_limit, device_vector &tex_img); + template + bool file_load_image(Image *img, int texture_limit); void device_load_image(Device *device, Scene *scene, int slot, Progress *progress); void device_free_image(Device *device, int slot); diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 1e20513e88a..76f62fd6690 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -41,44 +41,44 @@ CCL_NAMESPACE_BEGIN DeviceScene::DeviceScene(Device *device) - : bvh_nodes(device, "__bvh_nodes", MEM_TEXTURE), - bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_TEXTURE), - object_node(device, "__object_node", MEM_TEXTURE), - prim_tri_index(device, "__prim_tri_index", MEM_TEXTURE), - prim_tri_verts(device, "__prim_tri_verts", MEM_TEXTURE), - prim_type(device, "__prim_type", MEM_TEXTURE), - prim_visibility(device, "__prim_visibility", MEM_TEXTURE), - prim_index(device, "__prim_index", MEM_TEXTURE), - prim_object(device, "__prim_object", MEM_TEXTURE), - prim_time(device, "__prim_time", MEM_TEXTURE), - tri_shader(device, "__tri_shader", MEM_TEXTURE), - tri_vnormal(device, "__tri_vnormal", MEM_TEXTURE), - tri_vindex(device, "__tri_vindex", MEM_TEXTURE), - tri_patch(device, "__tri_patch", MEM_TEXTURE), - tri_patch_uv(device, "__tri_patch_uv", MEM_TEXTURE), - curves(device, "__curves", MEM_TEXTURE), - curve_keys(device, "__curve_keys", MEM_TEXTURE), - patches(device, "__patches", MEM_TEXTURE), - objects(device, "__objects", MEM_TEXTURE), - object_motion_pass(device, "__object_motion_pass", MEM_TEXTURE), - object_motion(device, "__object_motion", MEM_TEXTURE), - object_flag(device, "__object_flag", MEM_TEXTURE), - camera_motion(device, "__camera_motion", MEM_TEXTURE), - attributes_map(device, "__attributes_map", MEM_TEXTURE), - attributes_float(device, "__attributes_float", MEM_TEXTURE), - attributes_float2(device, "__attributes_float2", MEM_TEXTURE), - attributes_float3(device, "__attributes_float3", MEM_TEXTURE), - attributes_uchar4(device, "__attributes_uchar4", MEM_TEXTURE), - light_distribution(device, "__light_distribution", MEM_TEXTURE), - lights(device, "__lights", MEM_TEXTURE), - light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_TEXTURE), - light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE), - particles(device, "__particles", MEM_TEXTURE), - svm_nodes(device, "__svm_nodes", MEM_TEXTURE), - shaders(device, "__shaders", MEM_TEXTURE), - lookup_table(device, "__lookup_table", MEM_TEXTURE), - sample_pattern_lut(device, "__sample_pattern_lut", MEM_TEXTURE), - ies_lights(device, "__ies", MEM_TEXTURE) + : bvh_nodes(device, "__bvh_nodes", MEM_GLOBAL), + bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_GLOBAL), + object_node(device, "__object_node", MEM_GLOBAL), + prim_tri_index(device, "__prim_tri_index", MEM_GLOBAL), + prim_tri_verts(device, "__prim_tri_verts", MEM_GLOBAL), + prim_type(device, "__prim_type", MEM_GLOBAL), + prim_visibility(device, "__prim_visibility", MEM_GLOBAL), + prim_index(device, "__prim_index", MEM_GLOBAL), + prim_object(device, "__prim_object", MEM_GLOBAL), + prim_time(device, "__prim_time", MEM_GLOBAL), + tri_shader(device, "__tri_shader", MEM_GLOBAL), + tri_vnormal(device, "__tri_vnormal", MEM_GLOBAL), + tri_vindex(device, "__tri_vindex", MEM_GLOBAL), + tri_patch(device, "__tri_patch", MEM_GLOBAL), + tri_patch_uv(device, "__tri_patch_uv", MEM_GLOBAL), + curves(device, "__curves", MEM_GLOBAL), + curve_keys(device, "__curve_keys", MEM_GLOBAL), + patches(device, "__patches", MEM_GLOBAL), + objects(device, "__objects", MEM_GLOBAL), + object_motion_pass(device, "__object_motion_pass", MEM_GLOBAL), + object_motion(device, "__object_motion", MEM_GLOBAL), + object_flag(device, "__object_flag", MEM_GLOBAL), + camera_motion(device, "__camera_motion", MEM_GLOBAL), + attributes_map(device, "__attributes_map", MEM_GLOBAL), + attributes_float(device, "__attributes_float", MEM_GLOBAL), + attributes_float2(device, "__attributes_float2", MEM_GLOBAL), + attributes_float3(device, "__attributes_float3", MEM_GLOBAL), + attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL), + light_distribution(device, "__light_distribution", MEM_GLOBAL), + lights(device, "__lights", MEM_GLOBAL), + light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_GLOBAL), + light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_GLOBAL), + particles(device, "__particles", MEM_GLOBAL), + svm_nodes(device, "__svm_nodes", MEM_GLOBAL), + shaders(device, "__shaders", MEM_GLOBAL), + lookup_table(device, "__lookup_table", MEM_GLOBAL), + sample_pattern_lut(device, "__sample_pattern_lut", MEM_GLOBAL), + ies_lights(device, "__ies", MEM_GLOBAL) { memset((void *)&data, 0, sizeof(data)); } diff --git a/release/datafiles/locale b/release/datafiles/locale index 2a25e004396..34d98762cef 160000 --- a/release/datafiles/locale +++ b/release/datafiles/locale @@ -1 +1 @@ -Subproject commit 2a25e004396d3f135a98da132b496923275a3dcc +Subproject commit 34d98762cef85b9c065f21a051d1dbe3bf2979b7 diff --git a/release/scripts/addons b/release/scripts/addons index 7f50343d1c5..47a32a5370d 160000 --- a/release/scripts/addons +++ b/release/scripts/addons @@ -1 +1 @@ -Subproject commit 7f50343d1c53ca9a5ca747f47e3b1e5bbcbe8471 +Subproject commit 47a32a5370d36942674621e5a03e57e8dd4986d8 diff --git a/release/scripts/addons_contrib b/release/scripts/addons_contrib index 9468c406fb5..70b649775ee 160000 --- a/release/scripts/addons_contrib +++ b/release/scripts/addons_contrib @@ -1 +1 @@ -Subproject commit 9468c406fb554e32ff47b62bfef356b3908ec651 +Subproject commit 70b649775eeeebedb02c1c7b7aa996a7f6294177 diff --git a/source/tools b/source/tools index 4a6f1706258..603f076606f 160000 --- a/source/tools +++ b/source/tools @@ -1 +1 @@ -Subproject commit 4a6f1706258439db3ee5a50ec6938fef79c7cb97 +Subproject commit 603f076606f052adc97d937633bfeb9b268ec202 -- cgit v1.2.3