Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-08-08 14:12:04 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-08-08 14:12:04 +0300
commitec8ae4d5e9f735ab5aeb149dea8aa47ab8f8f977 (patch)
treeea5ca862e3ee999fa65ebfefd46d19ba7ef034cd /intern/cycles/device
parentb53e35c655d40769e46cbe91929531fbe20d2977 (diff)
Cycles: Pack kernel textures into buffers for OpenCL
Image textures were being packed into a single buffer for OpenCL, which limited the amount of memory available for images to the size of one buffer (usually 4gb on AMD hardware). By packing textures into multiple buffers that limit is removed, while simultaneously reducing the number of buffers that need to be passed to each kernel. Benchmarks were within 2%. Fixes T51554. Differential Revision: https://developer.blender.org/D2745
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/CMakeLists.txt2
-rw-r--r--intern/cycles/device/device.cpp2
-rw-r--r--intern/cycles/device/device.h2
-rw-r--r--intern/cycles/device/device_cpu.cpp1
-rw-r--r--intern/cycles/device/device_cuda.cpp1
-rw-r--r--intern/cycles/device/device_opencl.cpp1
-rw-r--r--intern/cycles/device/opencl/memory_manager.cpp253
-rw-r--r--intern/cycles/device/opencl/memory_manager.h105
-rw-r--r--intern/cycles/device/opencl/opencl.h43
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp149
-rw-r--r--intern/cycles/device/opencl/opencl_mega.cpp5
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp25
12 files changed, 544 insertions, 45 deletions
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 74ec57ddf74..3c632160fbd 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -34,11 +34,13 @@ set(SRC
set(SRC_OPENCL
opencl/opencl.h
+ opencl/memory_manager.h
opencl/opencl_base.cpp
opencl/opencl_mega.cpp
opencl/opencl_split.cpp
opencl/opencl_util.cpp
+ opencl/memory_manager.cpp
)
if(WITH_CYCLES_NETWORK)
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index a54bb77f9f3..f64436aec7b 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -379,11 +379,9 @@ DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
info.num = 0;
info.has_bindless_textures = true;
- info.pack_images = false;
foreach(DeviceInfo &device, subdevices) {
assert(device.type == info.multi_devices[0].type);
- info.pack_images |= device.pack_images;
info.has_bindless_textures &= device.has_bindless_textures;
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index b3b693c630c..26d6d380a10 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -53,7 +53,6 @@ public:
int num;
bool display_device;
bool advanced_shading;
- bool pack_images;
bool has_bindless_textures; /* flag for GPU and Multi device */
bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
vector<DeviceInfo> multi_devices;
@@ -65,7 +64,6 @@ public:
num = 0;
display_device = false;
advanced_shading = true;
- pack_images = false;
has_bindless_textures = false;
use_split_kernel = false;
}
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index a00be3eeaab..6e09c5f88c2 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -977,7 +977,6 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.id = "CPU";
info.num = 0;
info.advanced_shading = true;
- info.pack_images = false;
devices.insert(devices.begin(), info);
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index dbf636e1405..6769ed0229e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -2164,7 +2164,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.advanced_shading = (major >= 2);
info.has_bindless_textures = (major >= 3);
- info.pack_images = false;
int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 681b8214b03..aa380ec4b94 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -95,7 +95,6 @@ void device_opencl_info(vector<DeviceInfo>& devices)
/* We don't know if it's used for display, but assume it is. */
info.display_device = true;
info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name);
- info.pack_images = true;
info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name,
device_type);
info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp
new file mode 100644
index 00000000000..b67dfef88aa
--- /dev/null
+++ b/intern/cycles/device/opencl/memory_manager.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPENCL
+
+#include "util/util_foreach.h"
+
+#include "device/opencl/opencl.h"
+#include "device/opencl/memory_manager.h"
+
+CCL_NAMESPACE_BEGIN
+
+void MemoryManager::DeviceBuffer::add_allocation(Allocation& allocation)
+{
+ allocations.push_back(&allocation);
+}
+
+void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
+{
+ bool need_realloc = false;
+
+ /* Calculate total size and remove any freed. */
+ size_t total_size = 0;
+
+ for(int i = allocations.size()-1; i >= 0; i--) {
+ Allocation* allocation = allocations[i];
+
+ /* Remove allocations that have been freed. */
+ if(!allocation->mem || allocation->mem->memory_size() == 0) {
+ allocation->device_buffer = NULL;
+ allocation->size = 0;
+
+ allocations.erase(allocations.begin()+i);
+
+ need_realloc = true;
+
+ continue;
+ }
+
+ /* Get actual size for allocation. */
+ size_t alloc_size = align_up(allocation->mem->memory_size(), 16);
+
+ if(allocation->size != alloc_size) {
+ /* Allocation is either new or resized. */
+ allocation->size = alloc_size;
+ allocation->needs_copy_to_device = true;
+
+ need_realloc = true;
+ }
+
+ total_size += alloc_size;
+ }
+
+ if(need_realloc) {
+ cl_ulong max_buffer_size;
+ clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
+
+ if(total_size > max_buffer_size) {
+ device->set_error("Scene too complex to fit in available memory.");
+ return;
+ }
+
+ device_memory *new_buffer = new device_memory;
+
+ new_buffer->resize(total_size);
+ device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY);
+
+ size_t offset = 0;
+
+ foreach(Allocation* allocation, allocations) {
+ if(allocation->needs_copy_to_device) {
+ /* Copy from host to device. */
+ opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
+ CL_MEM_PTR(new_buffer->device_pointer),
+ CL_FALSE,
+ offset,
+ allocation->mem->memory_size(),
+ (void*)allocation->mem->data_pointer,
+ 0, NULL, NULL
+ ));
+
+ allocation->needs_copy_to_device = false;
+ }
+ else {
+ /* Fast copy from memory already on device. */
+ opencl_device_assert(device, clEnqueueCopyBuffer(device->cqCommandQueue,
+ CL_MEM_PTR(buffer->device_pointer),
+ CL_MEM_PTR(new_buffer->device_pointer),
+ allocation->desc.offset,
+ offset,
+ allocation->mem->memory_size(),
+ 0, NULL, NULL
+ ));
+ }
+
+ allocation->desc.offset = offset;
+ offset += allocation->size;
+ }
+
+ device->mem_free(*buffer);
+ delete buffer;
+
+ buffer = new_buffer;
+ }
+ else {
+ assert(total_size == buffer->data_size);
+
+ size_t offset = 0;
+
+ foreach(Allocation* allocation, allocations) {
+ if(allocation->needs_copy_to_device) {
+ /* Copy from host to device. */
+ opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
+ CL_MEM_PTR(buffer->device_pointer),
+ CL_FALSE,
+ offset,
+ allocation->mem->memory_size(),
+ (void*)allocation->mem->data_pointer,
+ 0, NULL, NULL
+ ));
+
+ allocation->needs_copy_to_device = false;
+ }
+
+ offset += allocation->size;
+ }
+ }
+
+ /* Not really necessary, but seems to improve responsiveness for some reason. */
+ clFinish(device->cqCommandQueue);
+}
+
+void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device)
+{
+ device->mem_free(*buffer);
+}
+
+MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
+{
+ DeviceBuffer* smallest = device_buffers;
+
+ foreach(DeviceBuffer& device_buffer, device_buffers) {
+ if(device_buffer.size < smallest->size) {
+ smallest = &device_buffer;
+ }
+ }
+
+ return smallest;
+}
+
+MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false)
+{
+}
+
+void MemoryManager::free()
+{
+ foreach(DeviceBuffer& device_buffer, device_buffers) {
+ device_buffer.free(device);
+ }
+}
+
+void MemoryManager::alloc(const char *name, device_memory& mem)
+{
+ Allocation& allocation = allocations[name];
+
+ allocation.mem = &mem;
+ allocation.needs_copy_to_device = true;
+
+ if(!allocation.device_buffer) {
+ DeviceBuffer* device_buffer = smallest_device_buffer();
+ allocation.device_buffer = device_buffer;
+
+ allocation.desc.device_buffer = device_buffer - device_buffers;
+
+ device_buffer->add_allocation(allocation);
+
+ device_buffer->size += mem.memory_size();
+ }
+
+ need_update = true;
+}
+
+bool MemoryManager::free(device_memory& mem)
+{
+ foreach(AllocationsMap::value_type& value, allocations) {
+ Allocation& allocation = value.second;
+ if(allocation.mem == &mem) {
+
+ allocation.device_buffer->size -= mem.memory_size();
+
+ allocation.mem = NULL;
+ allocation.needs_copy_to_device = false;
+
+ need_update = true;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name)
+{
+ update_device_memory();
+
+ Allocation& allocation = allocations[name];
+ return allocation.desc;
+}
+
+void MemoryManager::update_device_memory()
+{
+ if(!need_update) {
+ return;
+ }
+
+ need_update = false;
+
+ foreach(DeviceBuffer& device_buffer, device_buffers) {
+ device_buffer.update_device_memory(device);
+ }
+}
+
+void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
+{
+ update_device_memory();
+
+ foreach(DeviceBuffer& device_buffer, device_buffers) {
+ if(device_buffer.buffer->device_pointer) {
+ device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer);
+ }
+ else {
+ device->kernel_set_args(kernel, (*narg)++, device->null_mem);
+ }
+ }
+}
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_OPENCL */
+
diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h
new file mode 100644
index 00000000000..3714405d026
--- /dev/null
+++ b/intern/cycles/device/opencl/memory_manager.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "device/device.h"
+
+#include "util/util_map.h"
+#include "util/util_vector.h"
+#include "util/util_string.h"
+
+#include "clew.h"
+
+CCL_NAMESPACE_BEGIN
+
+class OpenCLDeviceBase;
+
+class MemoryManager {
+public:
+ static const int NUM_DEVICE_BUFFERS = 8;
+
+ struct BufferDescriptor {
+ uint device_buffer;
+ cl_ulong offset;
+ };
+
+private:
+ struct DeviceBuffer;
+
+ struct Allocation {
+ device_memory *mem;
+
+ DeviceBuffer *device_buffer;
+ size_t size; /* Size of actual allocation, may be larger than requested. */
+
+ BufferDescriptor desc;
+
+ bool needs_copy_to_device;
+
+ Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false)
+ {
+ }
+ };
+
+ struct DeviceBuffer {
+ device_memory *buffer;
+ vector<Allocation*> allocations;
+ size_t size; /* Size of all allocations. */
+
+ DeviceBuffer() : buffer(new device_memory), size(0)
+ {
+ }
+
+ ~DeviceBuffer() {
+ delete buffer;
+ buffer = NULL;
+ }
+
+ void add_allocation(Allocation& allocation);
+
+ void update_device_memory(OpenCLDeviceBase *device);
+
+ void free(OpenCLDeviceBase *device);
+ };
+
+ OpenCLDeviceBase *device;
+
+ DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS];
+
+ typedef unordered_map<string, Allocation> AllocationsMap;
+ AllocationsMap allocations;
+
+ bool need_update;
+
+ DeviceBuffer* smallest_device_buffer();
+
+public:
+ MemoryManager(OpenCLDeviceBase *device);
+
+ void free(); /* Free all memory. */
+
+ void alloc(const char *name, device_memory& mem);
+ bool free(device_memory& mem);
+
+ BufferDescriptor get_descriptor(string name);
+
+ void update_device_memory();
+ void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
+};
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 78ca377d933..0dae9136870 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -25,6 +25,8 @@
#include "clew.h"
+#include "device/opencl/memory_manager.h"
+
CCL_NAMESPACE_BEGIN
/* Disable workarounds, seems to be working fine on latest drivers. */
@@ -224,6 +226,18 @@ public:
static string get_kernel_md5();
};
+#define opencl_device_assert(device, stmt) \
+ { \
+ cl_int err = stmt; \
+ \
+ if(err != CL_SUCCESS) { \
+ string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
+ if((device)->error_msg == "") \
+ (device)->error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ } \
+ } (void)0
+
#define opencl_assert(stmt) \
{ \
cl_int err = stmt; \
@@ -344,6 +358,7 @@ public:
size_t global_size_round_up(int group_size, int global_size);
void enqueue_kernel(cl_kernel kernel, size_t w, size_t h, size_t max_workgroup_size = -1);
void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
+ void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
void shader(DeviceTask& task);
@@ -525,6 +540,34 @@ protected:
virtual string build_options_for_base_program(
const DeviceRequestedFeatures& /*requested_features*/);
+
+private:
+ MemoryManager memory_manager;
+ friend MemoryManager;
+
+ struct tex_info_t {
+ uint buffer, padding;
+ cl_ulong offset;
+ uint width, height, depth, options;
+ };
+ static_assert_align(tex_info_t, 16);
+
+ vector<tex_info_t> texture_descriptors;
+ device_memory texture_descriptors_buffer;
+
+ struct Texture {
+ device_memory* mem;
+ InterpolationType interpolation;
+ ExtensionType extension;
+ };
+
+ typedef map<string, Texture> TexturesMap;
+ TexturesMap textures;
+
+ bool textures_need_update;
+
+protected:
+ void flush_texture_buffers();
};
Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 509da7a0a84..63b5e004b7d 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -63,7 +63,7 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where)
}
OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
-: Device(info, stats, background_)
+: Device(info, stats, background_), memory_manager(this)
{
cpPlatform = NULL;
cdDevice = NULL;
@@ -71,6 +71,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
cqCommandQueue = NULL;
null_mem = 0;
device_initialized = false;
+ textures_need_update = true;
vector<OpenCLPlatformDevice> usable_devices;
OpenCLInfo::get_usable_devices(&usable_devices);
@@ -126,6 +127,12 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
return;
}
+ /* Allocate this right away so that texture_descriptors_buffer is placed at offset 0 in the device memory buffers */
+ texture_descriptors.resize(1);
+ texture_descriptors_buffer.resize(1);
+ texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
+ memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
+
fprintf(stderr, "Device init success\n");
device_initialized = true;
}
@@ -134,6 +141,8 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
{
task_pool.stop();
+ memory_manager.free();
+
if(null_mem)
clReleaseMemObject(CL_MEM_PTR(null_mem));
@@ -493,29 +502,31 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
void OpenCLDeviceBase::tex_alloc(const char *name,
device_memory& mem,
- InterpolationType /*interpolation*/,
- ExtensionType /*extension*/)
+ InterpolationType interpolation,
+ ExtensionType extension)
{
VLOG(1) << "Texture allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- mem_alloc(NULL, mem, MEM_READ_ONLY);
- mem_copy_to(mem);
- assert(mem_map.find(name) == mem_map.end());
- mem_map.insert(MemMap::value_type(name, mem.device_pointer));
+
+ memory_manager.alloc(name, mem);
+
+ textures[name] = {&mem, interpolation, extension};
+
+ textures_need_update = true;
}
void OpenCLDeviceBase::tex_free(device_memory& mem)
{
- if(mem.device_pointer) {
- foreach(const MemMap::value_type& value, mem_map) {
- if(value.second == mem.device_pointer) {
- mem_map.erase(value.first);
- break;
- }
- }
+ if(memory_manager.free(mem)) {
+ textures_need_update = true;
+ }
- mem_free(mem);
+ foreach(TexturesMap::value_type& value, textures) {
+ if(value.second.mem == &mem) {
+ textures.erase(value.first);
+ break;
+ }
}
}
@@ -581,6 +592,104 @@ void OpenCLDeviceBase::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const
opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr));
}
+void OpenCLDeviceBase::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
+{
+ flush_texture_buffers();
+
+ memory_manager.set_kernel_arg_buffers(kernel, narg);
+}
+
+void OpenCLDeviceBase::flush_texture_buffers()
+{
+ if(!textures_need_update) {
+ return;
+ }
+ textures_need_update = false;
+
+ /* Setup slots for textures. */
+ int num_slots = 0;
+
+ struct texture_slot_t {
+ string name;
+ int slot;
+ };
+
+ vector<texture_slot_t> texture_slots;
+
+#define KERNEL_TEX(type, ttype, name) \
+ if(textures.find(#name) != textures.end()) { \
+ texture_slots.push_back({#name, num_slots}); \
+ } \
+ num_slots++;
+#include "kernel/kernel_textures.h"
+
+ int num_data_slots = num_slots;
+
+ foreach(TexturesMap::value_type& tex, textures) {
+ string name = tex.first;
+
+ if(string_startswith(name, "__tex_image")) {
+ int pos = name.rfind("_");
+ int id = atoi(name.data() + pos + 1);
+
+ texture_slots.push_back({name, num_data_slots + id});
+
+ num_slots = max(num_slots, num_data_slots + id + 1);
+ }
+ }
+
+ /* Realloc texture descriptors buffer. */
+ memory_manager.free(texture_descriptors_buffer);
+
+ texture_descriptors.resize(num_slots);
+ texture_descriptors_buffer.resize(num_slots * sizeof(tex_info_t));
+ texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
+
+ memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
+
+ /* Fill in descriptors */
+ foreach(texture_slot_t& slot, texture_slots) {
+ Texture& tex = textures[slot.name];
+
+ tex_info_t& info = texture_descriptors[slot.slot];
+
+ MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
+
+ info.offset = desc.offset;
+ info.buffer = desc.device_buffer;
+
+ if(string_startswith(slot.name, "__tex_image")) {
+ info.width = tex.mem->data_width;
+ info.height = tex.mem->data_height;
+ info.depth = tex.mem->data_depth;
+
+ info.options = 0;
+
+ if(tex.interpolation == INTERPOLATION_CLOSEST) {
+ info.options |= (1 << 0);
+ }
+
+ switch(tex.extension) {
+ case EXTENSION_REPEAT:
+ info.options |= (1 << 1);
+ break;
+ case EXTENSION_EXTEND:
+ info.options |= (1 << 2);
+ break;
+ case EXTENSION_CLIP:
+ info.options |= (1 << 3);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /* Force write of descriptors. */
+ memory_manager.free(texture_descriptors_buffer);
+ memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
+}
+
void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{
/* cast arguments to cl types */
@@ -605,10 +714,7 @@ void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_
d_rgba,
d_buffer);
-#define KERNEL_TEX(type, ttype, name) \
-set_kernel_arg_mem(ckFilmConvertKernel, &start_arg_index, #name);
-#include "kernel/kernel_textures.h"
-#undef KERNEL_TEX
+ set_kernel_arg_buffers(ckFilmConvertKernel, &start_arg_index);
start_arg_index += kernel_set_args(ckFilmConvertKernel,
start_arg_index,
@@ -1030,10 +1136,7 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
d_output_luma);
}
-#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &start_arg_index, #name);
-#include "kernel/kernel_textures.h"
-#undef KERNEL_TEX
+ set_kernel_arg_buffers(kernel, &start_arg_index);
start_arg_index += kernel_set_args(kernel,
start_arg_index,
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 06c15bcf401..ec47fdafa3d 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -82,10 +82,7 @@ public:
d_buffer,
d_rng_state);
-#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name);
-#include "kernel/kernel_textures.h"
-#undef KERNEL_TEX
+ set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index);
start_arg_index += kernel_set_args(ckPathTraceKernel,
start_arg_index,
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 76d9983e9a2..df7c064a24f 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -99,6 +99,8 @@ public:
void thread_run(DeviceTask *task)
{
+ flush_texture_buffers();
+
if(task->type == DeviceTask::FILM_CONVERT) {
film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
}
@@ -113,10 +115,19 @@ public:
*/
typedef struct KernelGlobals {
ccl_constant KernelData *data;
+ ccl_global char *buffers[8];
+
+ typedef struct _tex_info_t {
+ uint buffer, padding;
+ ulong offset;
+ uint width, height, depth, options;
+ } _tex_info_t;
+
#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name;
+ _tex_info_t name;
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
+
SplitData split_data;
SplitParams split_param_data;
} KernelGlobals;
@@ -217,11 +228,7 @@ public:
*cached_memory.ray_state,
*cached_memory.rng_state);
-/* TODO(sergey): Avoid map lookup here. */
-#define KERNEL_TEX(type, ttype, name) \
- device->set_kernel_arg_mem(program(), &start_arg_index, #name);
-#include "kernel/kernel_textures.h"
-#undef KERNEL_TEX
+ device->set_kernel_arg_buffers(program(), &start_arg_index);
start_arg_index +=
device->kernel_set_args(program(),
@@ -352,11 +359,7 @@ public:
ray_state,
rtile.rng_state);
-/* TODO(sergey): Avoid map lookup here. */
-#define KERNEL_TEX(type, ttype, name) \
- device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name);
-#include "kernel/kernel_textures.h"
-#undef KERNEL_TEX
+ device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index);
start_arg_index +=
device->kernel_set_args(device->program_data_init(),