Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2017-10-24 03:04:58 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2017-10-24 03:05:41 +0300
commitf5456df095291c6cb2d0223a179746c8e514cd15 (patch)
tree76384b0c91806c95d55b007850ee225889a3250e /intern/cycles/device
parent254daf8f8c276a4e5292e5a12fcfa88296131878 (diff)
parent070a668d04844610059aaedc80c49e9038fd1779 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/CMakeLists.txt1
-rw-r--r--intern/cycles/device/device.cpp37
-rw-r--r--intern/cycles/device/device.h44
-rw-r--r--intern/cycles/device/device_cpu.cpp111
-rw-r--r--intern/cycles/device/device_cuda.cpp305
-rw-r--r--intern/cycles/device/device_denoising.cpp96
-rw-r--r--intern/cycles/device/device_denoising.h18
-rw-r--r--intern/cycles/device/device_memory.cpp122
-rw-r--r--intern/cycles/device/device_memory.h265
-rw-r--r--intern/cycles/device/device_multi.cpp171
-rw-r--r--intern/cycles/device/device_network.cpp200
-rw-r--r--intern/cycles/device/device_network.h26
-rw-r--r--intern/cycles/device/device_split_kernel.cpp45
-rw-r--r--intern/cycles/device/device_split_kernel.h2
-rw-r--r--intern/cycles/device/opencl/memory_manager.cpp18
-rw-r--r--intern/cycles/device/opencl/memory_manager.h8
-rw-r--r--intern/cycles/device/opencl/opencl.h34
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp137
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp17
19 files changed, 921 insertions, 736 deletions
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 3c632160fbd..959c0aa97c9 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -26,6 +26,7 @@ set(SRC
device_cpu.cpp
device_cuda.cpp
device_denoising.cpp
+ device_memory.cpp
device_multi.cpp
device_opencl.cpp
device_split_kernel.cpp
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 5f01bd535d0..0c20270ea42 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -90,24 +90,6 @@ Device::~Device()
}
}
-void Device::pixels_alloc(device_memory& mem)
-{
- mem_alloc("pixels", mem, MEM_READ_WRITE);
-}
-
-void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
-{
- if(mem.data_type == TYPE_HALF)
- mem_copy_from(mem, y, w, h, sizeof(half4));
- else
- mem_copy_from(mem, y, w, h, sizeof(uchar4));
-}
-
-void Device::pixels_free(device_memory& mem)
-{
- mem_free(mem);
-}
-
/* TODO move shaders to standalone .glsl file. */
const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
@@ -257,7 +239,9 @@ void Device::draw_pixels(
bool transparent, const DeviceDrawParams &draw_params)
{
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
- pixels_copy_from(rgba, y, w, h);
+
+ assert(mem.type == MEM_PIXELS);
+ mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
GLuint texid;
glGenTextures(1, &texid);
@@ -526,10 +510,13 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
info.has_bindless_textures = true;
info.has_volume_decoupled = true;
info.has_qbvh = true;
+ info.has_osl = true;
+
foreach(const DeviceInfo &device, subdevices) {
info.has_bindless_textures &= device.has_bindless_textures;
info.has_volume_decoupled &= device.has_volume_decoupled;
info.has_qbvh &= device.has_qbvh;
+ info.has_osl &= device.has_osl;
if(device.type == DEVICE_CPU && subdevices.size() > 1) {
if(background) {
@@ -572,16 +559,4 @@ void Device::free_memory()
devices.free_memory();
}
-
-device_sub_ptr::device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type)
- : device(device)
-{
- ptr = device->mem_alloc_sub_ptr(mem, offset, size, type);
-}
-
-device_sub_ptr::~device_sub_ptr()
-{
- device->mem_free_sub_ptr(ptr);
-}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 47332f52ace..c79f086fc2d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -57,6 +57,7 @@ public:
bool has_bindless_textures; /* flag for GPU and Multi device */
bool has_volume_decoupled;
bool has_qbvh;
+ bool has_osl;
bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
int cpu_threads;
vector<DeviceInfo> multi_devices;
@@ -72,6 +73,7 @@ public:
has_bindless_textures = false;
has_volume_decoupled = false;
has_qbvh = false;
+ has_osl = false;
use_split_kernel = false;
}
@@ -264,7 +266,7 @@ protected:
bool bind_fallback_display_space_shader(const float width, const float height);
- virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/, MemoryType /*type*/)
+ virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
@@ -292,36 +294,12 @@ public:
/* statistics */
Stats &stats;
- /* regular memory */
- virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0;
- virtual void mem_copy_to(device_memory& mem) = 0;
- virtual void mem_copy_from(device_memory& mem,
- int y, int w, int h, int elem) = 0;
- virtual void mem_zero(device_memory& mem) = 0;
- virtual void mem_free(device_memory& mem) = 0;
-
+ /* memory alignment */
virtual int mem_address_alignment() { return 16; }
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
- /* texture memory */
- virtual void tex_alloc(const char * /*name*/,
- device_memory& /*mem*/,
- InterpolationType interpolation = INTERPOLATION_NONE,
- ExtensionType extension = EXTENSION_REPEAT)
- {
- (void)interpolation; /* Ignored. */
- (void)extension; /* Ignored. */
- };
-
- virtual void tex_free(device_memory& /*mem*/) {};
-
- /* pixel memory */
- virtual void pixels_alloc(device_memory& mem);
- virtual void pixels_copy_from(device_memory& mem, int y, int w, int h);
- virtual void pixels_free(device_memory& mem);
-
/* open shading language, only for CPU device */
virtual void *osl_memory() { return NULL; }
@@ -369,6 +347,20 @@ public:
static void tag_update();
static void free_memory();
+
+protected:
+ /* Memory allocation, only accessed through device_memory. */
+ friend class MultiDevice;
+ friend class DeviceServer;
+ friend class device_memory;
+
+ virtual void mem_alloc(device_memory& mem) = 0;
+ virtual void mem_copy_to(device_memory& mem) = 0;
+ virtual void mem_copy_from(device_memory& mem,
+ int y, int w, int h, int elem) = 0;
+ virtual void mem_zero(device_memory& mem) = 0;
+ virtual void mem_free(device_memory& mem) = 0;
+
private:
/* Indicted whether device types and devices lists were initialized. */
static bool need_types_update, need_devices_update;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0ba00da16a6..32ab18fe164 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -209,6 +209,7 @@ public:
CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_)
: Device(info_, stats_, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE),
#define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name))
REGISTER_KERNEL(path_trace),
REGISTER_KERNEL(convert_to_half_float),
@@ -268,7 +269,7 @@ public:
~CPUDevice()
{
task_pool.stop();
- tex_free(texture_info);
+ texture_info.free();
}
virtual bool show_samples() const
@@ -279,33 +280,50 @@ public:
void load_texture_info()
{
if(need_texture_info) {
- tex_free(texture_info);
- tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+ texture_info.copy_to_device();
need_texture_info = false;
}
}
- void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
+ void mem_alloc(device_memory& mem)
{
- if(name) {
- VLOG(1) << "Buffer allocate: " << name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
+ if(mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
}
+ else {
+ if(mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+ }
- mem.device_pointer = mem.data_pointer;
+ mem.device_pointer = mem.data_pointer;
- if(!mem.device_pointer) {
- mem.device_pointer = (device_ptr)malloc(mem.memory_size());
- }
+ if(!mem.device_pointer) {
+ mem.device_pointer = (device_ptr)malloc(mem.memory_size());
+ }
- mem.device_size = mem.memory_size();
- stats.mem_alloc(mem.device_size);
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
}
- void mem_copy_to(device_memory& /*mem*/)
+ void mem_copy_to(device_memory& mem)
{
- /* no-op */
+ if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else if(mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else {
+ if(!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ /* copy is no-op */
+ }
}
void mem_copy_from(device_memory& /*mem*/,
@@ -317,12 +335,21 @@ public:
void mem_zero(device_memory& mem)
{
- memset((void*)mem.device_pointer, 0, mem.memory_size());
+ if(!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ if(mem.device_pointer) {
+ memset((void*)mem.device_pointer, 0, mem.memory_size());
+ }
}
void mem_free(device_memory& mem)
{
- if(mem.device_pointer) {
+ if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else if(mem.device_pointer) {
if(!mem.data_pointer) {
free((void*)mem.device_pointer);
}
@@ -332,7 +359,7 @@ public:
}
}
- virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/)
+ virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/)
{
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
}
@@ -342,32 +369,25 @@ public:
kernel_const_copy(&kernel_globals, name, host, size);
}
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType interpolation,
- ExtensionType extension)
+ void tex_alloc(device_memory& mem)
{
- VLOG(1) << "Texture allocate: " << name << ", "
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- if(interpolation == INTERPOLATION_NONE) {
+ if(mem.interpolation == INTERPOLATION_NONE) {
/* Data texture. */
kernel_tex_copy(&kernel_globals,
- name,
+ mem.name,
mem.data_pointer,
- mem.data_width,
- mem.data_height,
- mem.data_depth,
- interpolation,
- extension);
+ mem.data_size);
}
else {
/* Image Texture. */
int flat_slot = 0;
- if(string_startswith(name, "__tex_image")) {
- int pos = string(name).rfind("_");
- flat_slot = atoi(name + pos + 1);
+ if(string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
}
else {
assert(0);
@@ -382,8 +402,8 @@ public:
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)mem.data_pointer;
info.cl_buffer = 0;
- info.interpolation = interpolation;
- info.extension = extension;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
@@ -437,13 +457,13 @@ public:
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
- mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
-
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
+ task->tiles_mem.copy_to_device();
+
return true;
}
@@ -728,9 +748,8 @@ public:
}
/* allocate buffer for kernel globals */
- device_only_memory<KernelGlobals> kgbuffer;
- kgbuffer.resize(1);
- mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+ device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
+ kgbuffer.alloc_to_device(1);
KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init());
@@ -740,8 +759,7 @@ public:
requested_features.max_closure = MAX_CLOSURE;
if(!split_kernel->load_kernels(requested_features)) {
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
- mem_free(kgbuffer);
-
+ kgbuffer.free();
delete split_kernel;
return;
}
@@ -751,8 +769,8 @@ public:
while(task.acquire_tile(this, tile)) {
if(tile.task == RenderTile::PATH_TRACE) {
if(use_split_kernel) {
- device_memory data;
- split_kernel->path_trace(&task, tile, kgbuffer, data);
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
}
else {
path_trace(task, tile, kg);
@@ -772,7 +790,7 @@ public:
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
kg->~KernelGlobals();
- mem_free(kgbuffer);
+ kgbuffer.free();
delete split_kernel;
}
@@ -1028,6 +1046,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.advanced_shading = true;
info.has_qbvh = system_cpu_support_sse2();
info.has_volume_decoupled = true;
+ info.has_osl = true;
devices.insert(devices.begin(), info);
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0f057e9966f..c742e91c561 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -217,7 +217,8 @@ public:
}
CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
- : Device(info, stats, background_)
+ : Device(info, stats, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE)
{
first_error = true;
background = background_;
@@ -274,7 +275,7 @@ public:
delete split_kernel;
if(info.has_bindless_textures) {
- tex_free(texture_info);
+ texture_info.free();
}
cuda_assert(cuCtxDestroy(cuContext));
@@ -547,20 +548,19 @@ public:
void load_texture_info()
{
if(info.has_bindless_textures && need_texture_info) {
- tex_free(texture_info);
- tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+ texture_info.copy_to_device();
need_texture_info = false;
}
}
- void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
+ void generic_alloc(device_memory& mem)
{
CUDAContextScope scope(this);
- if(name) {
- VLOG(1) << "Buffer allocate: " << name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
+ if(mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
}
CUdeviceptr device_pointer;
@@ -571,31 +571,88 @@ public:
stats.mem_alloc(size);
}
+ void generic_copy_to(device_memory& mem)
+ {
+ if(mem.device_pointer) {
+ CUDAContextScope scope(this);
+ cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
+ }
+ }
+
+ void generic_free(device_memory& mem)
+ {
+ if(mem.device_pointer) {
+ CUDAContextScope scope(this);
+
+ cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
+
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+ }
+
+ void mem_alloc(device_memory& mem)
+ {
+ if(mem.type == MEM_PIXELS && !background) {
+ pixels_alloc(mem);
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else {
+ generic_alloc(mem);
+ }
+ }
+
void mem_copy_to(device_memory& mem)
{
- CUDAContextScope scope(this);
+ if(mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else {
+ if(!mem.device_pointer) {
+ generic_alloc(mem);
+ }
- if(mem.device_pointer)
- cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
+ generic_copy_to(mem);
+ }
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
- CUDAContextScope scope(this);
- size_t offset = elem*y*w;
- size_t size = elem*w*h;
-
- if(mem.device_pointer) {
- cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
- (CUdeviceptr)(mem.device_pointer + offset), size));
+ if(mem.type == MEM_PIXELS && !background) {
+ pixels_copy_from(mem, y, w, h);
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ assert(!"mem_copy_from not supported for textures.");
}
else {
- memset((char*)mem.data_pointer + offset, 0, size);
+ CUDAContextScope scope(this);
+ size_t offset = elem*y*w;
+ size_t size = elem*w*h;
+
+ if(mem.device_pointer) {
+ cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
+ (CUdeviceptr)(mem.device_pointer + offset), size));
+ }
+ else {
+ memset((char*)mem.data_pointer + offset, 0, size);
+ }
}
}
void mem_zero(device_memory& mem)
{
+ if(!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
}
@@ -608,18 +665,18 @@ public:
void mem_free(device_memory& mem)
{
- if(mem.device_pointer) {
- CUDAContextScope scope(this);
- cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
-
- mem.device_pointer = 0;
-
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
+ if(mem.type == MEM_PIXELS && !background) {
+ pixels_free(mem);
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else {
+ generic_free(mem);
}
}
- virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/)
+ virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/)
{
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
}
@@ -635,14 +692,11 @@ public:
cuda_assert(cuMemcpyHtoD(mem, host, size));
}
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType interpolation,
- ExtensionType extension)
+ void tex_alloc(device_memory& mem)
{
CUDAContextScope scope(this);
- VLOG(1) << "Texture allocate: " << name << ", "
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
@@ -650,12 +704,12 @@ public:
bool has_bindless_textures = info.has_bindless_textures;
/* General variables for both architectures */
- string bind_name = name;
+ string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
- switch(extension) {
+ switch(mem.extension) {
case EXTENSION_REPEAT:
address_mode = CU_TR_ADDRESS_MODE_WRAP;
break;
@@ -671,7 +725,7 @@ public:
}
CUfilter_mode filter_mode;
- if(interpolation == INTERPOLATION_CLOSEST) {
+ if(mem.interpolation == INTERPOLATION_CLOSEST) {
filter_mode = CU_TR_FILTER_MODE_POINT;
}
else {
@@ -681,13 +735,13 @@ public:
/* General variables for Fermi */
CUtexref texref = NULL;
- if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
+ if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) {
if(mem.data_depth > 1) {
/* Kernel uses different bind names for 2d and 3d float textures,
* so we have to adjust couple of things here.
*/
vector<string> tokens;
- string_split(tokens, name, "_");
+ string_split(tokens, mem.name, "_");
bind_name = string_printf("__tex_image_%s_3d_%s",
tokens[2].c_str(),
tokens[3].c_str());
@@ -700,10 +754,10 @@ public:
}
}
- if(interpolation == INTERPOLATION_NONE) {
+ if(mem.interpolation == INTERPOLATION_NONE) {
/* Data Storage */
- mem_alloc(NULL, mem, MEM_READ_ONLY);
- mem_copy_to(mem);
+ generic_alloc(mem);
+ generic_copy_to(mem);
CUdeviceptr cumem;
size_t cubytes;
@@ -802,9 +856,9 @@ public:
if(has_bindless_textures) {
/* Bindless Textures - Kepler */
int flat_slot = 0;
- if(string_startswith(name, "__tex_image")) {
- int pos = string(name).rfind("_");
- flat_slot = atoi(name + pos + 1);
+ if(string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
}
else {
assert(0);
@@ -843,8 +897,8 @@ public:
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)tex;
info.cl_buffer = 0;
- info.interpolation = interpolation;
- info.extension = extension;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
@@ -869,7 +923,7 @@ public:
}
/* Fermi and Kepler */
- tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
+ tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE);
}
void tex_free(device_memory& mem)
@@ -893,21 +947,19 @@ public:
}
else {
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
- mem_free(mem);
+ generic_free(mem);
}
}
}
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
- mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
-
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
- mem_copy_to(task->tiles_mem);
+ task->tiles_mem.copy_to_device();
return !have_error();
}
@@ -1274,7 +1326,7 @@ public:
task.unmap_neighbor_tiles(rtiles, this);
}
- void path_trace(DeviceTask& task, RenderTile& rtile)
+ void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
{
if(have_error())
return;
@@ -1297,8 +1349,7 @@ public:
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
- device_vector<WorkTile> work_tiles;
- work_tiles.resize(1);
+ work_tiles.alloc(1);
WorkTile *wtile = work_tiles.get_data();
wtile->x = rtile.x;
@@ -1308,9 +1359,6 @@ public:
wtile->offset = rtile.offset;
wtile->stride = rtile.stride;
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
- mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
-
- CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
/* Prepare work size. More step samples render faster, but for now we
* remain conservative for GPUs connected to a display to avoid driver
@@ -1331,8 +1379,9 @@ public:
/* Setup and copy work tile to device. */
wtile->start_sample = sample;
wtile->num_samples = min(step_samples, end_sample - sample);;
- mem_copy_to(work_tiles);
+ work_tiles.copy_to_device();
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
uint num_blocks = divide_up(total_work_size, num_threads_per_block);
@@ -1356,8 +1405,6 @@ public:
break;
}
}
-
- mem_free(work_tiles);
}
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
@@ -1510,98 +1557,82 @@ public:
void pixels_alloc(device_memory& mem)
{
- if(!background) {
- PixelMem pmem;
+ PixelMem pmem;
- pmem.w = mem.data_width;
- pmem.h = mem.data_height;
+ pmem.w = mem.data_width;
+ pmem.h = mem.data_height;
- CUDAContextScope scope(this);
+ CUDAContextScope scope(this);
- glGenBuffers(1, &pmem.cuPBO);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- if(mem.data_type == TYPE_HALF)
- glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
- else
- glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
+ glGenBuffers(1, &pmem.cuPBO);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ if(mem.data_type == TYPE_HALF)
+ glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- glGenTextures(1, &pmem.cuTexId);
- glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- if(mem.data_type == TYPE_HALF)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
- else
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glBindTexture(GL_TEXTURE_2D, 0);
+ glGenTextures(1, &pmem.cuTexId);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if(mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
- CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+ CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
- if(result == CUDA_SUCCESS) {
- mem.device_pointer = pmem.cuTexId;
- pixel_mem_map[mem.device_pointer] = pmem;
+ if(result == CUDA_SUCCESS) {
+ mem.device_pointer = pmem.cuTexId;
+ pixel_mem_map[mem.device_pointer] = pmem;
- mem.device_size = mem.memory_size();
- stats.mem_alloc(mem.device_size);
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
- return;
- }
- else {
- /* failed to register buffer, fallback to no interop */
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
-
- background = true;
- }
+ return;
}
+ else {
+ /* failed to register buffer, fallback to no interop */
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
- Device::pixels_alloc(mem);
+ background = true;
+ }
}
void pixels_copy_from(device_memory& mem, int y, int w, int h)
{
- if(!background) {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
- size_t offset = sizeof(uchar)*4*y*w;
- memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
- glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
- return;
- }
+ CUDAContextScope scope(this);
- Device::pixels_copy_from(mem, y, w, h);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+ size_t offset = sizeof(uchar)*4*y*w;
+ memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
void pixels_free(device_memory& mem)
{
if(mem.device_pointer) {
- if(!background) {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
- pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
- mem.device_pointer = 0;
+ CUDAContextScope scope(this);
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
- return;
- }
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+ mem.device_pointer = 0;
- Device::pixels_free(mem);
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
}
}
@@ -1611,6 +1642,8 @@ public:
int dx, int dy, int dw, int dh, bool transparent,
const DeviceDrawParams &draw_params)
{
+ assert(mem.type == MEM_PIXELS);
+
if(!background) {
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
PixelMem pmem = pixel_mem_map[mem.device_pointer];
@@ -1747,15 +1780,17 @@ public:
}
}
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
if(tile.task == RenderTile::PATH_TRACE) {
if(use_split_kernel()) {
- device_memory void_buffer;
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- path_trace(*task, tile);
+ path_trace(*task, tile, work_tiles);
}
}
else if(tile.task == RenderTile::DENOISE) {
@@ -1773,6 +1808,8 @@ public:
break;
}
}
+
+ work_tiles.free();
}
else if(task->type == DeviceTask::SHADER) {
shader(*task);
@@ -1906,9 +1943,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
{
CUDAContextScope scope(device);
- device_vector<uint64_t> size_buffer;
- size_buffer.resize(1);
- device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
uint threads = num_threads;
CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
@@ -1931,9 +1968,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
1, 1, 1,
0, 0, (void**)&args, 0));
- device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
+ size_buffer.copy_from_device(0, 1, 1);
size_t size = size_buffer[0];
- device->mem_free(size_buffer);
+ size_buffer.free();
return size;
}
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 619cc1d171e..2d39721e3d3 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -44,7 +44,7 @@ void DenoisingTask::init_from_devicetask(const DeviceTask &task)
void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
{
- tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int));
+ tiles = (TilesInfo*) tiles_mem.alloc(sizeof(TilesInfo)/sizeof(int));
device_ptr buffers[9];
for(int i = 0; i < 9; i++) {
@@ -75,22 +75,21 @@ bool DenoisingTask::run_denoising()
buffer.w = align_up(rect.z - rect.x, 4);
buffer.h = rect.w - rect.y;
buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float)));
- buffer.mem.resize(buffer.pass_stride * buffer.passes);
- device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE);
+ buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes);
device_ptr null_ptr = (device_ptr) 0;
/* Prefilter shadow feature. */
{
- device_sub_ptr unfiltered_a (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr unfiltered_b (device, buffer.mem, 1*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr sample_var (device, buffer.mem, 2*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr sample_var_var (device, buffer.mem, 3*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr buffer_var (device, buffer.mem, 5*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr filtered_var (device, buffer.mem, 6*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr nlm_temporary_1(device, buffer.mem, 7*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr nlm_temporary_2(device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr nlm_temporary_3(device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
+ device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
nlm_state.temporary_1_ptr = *nlm_temporary_1;
nlm_state.temporary_2_ptr = *nlm_temporary_2;
@@ -123,17 +122,17 @@ bool DenoisingTask::run_denoising()
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
/* Combine the two double-filtered halves to a final shadow feature. */
- device_sub_ptr shadow_pass(device, buffer.mem, 4*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
}
/* Prefilter general features. */
{
- device_sub_ptr unfiltered (device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr variance (device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr nlm_temporary_1(device, buffer.mem, 10*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr nlm_temporary_2(device, buffer.mem, 11*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr nlm_temporary_3(device, buffer.mem, 12*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride);
nlm_state.temporary_1_ptr = *nlm_temporary_1;
nlm_state.temporary_2_ptr = *nlm_temporary_2;
@@ -143,7 +142,7 @@ bool DenoisingTask::run_denoising()
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) {
- device_sub_ptr feature_pass(device, buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
/* Smooth the pass and store the result in the denoising buffers. */
@@ -160,48 +159,41 @@ bool DenoisingTask::run_denoising()
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
- device_only_memory<float> temp_color;
- temp_color.resize(3*buffer.pass_stride);
- device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
+ device_only_memory<float> temp_color(device, "Denoising temporary color");
+ temp_color.alloc_to_device(3*buffer.pass_stride);
for(int pass = 0; pass < num_color_passes; pass++) {
- device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride);
+ device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
}
{
- device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
+ device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
+ device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
}
- device->mem_free(temp_color);
+ temp_color.free();
}
storage.w = filter_area.z;
storage.h = filter_area.w;
- storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE);
- storage.rank.resize(storage.w*storage.h);
- device->mem_alloc("Denoising Transform", storage.transform, MEM_READ_WRITE);
- device->mem_alloc("Denoising Rank", storage.rank, MEM_READ_WRITE);
+ storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE);
+ storage.rank.alloc_to_device(storage.w*storage.h);
functions.construct_transform();
- device_only_memory<float> temporary_1;
- device_only_memory<float> temporary_2;
- temporary_1.resize(buffer.w*buffer.h);
- temporary_2.resize(buffer.w*buffer.h);
- device->mem_alloc("Denoising NLM temporary 1", temporary_1, MEM_READ_WRITE);
- device->mem_alloc("Denoising NLM temporary 2", temporary_2, MEM_READ_WRITE);
+ device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1");
+ device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2");
+ temporary_1.alloc_to_device(buffer.w*buffer.h);
+ temporary_2.alloc_to_device(buffer.w*buffer.h);
reconstruction_state.temporary_1_ptr = temporary_1.device_pointer;
reconstruction_state.temporary_2_ptr = temporary_2.device_pointer;
- storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE);
- storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE);
- device->mem_alloc("Denoising XtWX", storage.XtWX, MEM_READ_WRITE);
- device->mem_alloc("Denoising XtWY", storage.XtWY, MEM_READ_WRITE);
+ storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE);
+ storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE);
reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x;
@@ -213,19 +205,19 @@ bool DenoisingTask::run_denoising()
reconstruction_state.source_h = rect.w-rect.y;
{
- device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
- device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+ device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
+ device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr);
}
- device->mem_free(storage.XtWX);
- device->mem_free(storage.XtWY);
- device->mem_free(storage.transform);
- device->mem_free(storage.rank);
- device->mem_free(temporary_1);
- device->mem_free(temporary_2);
- device->mem_free(buffer.mem);
- device->mem_free(tiles_mem);
+ storage.XtWX.free();
+ storage.XtWY.free();
+ storage.transform.free();
+ storage.rank.free();
+ temporary_1.free();
+ temporary_2.free();
+ buffer.mem.free();
+ tiles_mem.free();
return true;
}
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index def7b72f67d..606f7422ac8 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -123,9 +123,21 @@ public:
device_only_memory<float3> XtWY;
int w;
int h;
+
+ Storage(Device *device)
+ : transform(device, "denoising transform"),
+ rank(device, "denoising rank"),
+ XtWX(device, "denoising XtWX"),
+ XtWY(device, "denoising XtWY")
+ {}
} storage;
- DenoisingTask(Device *device) : device(device) {}
+ DenoisingTask(Device *device)
+ : tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE),
+ storage(device),
+ buffer(device),
+ device(device)
+ {}
void init_from_devicetask(const DeviceTask &task);
@@ -137,6 +149,10 @@ public:
int w;
int h;
device_only_memory<float> mem;
+
+ DenoiseBuffers(Device *device)
+ : mem(device, "denoising pixel buffer")
+ {}
} buffer;
protected:
diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
new file mode 100644
index 00000000000..9f4f60e7531
--- /dev/null
+++ b/intern/cycles/device/device_memory.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/device.h"
+#include "device/device_memory.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Device Memory */
+
+device_memory::device_memory(Device *device, const char *name, MemoryType type)
+: data_type(device_type_traits<uchar>::data_type),
+ data_elements(device_type_traits<uchar>::num_elements),
+ data_pointer(0),
+ data_size(0),
+ device_size(0),
+ data_width(0),
+ data_height(0),
+ data_depth(0),
+ type(type),
+ name(name),
+ interpolation(INTERPOLATION_NONE),
+ extension(EXTENSION_REPEAT),
+ device(device),
+ device_pointer(0)
+{
+}
+
+device_memory::~device_memory()
+{
+}
+
+device_ptr device_memory::host_alloc(size_t size)
+{
+ if(!size) {
+ return 0;
+ }
+
+ size_t alignment = device->mem_address_alignment();
+ device_ptr ptr = (device_ptr)util_aligned_malloc(size, alignment);
+
+ if(ptr) {
+ util_guarded_mem_alloc(size);
+ }
+ else {
+ throw std::bad_alloc();
+ }
+
+ return ptr;
+}
+
+void device_memory::host_free(device_ptr ptr, size_t size)
+{
+ if(ptr) {
+ util_guarded_mem_free(size);
+ util_aligned_free((void*)ptr);
+ }
+}
+
+void device_memory::device_alloc()
+{
+ assert(!device_pointer && type != MEM_TEXTURE);
+ device->mem_alloc(*this);
+}
+
+void device_memory::device_free()
+{
+ if(device_pointer) {
+ device->mem_free(*this);
+ }
+}
+
+void device_memory::device_copy_to()
+{
+ assert(type != MEM_PIXELS && type != MEM_WRITE_ONLY);
+ if(data_size) {
+ device->mem_copy_to(*this);
+ }
+}
+
+void device_memory::device_copy_from(int y, int w, int h, int elem)
+{
+ assert(type != MEM_TEXTURE && type != MEM_READ_ONLY);
+ device->mem_copy_from(*this, y, w, h, elem);
+}
+
+void device_memory::device_zero()
+{
+ assert(type != MEM_PIXELS && type != MEM_WRITE_ONLY);
+ if(data_size) {
+ device->mem_zero(*this);
+ }
+}
+
+/* Device Sub Ptr */
+
+device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)
+: device(mem.device)
+{
+ ptr = device->mem_alloc_sub_ptr(mem, offset, size);
+}
+
+device_sub_ptr::~device_sub_ptr()
+{
+ device->mem_free_sub_ptr(ptr);
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index eeeca61496e..7bf8bdc1cea 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -19,17 +19,11 @@
/* Device Memory
*
- * This file defines data types that can be used in device memory arrays, and
- * a device_vector<T> type to store such arrays.
- *
- * device_vector<T> contains an STL vector, metadata about the data type,
- * dimensions, elements, and a device pointer. For the CPU device this is just
- * a pointer to the STL vector data, as no copying needs to take place. For
- * other devices this is a pointer to device memory, where we will copy memory
- * to and from. */
+ * Data types for allocating, copying and freeing device memory. */
#include "util/util_debug.h"
#include "util/util_half.h"
+#include "util/util_texture.h"
#include "util/util_types.h"
#include "util/util_vector.h"
@@ -40,7 +34,9 @@ class Device;
enum MemoryType {
MEM_READ_ONLY,
MEM_WRITE_ONLY,
- MEM_READ_WRITE
+ MEM_READ_WRITE,
+ MEM_TEXTURE,
+ MEM_PIXELS
};
/* Supported Data Types */
@@ -171,7 +167,10 @@ template<> struct device_type_traits<uint64_t> {
static const int num_elements = 1;
};
-/* Device Memory */
+/* Device Memory
+ *
+ * Base class for all device memory. This should not be allocated directly,
+ * instead the appropriate subclass can be used. */
class device_memory
{
@@ -181,7 +180,7 @@ public:
return elements*data_elements*datatype_size(data_type);
}
- /* data information */
+ /* Data information. */
DataType data_type;
int data_elements;
device_ptr data_pointer;
@@ -190,58 +189,93 @@ public:
size_t data_width;
size_t data_height;
size_t data_depth;
+ MemoryType type;
+ const char *name;
+ InterpolationType interpolation;
+ ExtensionType extension;
- /* device pointer */
+ /* Device pointer. */
+ Device *device;
device_ptr device_pointer;
- device_memory()
- {
- data_type = device_type_traits<uchar>::data_type;
- data_elements = device_type_traits<uchar>::num_elements;
- data_pointer = 0;
- data_size = 0;
- device_size = 0;
- data_width = 0;
- data_height = 0;
- data_depth = 0;
- device_pointer = 0;
- }
- virtual ~device_memory() { assert(!device_pointer); }
-
- void resize(size_t size)
- {
- data_size = size;
- data_width = size;
- }
+ virtual ~device_memory();
protected:
- /* no copying */
+ /* Only create through subclasses. */
+ device_memory(Device *device, const char *name, MemoryType type);
+
+ /* No copying allowed. */
device_memory(const device_memory&);
device_memory& operator = (const device_memory&);
+
+ /* Host allocation on the device. All data_pointer memory should be
+ * allocated with these functions, for devices that support using
+ * the same pointer for host and device. */
+ device_ptr host_alloc(size_t size);
+ void host_free(device_ptr ptr, size_t size);
+
+ /* Device memory allocation and copying. */
+ void device_alloc();
+ void device_free();
+ void device_copy_to();
+ void device_copy_from(int y, int w, int h, int elem);
+ void device_zero();
};
+/* Device Only Memory
+ *
+ * Working memory only needed by the device, with no corresponding allocation
+ * on the host. Only used internally in the device implementations. */
+
template<typename T>
class device_only_memory : public device_memory
{
public:
- device_only_memory()
+ device_only_memory(Device *device, const char *name)
+ : device_memory(device, name, MEM_READ_WRITE)
{
data_type = device_type_traits<T>::data_type;
data_elements = max(device_type_traits<T>::num_elements, 1);
}
- void resize(size_t num)
+ virtual ~device_only_memory()
{
- device_memory::resize(num*sizeof(T));
+ free();
+ }
+
+ void alloc_to_device(size_t num)
+ {
+ data_size = num*sizeof(T);
+ device_alloc();
+ }
+
+ void free()
+ {
+ device_free();
+ }
+
+ void zero_to_device()
+ {
+ device_zero();
}
};
-/* Device Vector */
+/* Device Vector
+ *
+ * Data vector to exchange data between host and device. Memory will be
+ * allocated on the host first with alloc() and resize, and then filled
+ * in and copied to the device with copy_to_device(). Or alternatively
+ * allocated and set to zero on the device with zero_to_device().
+ *
+ * When using memory type MEM_TEXTURE, a pointer to this memory will be
+ * automatically attached to kernel globals, using the provided name
+ * matching an entry in kernel_textures.h. */
template<typename T> class device_vector : public device_memory
{
public:
- device_vector()
+ device_vector(Device *device, const char *name, MemoryType type)
+ : device_memory(device, name, type)
{
data_type = device_type_traits<T>::data_type;
data_elements = device_type_traits<T>::num_elements;
@@ -249,84 +283,175 @@ public:
assert(data_elements > 0);
}
- virtual ~device_vector() {}
+ virtual ~device_vector()
+ {
+ free();
+ }
- /* vector functions */
- T *resize(size_t width, size_t height = 0, size_t depth = 0)
+ /* Host memory allocation. */
+ T *alloc(size_t width, size_t height = 0, size_t depth = 0)
{
- data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
- if(data.resize(data_size) == NULL) {
- clear();
- return NULL;
+ size_t new_size = size(width, height, depth);
+
+ if(new_size != data_size) {
+ device_free();
+ host_free(data_pointer, sizeof(T)*data_size);
+ data_pointer = host_alloc(sizeof(T)*new_size);
}
+
+ data_size = new_size;
data_width = width;
data_height = height;
data_depth = depth;
- if(data_size == 0) {
- data_pointer = 0;
- return NULL;
+ assert(device_ptr == 0);
+
+ return get_data();
+ }
+
+ /* Host memory resize. Only use this if the original data needs to be
+ * preserved, it is faster to call alloc() if it can be discarded. */
+ T *resize(size_t width, size_t height = 0, size_t depth = 0)
+ {
+ size_t new_size = size(width, height, depth);
+
+ if(new_size != data_size) {
+ device_ptr new_ptr = host_alloc(sizeof(T)*new_size);
+
+ if(new_size && data_size) {
+ size_t min_size = ((new_size < data_size)? new_size: data_size);
+ memcpy((T*)new_ptr, (T*)data_pointer, sizeof(T)*min_size);
+ }
+
+ device_free();
+ host_free(data_pointer, sizeof(T)*data_size);
+ data_pointer = new_ptr;
}
- data_pointer = (device_ptr)&data[0];
- return &data[0];
+
+ data_size = new_size;
+ data_width = width;
+ data_height = height;
+ data_depth = depth;
+ assert(device_ptr == 0);
+
+ return get_data();
}
+ /* Take over data from an existing array. */
void steal_data(array<T>& from)
{
- data.steal_data(from);
- data_size = data.size();
- data_pointer = (data_size)? (device_ptr)&data[0]: 0;
- data_width = data_size;
+ device_free();
+ host_free(data_pointer, sizeof(T)*data_size);
+
+ data_size = from.size();
+ data_width = 0;
data_height = 0;
data_depth = 0;
+ data_pointer = (device_ptr)from.steal_pointer();
+ assert(device_pointer == 0);
}
- void clear()
+ /* Free device and host memory. */
+ void free()
{
- data.clear();
- data_pointer = 0;
+ device_free();
+ host_free(data_pointer, sizeof(T)*data_size);
+
+ data_size = 0;
data_width = 0;
data_height = 0;
data_depth = 0;
- data_size = 0;
- device_pointer = 0;
+ data_pointer = 0;
+ assert(device_pointer == 0);
}
size_t size()
{
- return data.size();
+ return data_size;
}
T* get_data()
{
- return &data[0];
+ return (T*)data_pointer;
}
T& operator[](size_t i)
{
- return data[i];
+ assert(i < data_size);
+ return get_data()[i];
+ }
+
+ void copy_to_device()
+ {
+ device_copy_to();
+ }
+
+ void copy_from_device(int y, int w, int h)
+ {
+ device_copy_from(y, w, h, sizeof(T));
+ }
+
+ void zero_to_device()
+ {
+ device_zero();
+ }
+
+protected:
+ size_t size(size_t width, size_t height, size_t depth)
+ {
+ return width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
+ }
+};
+
+/* Pixel Memory
+ *
+ * Device memory to efficiently draw as pixels to the screen in interactive
+ * rendering. Only copying pixels from the device is supported, not copying to. */
+
+template<typename T> class device_pixels : public device_vector<T>
+{
+public:
+ device_pixels(Device *device, const char *name)
+ : device_vector<T>(device, name, MEM_PIXELS)
+ {
+ }
+
+ void alloc_to_device(size_t width, size_t height, size_t depth = 0)
+ {
+ device_vector<T>::alloc(width, height, depth);
+ device_memory::device_alloc();
}
-private:
- array<T> data;
+ T *copy_from_device(int y, int w, int h)
+ {
+ device_memory::device_copy_from(y, w, h, sizeof(T));
+ return device_vector<T>::get_data();
+ }
};
-/* A device_sub_ptr is a pointer into another existing memory.
- * Therefore, it is not allocated separately, but just created from the already allocated base memory.
- * It is freed automatically when it goes out of scope, which should happen before the base memory is freed.
- * Note that some devices require the offset and size of the sub_ptr to be properly aligned. */
+/* Device Sub Memory
+ *
+ * Pointer into existing memory. It is not allocated separately, but created
+ * from an already allocated base memory. It is freed automatically when it
+ * goes out of scope, which should happen before base memory is freed.
+ *
+ * Note: some devices require offset and size of the sub_ptr to be properly
+ * aligned to device->mem_address_alingment(). */
+
class device_sub_ptr
{
public:
- device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type);
+ device_sub_ptr(device_memory& mem, int offset, int size);
~device_sub_ptr();
- /* No copying. */
- device_sub_ptr& operator = (const device_sub_ptr&);
device_ptr operator*() const
{
return ptr;
}
+
protected:
+ /* No copying. */
+ device_sub_ptr& operator = (const device_sub_ptr&);
+
Device *device;
device_ptr ptr;
};
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 1b1a577d52f..db84696bcb4 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -43,10 +43,10 @@ public:
};
list<SubDevice> devices;
- device_ptr unique_ptr;
+ device_ptr unique_key;
MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
- : Device(info, stats, background_), unique_ptr(1)
+ : Device(info, stats, background_), unique_key(1)
{
Device *device;
@@ -106,70 +106,89 @@ public:
return true;
}
- void mem_alloc(const char *name, device_memory& mem, MemoryType type)
+ void mem_alloc(device_memory& mem)
{
+ device_ptr key = unique_key++;
+
foreach(SubDevice& sub, devices) {
+ mem.device = sub.device;
mem.device_pointer = 0;
- sub.device->mem_alloc(name, mem, type);
- sub.ptr_map[unique_ptr] = mem.device_pointer;
+
+ sub.device->mem_alloc(mem);
+ sub.ptr_map[key] = mem.device_pointer;
}
- mem.device_pointer = unique_ptr++;
- stats.mem_alloc(mem.device_size);
+ mem.device = this;
+ mem.device_pointer = key;
}
void mem_copy_to(device_memory& mem)
{
- device_ptr tmp = mem.device_pointer;
+ device_ptr existing_key = mem.device_pointer;
+ device_ptr key = (existing_key)? existing_key: unique_key++;
foreach(SubDevice& sub, devices) {
- mem.device_pointer = sub.ptr_map[tmp];
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
+
sub.device->mem_copy_to(mem);
+ sub.ptr_map[key] = mem.device_pointer;
}
- mem.device_pointer = tmp;
+ mem.device = this;
+ mem.device_pointer = key;
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
- device_ptr tmp = mem.device_pointer;
+ device_ptr key = mem.device_pointer;
int i = 0, sub_h = h/devices.size();
foreach(SubDevice& sub, devices) {
int sy = y + i*sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
- mem.device_pointer = sub.ptr_map[tmp];
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+
sub.device->mem_copy_from(mem, sy, w, sh, elem);
i++;
}
- mem.device_pointer = tmp;
+ mem.device = this;
+ mem.device_pointer = key;
}
void mem_zero(device_memory& mem)
{
- device_ptr tmp = mem.device_pointer;
+ device_ptr existing_key = mem.device_pointer;
+ device_ptr key = (existing_key)? existing_key: unique_key++;
foreach(SubDevice& sub, devices) {
- mem.device_pointer = sub.ptr_map[tmp];
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
+
sub.device->mem_zero(mem);
+ sub.ptr_map[key] = mem.device_pointer;
}
- mem.device_pointer = tmp;
+ mem.device = this;
+ mem.device_pointer = key;
}
void mem_free(device_memory& mem)
{
- device_ptr tmp = mem.device_pointer;
- stats.mem_free(mem.device_size);
+ device_ptr key = mem.device_pointer;
foreach(SubDevice& sub, devices) {
- mem.device_pointer = sub.ptr_map[tmp];
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+
sub.device->mem_free(mem);
- sub.ptr_map.erase(sub.ptr_map.find(tmp));
+ sub.ptr_map.erase(sub.ptr_map.find(key));
}
+ mem.device = this;
mem.device_pointer = 0;
}
@@ -179,88 +198,13 @@ public:
sub.device->const_copy_to(name, host, size);
}
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType
- interpolation,
- ExtensionType extension)
- {
- VLOG(1) << "Texture allocate: " << name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
-
- foreach(SubDevice& sub, devices) {
- mem.device_pointer = 0;
- sub.device->tex_alloc(name, mem, interpolation, extension);
- sub.ptr_map[unique_ptr] = mem.device_pointer;
- }
-
- mem.device_pointer = unique_ptr++;
- stats.mem_alloc(mem.device_size);
- }
-
- void tex_free(device_memory& mem)
- {
- device_ptr tmp = mem.device_pointer;
- stats.mem_free(mem.device_size);
-
- foreach(SubDevice& sub, devices) {
- mem.device_pointer = sub.ptr_map[tmp];
- sub.device->tex_free(mem);
- sub.ptr_map.erase(sub.ptr_map.find(tmp));
- }
-
- mem.device_pointer = 0;
- }
-
- void pixels_alloc(device_memory& mem)
- {
- foreach(SubDevice& sub, devices) {
- mem.device_pointer = 0;
- sub.device->pixels_alloc(mem);
- sub.ptr_map[unique_ptr] = mem.device_pointer;
- }
-
- mem.device_pointer = unique_ptr++;
- }
-
- void pixels_free(device_memory& mem)
- {
- device_ptr tmp = mem.device_pointer;
-
- foreach(SubDevice& sub, devices) {
- mem.device_pointer = sub.ptr_map[tmp];
- sub.device->pixels_free(mem);
- sub.ptr_map.erase(sub.ptr_map.find(tmp));
- }
-
- mem.device_pointer = 0;
- }
-
- void pixels_copy_from(device_memory& mem, int y, int w, int h)
- {
- device_ptr tmp = mem.device_pointer;
- int i = 0, sub_h = h/devices.size();
-
- foreach(SubDevice& sub, devices) {
- int sy = y + i*sub_h;
- int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
-
- mem.device_pointer = sub.ptr_map[tmp];
- sub.device->pixels_copy_from(mem, sy, w, sh);
- i++;
- }
-
- mem.device_pointer = tmp;
- }
-
void draw_pixels(
device_memory& rgba, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params)
{
- device_ptr tmp = rgba.device_pointer;
+ device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h/devices.size();
int sub_height = height/devices.size();
@@ -271,12 +215,12 @@ public:
int sdy = dy + i*sub_height;
/* adjust math for w/width */
- rgba.device_pointer = sub.ptr_map[tmp];
+ rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
i++;
}
- rgba.device_pointer = tmp;
+ rgba.device_pointer = key;
}
void map_tile(Device *sub_device, RenderTile& tile)
@@ -311,15 +255,21 @@ public:
* to the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
- if(tiles[i].buffers->device != sub_device) {
- device_vector<float> &mem = tiles[i].buffers->buffer;
-
+ device_vector<float> &mem = tiles[i].buffers->buffer;
+ if(mem.device != sub_device) {
tiles[i].buffers->copy_from_device();
+
+ Device *original_device = mem.device;
device_ptr original_ptr = mem.device_pointer;
+
+ mem.device = sub_device;
mem.device_pointer = 0;
- sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE);
+
+ sub_device->mem_alloc(mem);
sub_device->mem_copy_to(mem);
tiles[i].buffer = mem.device_pointer;
+
+ mem.device = original_device;
mem.device_pointer = original_ptr;
}
}
@@ -331,25 +281,30 @@ public:
if(!tiles[i].buffers) {
continue;
}
- if(tiles[i].buffers->device != sub_device) {
- device_vector<float> &mem = tiles[i].buffers->buffer;
+ device_vector<float> &mem = tiles[i].buffers->buffer;
+ if(mem.device != sub_device) {
+ Device *original_device = mem.device;
device_ptr original_ptr = mem.device_pointer;
+ size_t original_size = mem.device_size;
+
+ mem.device = sub_device;
mem.device_pointer = tiles[i].buffer;
/* Copy denoised tile to the host. */
if(i == 4) {
- tiles[i].buffers->copy_from_device(sub_device);
+ tiles[i].buffers->copy_from_device();
}
- size_t mem_size = mem.device_size;
sub_device->mem_free(mem);
+
+ mem.device = original_device;
mem.device_pointer = original_ptr;
- mem.device_size = mem_size;
+ mem.device_size = original_size;
/* Copy denoised tile to the original device. */
if(i == 4) {
- tiles[i].buffers->device->mem_copy_to(mem);
+ mem.copy_to_device();
}
}
}
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index ced10c98dc9..fa231c817e6 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -87,10 +87,10 @@ public:
snd.write();
}
- void mem_alloc(const char *name, device_memory& mem, MemoryType type)
+ void mem_alloc(device_memory& mem)
{
- if(name) {
- VLOG(1) << "Buffer allocate: " << name << ", "
+ if(mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
@@ -100,9 +100,7 @@ public:
mem.device_pointer = ++mem_counter;
RPCSend snd(socket, &error_func, "mem_alloc");
-
snd.add(mem);
- snd.add(type);
snd.write();
}
@@ -174,45 +172,6 @@ public:
snd.write_buffer(host, size);
}
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType interpolation,
- ExtensionType extension)
- {
- VLOG(1) << "Texture allocate: " << name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
-
- thread_scoped_lock lock(rpc_lock);
-
- mem.device_pointer = ++mem_counter;
-
- RPCSend snd(socket, &error_func, "tex_alloc");
-
- string name_string(name);
-
- snd.add(name_string);
- snd.add(mem);
- snd.add(interpolation);
- snd.add(extension);
- snd.write();
- snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
- }
-
- void tex_free(device_memory& mem)
- {
- if(mem.device_pointer) {
- thread_scoped_lock lock(rpc_lock);
-
- RPCSend snd(socket, &error_func, "tex_free");
-
- snd.add(mem);
- snd.write();
-
- mem.device_pointer = 0;
- }
- }
-
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
if(error_func.have_error())
@@ -321,7 +280,7 @@ public:
snd.write();
}
- int get_split_task_count(DeviceTask& task)
+ int get_split_task_count(DeviceTask&)
{
return 1;
}
@@ -348,6 +307,7 @@ void device_network_info(vector<DeviceInfo>& devices)
info.advanced_shading = true;
info.has_volume_decoupled = false;
info.has_qbvh = false;
+ info.has_osl = false;
devices.push_back(info);
}
@@ -469,61 +429,64 @@ protected:
void process(RPCReceive& rcv, thread_scoped_lock &lock)
{
if(rcv.name == "mem_alloc") {
- MemoryType type;
- network_device_memory mem;
- device_ptr client_pointer;
-
- rcv.read(mem);
- rcv.read(type);
-
+ string name;
+ network_device_memory mem(device);
+ rcv.read(mem, name);
lock.unlock();
- client_pointer = mem.device_pointer;
-
- /* create a memory buffer for the device buffer */
+ /* Allocate host side data buffer. */
size_t data_size = mem.memory_size();
- DataVector &data_v = data_vector_insert(client_pointer, data_size);
+ device_ptr client_pointer = mem.device_pointer;
- if(data_size)
- mem.data_pointer = (device_ptr)&(data_v[0]);
- else
- mem.data_pointer = 0;
+ DataVector &data_v = data_vector_insert(client_pointer, data_size);
+ mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
- /* perform the allocation on the actual device */
- device->mem_alloc(NULL, mem, type);
+ /* Perform the allocation on the actual device. */
+ device->mem_alloc(mem);
- /* store a mapping to/from client_pointer and real device pointer */
+ /* Store a mapping to/from client_pointer and real device pointer. */
pointer_mapping_insert(client_pointer, mem.device_pointer);
}
else if(rcv.name == "mem_copy_to") {
- network_device_memory mem;
-
- rcv.read(mem);
+ string name;
+ network_device_memory mem(device);
+ rcv.read(mem, name);
lock.unlock();
+ size_t data_size = mem.memory_size();
device_ptr client_pointer = mem.device_pointer;
- DataVector &data_v = data_vector_find(client_pointer);
-
- size_t data_size = mem.memory_size();
+ if(client_pointer) {
+ /* Lookup existing host side data buffer. */
+ DataVector &data_v = data_vector_find(client_pointer);
+ mem.data_pointer = (device_ptr)&data_v[0];
- /* get pointer to memory buffer for device buffer */
- mem.data_pointer = (device_ptr)&data_v[0];
+ /* Translate the client pointer to a real device pointer. */
+ mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
+ }
+ else {
+ /* Allocate host side data buffer. */
+ DataVector &data_v = data_vector_insert(client_pointer, data_size);
+ mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
+ }
- /* copy data from network into memory buffer */
+ /* Copy data from network into memory buffer. */
rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
- /* translate the client pointer to a real device pointer */
- mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
-
- /* copy the data from the memory buffer to the device buffer */
+ /* Copy the data from the memory buffer to the device buffer. */
device->mem_copy_to(mem);
+
+ if(!client_pointer) {
+ /* Store a mapping to/from client_pointer and real device pointer. */
+ pointer_mapping_insert(client_pointer, mem.device_pointer);
+ }
}
else if(rcv.name == "mem_copy_from") {
- network_device_memory mem;
+ string name;
+ network_device_memory mem(device);
int y, w, h, elem;
- rcv.read(mem);
+ rcv.read(mem, name);
rcv.read(y);
rcv.read(w);
rcv.read(h);
@@ -546,28 +509,44 @@ protected:
lock.unlock();
}
else if(rcv.name == "mem_zero") {
- network_device_memory mem;
-
- rcv.read(mem);
+ string name;
+ network_device_memory mem(device);
+ rcv.read(mem, name);
lock.unlock();
+ size_t data_size = mem.memory_size();
device_ptr client_pointer = mem.device_pointer;
- mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
- DataVector &data_v = data_vector_find(client_pointer);
+ if(client_pointer) {
+ /* Lookup existing host side data buffer. */
+ DataVector &data_v = data_vector_find(client_pointer);
+ mem.data_pointer = (device_ptr)&data_v[0];
- mem.data_pointer = (device_ptr)&(data_v[0]);
+ /* Translate the client pointer to a real device pointer. */
+ mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
+ }
+ else {
+ /* Allocate host side data buffer. */
+ DataVector &data_v = data_vector_insert(client_pointer, data_size);
+ mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
+ }
+ /* Zero memory. */
device->mem_zero(mem);
+
+ if(!client_pointer) {
+ /* Store a mapping to/from client_pointer and real device pointer. */
+ pointer_mapping_insert(client_pointer, mem.device_pointer);
+ }
}
else if(rcv.name == "mem_free") {
- network_device_memory mem;
- device_ptr client_pointer;
+ string name;
+ network_device_memory mem(device);
- rcv.read(mem);
+ rcv.read(mem, name);
lock.unlock();
- client_pointer = mem.device_pointer;
+ device_ptr client_pointer = mem.device_pointer;
mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);
@@ -586,49 +565,6 @@ protected:
device->const_copy_to(name_string.c_str(), &host_vector[0], size);
}
- else if(rcv.name == "tex_alloc") {
- network_device_memory mem;
- string name;
- InterpolationType interpolation;
- ExtensionType extension_type;
- device_ptr client_pointer;
-
- rcv.read(name);
- rcv.read(mem);
- rcv.read(interpolation);
- rcv.read(extension_type);
- lock.unlock();
-
- client_pointer = mem.device_pointer;
-
- size_t data_size = mem.memory_size();
-
- DataVector &data_v = data_vector_insert(client_pointer, data_size);
-
- if(data_size)
- mem.data_pointer = (device_ptr)&(data_v[0]);
- else
- mem.data_pointer = 0;
-
- rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
-
- device->tex_alloc(name.c_str(), mem, interpolation, extension_type);
-
- pointer_mapping_insert(client_pointer, mem.device_pointer);
- }
- else if(rcv.name == "tex_free") {
- network_device_memory mem;
- device_ptr client_pointer;
-
- rcv.read(mem);
- lock.unlock();
-
- client_pointer = mem.device_pointer;
-
- mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);
-
- device->tex_free(mem);
- }
else if(rcv.name == "load_kernels") {
DeviceRequestedFeatures requested_features;
rcv.read(requested_features.experimental);
@@ -713,7 +649,7 @@ protected:
}
}
- bool task_acquire_tile(Device *device, RenderTile& tile)
+ bool task_acquire_tile(Device *, RenderTile& tile)
{
thread_scoped_lock acquire_lock(acquire_mutex);
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 3d3bd99dfe7..a38d962c0af 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -38,6 +38,7 @@
#include "util/util_foreach.h"
#include "util/util_list.h"
#include "util/util_map.h"
+#include "util/util_param.h"
#include "util/util_string.h"
CCL_NAMESPACE_BEGIN
@@ -68,8 +69,15 @@ typedef boost::archive::binary_iarchive i_archive;
class network_device_memory : public device_memory
{
public:
- network_device_memory() {}
- ~network_device_memory() { device_pointer = 0; };
+ network_device_memory(Device *device)
+ : device_memory(device, "", MEM_READ_ONLY)
+ {
+ }
+
+ ~network_device_memory()
+ {
+ device_pointer = 0;
+ };
vector<char> local_data;
};
@@ -119,6 +127,9 @@ public:
{
archive & mem.data_type & mem.data_elements & mem.data_size;
archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
+ archive & mem.type & string(mem.name);
+ archive & mem.interpolation & mem.extension;
+ archive & mem.device_pointer;
}
template<typename T> void add(const T& data)
@@ -258,12 +269,21 @@ public:
delete archive_stream;
}
- void read(network_device_memory& mem)
+ void read(network_device_memory& mem, string& name)
{
*archive & mem.data_type & mem.data_elements & mem.data_size;
*archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
+ *archive & mem.type & name;
+ *archive & mem.interpolation & mem.extension;
+ *archive & mem.device_pointer;
+ mem.name = name.c_str();
mem.data_pointer = 0;
+
+ /* Can't transfer OpenGL texture over network. */
+ if(mem.type == MEM_PIXELS) {
+ mem.type = MEM_WRITE_ONLY;
+ }
}
template<typename T> void read(T& data)
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index d2b3a89fa98..f2839a8b1b9 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -26,7 +26,13 @@ CCL_NAMESPACE_BEGIN
static const double alpha = 0.1; /* alpha for rolling average */
-DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
+DeviceSplitKernel::DeviceSplitKernel(Device *device)
+: device(device),
+ split_data(device, "split_data"),
+ ray_state(device, "ray_state", MEM_READ_WRITE),
+ queue_index(device, "queue_index"),
+ use_queues_flag(device, "use_queues_flag"),
+ work_pool_wgs(device, "work_pool_wgs")
{
current_max_closure = -1;
first_tile = true;
@@ -55,11 +61,11 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
DeviceSplitKernel::~DeviceSplitKernel()
{
- device->mem_free(split_data);
- device->mem_free(ray_state);
- device->mem_free(use_queues_flag);
- device->mem_free(queue_index);
- device->mem_free(work_pool_wgs);
+ split_data.free();
+ ray_state.free();
+ use_queues_flag.free();
+ queue_index.free();
+ work_pool_wgs.free();
delete kernel_path_init;
delete kernel_scene_intersect;
@@ -169,20 +175,11 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Allocate work_pool_wgs memory. */
- work_pool_wgs.resize(max_work_groups);
- device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE);
-
- queue_index.resize(NUM_QUEUES);
- device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE);
-
- use_queues_flag.resize(1);
- device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
-
- ray_state.resize(num_global_elements);
- device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
-
- split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
- device->mem_alloc("split_data", split_data, MEM_READ_WRITE);
+ work_pool_wgs.alloc_to_device(max_work_groups);
+ queue_index.alloc_to_device(NUM_QUEUES);
+ use_queues_flag.alloc_to_device(1);
+ split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
+ ray_state.alloc(num_global_elements);
}
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
@@ -219,9 +216,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* reset state memory here as global size for data_init
* kernel might not be large enough to do in kernel
*/
- device->mem_zero(work_pool_wgs);
- device->mem_zero(split_data);
- device->mem_zero(ray_state);
+ work_pool_wgs.zero_to_device();
+ split_data.zero_to_device();
+ ray_state.zero_to_device();
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile,
@@ -278,7 +275,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
}
/* Decide if we should exit path-iteration in host. */
- device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1);
+ ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);
activeRaysAvailable = false;
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 9c42cb58520..0647c664447 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -79,7 +79,7 @@ private:
* kernel will be available to another kernel via this global
* memory.
*/
- device_memory split_data;
+ device_only_memory<uchar> split_data;
device_vector<uchar> ray_state;
device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */
diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp
index b67dfef88aa..a791b374774 100644
--- a/intern/cycles/device/opencl/memory_manager.cpp
+++ b/intern/cycles/device/opencl/memory_manager.cpp
@@ -73,10 +73,10 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
return;
}
- device_memory *new_buffer = new device_memory;
+ device_only_memory<uchar> *new_buffer =
+ new device_only_memory<uchar>(device, "memory manager buffer");
- new_buffer->resize(total_size);
- device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY);
+ new_buffer->alloc_to_device(total_size);
size_t offset = 0;
@@ -110,7 +110,6 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
offset += allocation->size;
}
- device->mem_free(*buffer);
delete buffer;
buffer = new_buffer;
@@ -143,9 +142,9 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
clFinish(device->cqCommandQueue);
}
-void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device)
+void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *)
{
- device->mem_free(*buffer);
+ buffer->free();
}
MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
@@ -161,8 +160,13 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
return smallest;
}
-MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false)
+MemoryManager::MemoryManager(OpenCLDeviceBase *device)
+: device(device), need_update(false)
{
+ foreach(DeviceBuffer& device_buffer, device_buffers) {
+ device_buffer.buffer =
+ new device_only_memory<uchar>(device, "memory manager buffer");
+ }
}
void MemoryManager::free()
diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h
index 3714405d026..b3d861275f0 100644
--- a/intern/cycles/device/opencl/memory_manager.h
+++ b/intern/cycles/device/opencl/memory_manager.h
@@ -56,15 +56,17 @@ private:
};
struct DeviceBuffer {
- device_memory *buffer;
+ device_only_memory<uchar> *buffer;
vector<Allocation*> allocations;
size_t size; /* Size of all allocations. */
- DeviceBuffer() : buffer(new device_memory), size(0)
+ DeviceBuffer()
+ : buffer(NULL), size(0)
{
}
- ~DeviceBuffer() {
+ ~DeviceBuffer()
+ {
delete buffer;
buffer = NULL;
}
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index bd956e29083..55848c8112d 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -340,7 +340,7 @@ public:
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
vector<OpenCLProgram*> &programs) = 0;
- void mem_alloc(const char *name, device_memory& mem, MemoryType type);
+ void mem_alloc(device_memory& mem);
void mem_copy_to(device_memory& mem);
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
void mem_zero(device_memory& mem);
@@ -349,10 +349,7 @@ public:
int mem_address_alignment();
void const_copy_to(const char *name, void *host, size_t size);
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType /*interpolation*/,
- ExtensionType /*extension*/);
+ void tex_alloc(device_memory& mem);
void tex_free(device_memory& mem);
size_t global_size_round_up(int group_size, int global_size);
@@ -440,7 +437,7 @@ protected:
bool denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task);
- device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type);
+ device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
void mem_free_sub_ptr(device_ptr ptr);
class ArgumentWrapper {
@@ -461,6 +458,11 @@ protected:
}
template<typename T>
+ ArgumentWrapper(device_only_memory<T>& argument) : size(sizeof(void*)),
+ pointer((void*)(&argument.device_pointer))
+ {
+ }
+ template<typename T>
ArgumentWrapper(T& argument) : size(sizeof(argument)),
pointer(&argument)
{
@@ -546,25 +548,9 @@ private:
friend class MemoryManager;
static_assert_align(TextureInfo, 16);
+ device_vector<TextureInfo> texture_info;
- vector<TextureInfo> texture_info;
- device_memory texture_info_buffer;
-
- struct Texture {
- Texture() {}
- Texture(device_memory* mem,
- InterpolationType interpolation,
- ExtensionType extension)
- : mem(mem),
- interpolation(interpolation),
- extension(extension) {
- }
- device_memory* mem;
- InterpolationType interpolation;
- ExtensionType extension;
- };
-
- typedef map<string, Texture> TexturesMap;
+ typedef map<string, device_memory*> TexturesMap;
TexturesMap textures;
bool textures_need_update;
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 48c32a9dc5c..5e9debc3b17 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -72,7 +72,9 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where)
}
OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
-: Device(info, stats, background_), memory_manager(this)
+: Device(info, stats, background_),
+ memory_manager(this),
+ texture_info(this, "__texture_info", MEM_TEXTURE)
{
cpPlatform = NULL;
cdDevice = NULL;
@@ -136,11 +138,9 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
return;
}
- /* Allocate this right away so that texture_info_buffer is placed at offset 0 in the device memory buffers */
+ /* Allocate this right away so that texture_info is placed at offset 0 in the device memory buffers */
texture_info.resize(1);
- texture_info_buffer.resize(1);
- texture_info_buffer.data_pointer = (device_ptr)&texture_info[0];
- memory_manager.alloc("texture_info", texture_info_buffer);
+ memory_manager.alloc("texture_info", texture_info);
fprintf(stderr, "Device init success\n");
device_initialized = true;
@@ -157,7 +157,6 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
ConstMemMap::iterator mt;
for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
- mem_free(*(mt->second));
delete mt->second;
}
@@ -286,10 +285,10 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
return true;
}
-void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type)
+void OpenCLDeviceBase::mem_alloc(device_memory& mem)
{
- if(name) {
- VLOG(1) << "Buffer allocate: " << name << ", "
+ if(mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
@@ -307,8 +306,8 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
if(size > max_alloc_size) {
string error = "Scene too complex to fit in available memory.";
- if(name != NULL) {
- error += string_printf(" (allocating buffer %s failed.)", name);
+ if(mem.name != NULL) {
+ error += string_printf(" (allocating buffer %s failed.)", mem.name);
}
set_error(error);
@@ -318,9 +317,9 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
cl_mem_flags mem_flag;
void *mem_ptr = NULL;
- if(type == MEM_READ_ONLY)
+ if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE)
mem_flag = CL_MEM_READ_ONLY;
- else if(type == MEM_WRITE_ONLY)
+ else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS)
mem_flag = CL_MEM_WRITE_ONLY;
else
mem_flag = CL_MEM_READ_WRITE;
@@ -348,17 +347,27 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
void OpenCLDeviceBase::mem_copy_to(device_memory& mem)
{
- /* this is blocking */
- size_t size = mem.memory_size();
- if(size != 0) {
- opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
- CL_MEM_PTR(mem.device_pointer),
- CL_TRUE,
- 0,
- size,
- (void*)mem.data_pointer,
- 0,
- NULL, NULL));
+ if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else {
+ if(!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ /* this is blocking */
+ size_t size = mem.memory_size();
+ if(size != 0) {
+ opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
+ CL_MEM_PTR(mem.device_pointer),
+ CL_TRUE,
+ 0,
+ size,
+ (void*)mem.data_pointer,
+ 0,
+ NULL, NULL));
+ }
}
}
@@ -410,6 +419,10 @@ void OpenCLDeviceBase::mem_zero_kernel(device_ptr mem, size_t size)
void OpenCLDeviceBase::mem_zero(device_memory& mem)
{
+ if(!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
if(mem.device_pointer) {
if(base_program.is_loaded()) {
mem_zero_kernel(mem.device_pointer, mem.memory_size());
@@ -445,14 +458,19 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
void OpenCLDeviceBase::mem_free(device_memory& mem)
{
- if(mem.device_pointer) {
- if(mem.device_pointer != null_mem) {
- opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
- }
- mem.device_pointer = 0;
+ if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else {
+ if(mem.device_pointer) {
+ if(mem.device_pointer != null_mem) {
+ opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
+ }
+ mem.device_pointer = 0;
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
}
}
@@ -461,12 +479,12 @@ int OpenCLDeviceBase::mem_address_alignment()
return OpenCLInfo::mem_address_alignment(cdDevice);
}
-device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type)
+device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size)
{
cl_mem_flags mem_flag;
- if(type == MEM_READ_ONLY)
+ if(mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE)
mem_flag = CL_MEM_READ_ONLY;
- else if(type == MEM_WRITE_ONLY)
+ else if(mem.type == MEM_WRITE_ONLY || mem.type == MEM_PIXELS)
mem_flag = CL_MEM_WRITE_ONLY;
else
mem_flag = CL_MEM_READ_WRITE;
@@ -497,10 +515,8 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
device_vector<uchar> *data;
if(i == const_mem_map.end()) {
- data = new device_vector<uchar>();
- data->resize(size);
-
- mem_alloc(name, *data, MEM_READ_ONLY);
+ data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
+ data->alloc(size);
const_mem_map.insert(ConstMemMap::value_type(name, data));
}
else {
@@ -508,22 +524,19 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
}
memcpy(data->get_data(), host, size);
- mem_copy_to(*data);
+ data->copy_to_device();
}
-void OpenCLDeviceBase::tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType interpolation,
- ExtensionType extension)
+void OpenCLDeviceBase::tex_alloc(device_memory& mem)
{
- VLOG(1) << "Texture allocate: " << name << ", "
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- memory_manager.alloc(name, mem);
+ memory_manager.alloc(mem.name, mem);
/* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */
mem.device_pointer = 1;
- textures[name] = Texture(&mem, interpolation, extension);
+ textures[mem.name] = &mem;
textures_need_update = true;
}
@@ -537,7 +550,7 @@ void OpenCLDeviceBase::tex_free(device_memory& mem)
}
foreach(TexturesMap::value_type& value, textures) {
- if(value.second.mem == &mem) {
+ if(value.second == &mem) {
textures.erase(value.first);
break;
}
@@ -648,38 +661,33 @@ void OpenCLDeviceBase::flush_texture_buffers()
}
/* Realloc texture descriptors buffer. */
- memory_manager.free(texture_info_buffer);
-
+ memory_manager.free(texture_info);
texture_info.resize(num_slots);
- texture_info_buffer.resize(num_slots * sizeof(TextureInfo));
- texture_info_buffer.data_pointer = (device_ptr)&texture_info[0];
-
- memory_manager.alloc("texture_info", texture_info_buffer);
+ memory_manager.alloc("texture_info", texture_info);
/* Fill in descriptors */
foreach(texture_slot_t& slot, texture_slots) {
- Texture& tex = textures[slot.name];
-
TextureInfo& info = texture_info[slot.slot];
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
-
info.data = desc.offset;
info.cl_buffer = desc.device_buffer;
if(string_startswith(slot.name, "__tex_image")) {
- info.width = tex.mem->data_width;
- info.height = tex.mem->data_height;
- info.depth = tex.mem->data_depth;
+ device_memory *mem = textures[slot.name];
+
+ info.width = mem->data_width;
+ info.height = mem->data_height;
+ info.depth = mem->data_depth;
- info.interpolation = tex.interpolation;
- info.extension = tex.extension;
+ info.interpolation = mem->interpolation;
+ info.extension = mem->extension;
}
}
/* Force write of descriptors. */
- memory_manager.free(texture_info_buffer);
- memory_manager.alloc("texture_info", texture_info_buffer);
+ memory_manager.free(texture_info);
+ memory_manager.alloc("texture_info", texture_info);
}
void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
@@ -1045,8 +1053,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task)
{
- mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_WRITE);
- mem_copy_to(task->tiles_mem);
+ task->tiles_mem.copy_to_device();
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 920106f92d4..96139afa450 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -127,9 +127,8 @@ public:
} KernelGlobals;
/* Allocate buffer for kernel globals */
- device_memory kgbuffer;
- kgbuffer.resize(sizeof(KernelGlobals));
- mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+ device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
+ kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
while(task->acquire_tile(this, tile)) {
@@ -160,7 +159,7 @@ public:
task->release_tile(tile);
}
- mem_free(kgbuffer);
+ kgbuffer.free();
}
}
@@ -288,9 +287,9 @@ public:
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
{
- device_vector<uint64_t> size_buffer;
- size_buffer.resize(1);
- device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
uint threads = num_threads;
device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer);
@@ -308,9 +307,9 @@ public:
device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
- device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
+ size_buffer.copy_from_device(0, 1, 1);
size_t size = size_buffer[0];
- device->mem_free(size_buffer);
+ size_buffer.free();
if(device->ciErr != CL_SUCCESS) {
string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",