Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2017-10-24 03:04:58 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2017-10-24 03:05:41 +0300
commitf5456df095291c6cb2d0223a179746c8e514cd15 (patch)
tree76384b0c91806c95d55b007850ee225889a3250e /intern/cycles/device/device_cuda.cpp
parent254daf8f8c276a4e5292e5a12fcfa88296131878 (diff)
parent070a668d04844610059aaedc80c49e9038fd1779 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r--intern/cycles/device/device_cuda.cpp305
1 files changed, 171 insertions, 134 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0f057e9966f..c742e91c561 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -217,7 +217,8 @@ public:
}
CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
- : Device(info, stats, background_)
+ : Device(info, stats, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE)
{
first_error = true;
background = background_;
@@ -274,7 +275,7 @@ public:
delete split_kernel;
if(info.has_bindless_textures) {
- tex_free(texture_info);
+ texture_info.free();
}
cuda_assert(cuCtxDestroy(cuContext));
@@ -547,20 +548,19 @@ public:
void load_texture_info()
{
if(info.has_bindless_textures && need_texture_info) {
- tex_free(texture_info);
- tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+ texture_info.copy_to_device();
need_texture_info = false;
}
}
- void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
+ void generic_alloc(device_memory& mem)
{
CUDAContextScope scope(this);
- if(name) {
- VLOG(1) << "Buffer allocate: " << name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
+ if(mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
}
CUdeviceptr device_pointer;
@@ -571,31 +571,88 @@ public:
stats.mem_alloc(size);
}
+ void generic_copy_to(device_memory& mem)
+ {
+ if(mem.device_pointer) {
+ CUDAContextScope scope(this);
+ cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
+ }
+ }
+
+ void generic_free(device_memory& mem)
+ {
+ if(mem.device_pointer) {
+ CUDAContextScope scope(this);
+
+ cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
+
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+ }
+
+ void mem_alloc(device_memory& mem)
+ {
+ if(mem.type == MEM_PIXELS && !background) {
+ pixels_alloc(mem);
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else {
+ generic_alloc(mem);
+ }
+ }
+
void mem_copy_to(device_memory& mem)
{
- CUDAContextScope scope(this);
+ if(mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else {
+ if(!mem.device_pointer) {
+ generic_alloc(mem);
+ }
- if(mem.device_pointer)
- cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
+ generic_copy_to(mem);
+ }
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
- CUDAContextScope scope(this);
- size_t offset = elem*y*w;
- size_t size = elem*w*h;
-
- if(mem.device_pointer) {
- cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
- (CUdeviceptr)(mem.device_pointer + offset), size));
+ if(mem.type == MEM_PIXELS && !background) {
+ pixels_copy_from(mem, y, w, h);
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ assert(!"mem_copy_from not supported for textures.");
}
else {
- memset((char*)mem.data_pointer + offset, 0, size);
+ CUDAContextScope scope(this);
+ size_t offset = elem*y*w;
+ size_t size = elem*w*h;
+
+ if(mem.device_pointer) {
+ cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
+ (CUdeviceptr)(mem.device_pointer + offset), size));
+ }
+ else {
+ memset((char*)mem.data_pointer + offset, 0, size);
+ }
}
}
void mem_zero(device_memory& mem)
{
+ if(!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
}
@@ -608,18 +665,18 @@ public:
void mem_free(device_memory& mem)
{
- if(mem.device_pointer) {
- CUDAContextScope scope(this);
- cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
-
- mem.device_pointer = 0;
-
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
+ if(mem.type == MEM_PIXELS && !background) {
+ pixels_free(mem);
+ }
+ else if(mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else {
+ generic_free(mem);
}
}
- virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/)
+ virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/)
{
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
}
@@ -635,14 +692,11 @@ public:
cuda_assert(cuMemcpyHtoD(mem, host, size));
}
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType interpolation,
- ExtensionType extension)
+ void tex_alloc(device_memory& mem)
{
CUDAContextScope scope(this);
- VLOG(1) << "Texture allocate: " << name << ", "
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
@@ -650,12 +704,12 @@ public:
bool has_bindless_textures = info.has_bindless_textures;
/* General variables for both architectures */
- string bind_name = name;
+ string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
- switch(extension) {
+ switch(mem.extension) {
case EXTENSION_REPEAT:
address_mode = CU_TR_ADDRESS_MODE_WRAP;
break;
@@ -671,7 +725,7 @@ public:
}
CUfilter_mode filter_mode;
- if(interpolation == INTERPOLATION_CLOSEST) {
+ if(mem.interpolation == INTERPOLATION_CLOSEST) {
filter_mode = CU_TR_FILTER_MODE_POINT;
}
else {
@@ -681,13 +735,13 @@ public:
/* General variables for Fermi */
CUtexref texref = NULL;
- if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
+ if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) {
if(mem.data_depth > 1) {
/* Kernel uses different bind names for 2d and 3d float textures,
* so we have to adjust couple of things here.
*/
vector<string> tokens;
- string_split(tokens, name, "_");
+ string_split(tokens, mem.name, "_");
bind_name = string_printf("__tex_image_%s_3d_%s",
tokens[2].c_str(),
tokens[3].c_str());
@@ -700,10 +754,10 @@ public:
}
}
- if(interpolation == INTERPOLATION_NONE) {
+ if(mem.interpolation == INTERPOLATION_NONE) {
/* Data Storage */
- mem_alloc(NULL, mem, MEM_READ_ONLY);
- mem_copy_to(mem);
+ generic_alloc(mem);
+ generic_copy_to(mem);
CUdeviceptr cumem;
size_t cubytes;
@@ -802,9 +856,9 @@ public:
if(has_bindless_textures) {
/* Bindless Textures - Kepler */
int flat_slot = 0;
- if(string_startswith(name, "__tex_image")) {
- int pos = string(name).rfind("_");
- flat_slot = atoi(name + pos + 1);
+ if(string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
}
else {
assert(0);
@@ -843,8 +897,8 @@ public:
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)tex;
info.cl_buffer = 0;
- info.interpolation = interpolation;
- info.extension = extension;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
@@ -869,7 +923,7 @@ public:
}
/* Fermi and Kepler */
- tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
+ tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE);
}
void tex_free(device_memory& mem)
@@ -893,21 +947,19 @@ public:
}
else {
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
- mem_free(mem);
+ generic_free(mem);
}
}
}
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
- mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
-
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
- mem_copy_to(task->tiles_mem);
+ task->tiles_mem.copy_to_device();
return !have_error();
}
@@ -1274,7 +1326,7 @@ public:
task.unmap_neighbor_tiles(rtiles, this);
}
- void path_trace(DeviceTask& task, RenderTile& rtile)
+ void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
{
if(have_error())
return;
@@ -1297,8 +1349,7 @@ public:
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
- device_vector<WorkTile> work_tiles;
- work_tiles.resize(1);
+ work_tiles.alloc(1);
WorkTile *wtile = work_tiles.get_data();
wtile->x = rtile.x;
@@ -1308,9 +1359,6 @@ public:
wtile->offset = rtile.offset;
wtile->stride = rtile.stride;
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
- mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
-
- CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
/* Prepare work size. More step samples render faster, but for now we
* remain conservative for GPUs connected to a display to avoid driver
@@ -1331,8 +1379,9 @@ public:
/* Setup and copy work tile to device. */
wtile->start_sample = sample;
wtile->num_samples = min(step_samples, end_sample - sample);;
- mem_copy_to(work_tiles);
+ work_tiles.copy_to_device();
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
uint num_blocks = divide_up(total_work_size, num_threads_per_block);
@@ -1356,8 +1405,6 @@ public:
break;
}
}
-
- mem_free(work_tiles);
}
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
@@ -1510,98 +1557,82 @@ public:
void pixels_alloc(device_memory& mem)
{
- if(!background) {
- PixelMem pmem;
+ PixelMem pmem;
- pmem.w = mem.data_width;
- pmem.h = mem.data_height;
+ pmem.w = mem.data_width;
+ pmem.h = mem.data_height;
- CUDAContextScope scope(this);
+ CUDAContextScope scope(this);
- glGenBuffers(1, &pmem.cuPBO);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- if(mem.data_type == TYPE_HALF)
- glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
- else
- glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
+ glGenBuffers(1, &pmem.cuPBO);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ if(mem.data_type == TYPE_HALF)
+ glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- glGenTextures(1, &pmem.cuTexId);
- glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- if(mem.data_type == TYPE_HALF)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
- else
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glBindTexture(GL_TEXTURE_2D, 0);
+ glGenTextures(1, &pmem.cuTexId);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if(mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
- CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+ CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
- if(result == CUDA_SUCCESS) {
- mem.device_pointer = pmem.cuTexId;
- pixel_mem_map[mem.device_pointer] = pmem;
+ if(result == CUDA_SUCCESS) {
+ mem.device_pointer = pmem.cuTexId;
+ pixel_mem_map[mem.device_pointer] = pmem;
- mem.device_size = mem.memory_size();
- stats.mem_alloc(mem.device_size);
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
- return;
- }
- else {
- /* failed to register buffer, fallback to no interop */
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
-
- background = true;
- }
+ return;
}
+ else {
+ /* failed to register buffer, fallback to no interop */
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
- Device::pixels_alloc(mem);
+ background = true;
+ }
}
void pixels_copy_from(device_memory& mem, int y, int w, int h)
{
- if(!background) {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
- size_t offset = sizeof(uchar)*4*y*w;
- memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
- glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
- return;
- }
+ CUDAContextScope scope(this);
- Device::pixels_copy_from(mem, y, w, h);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+ size_t offset = sizeof(uchar)*4*y*w;
+ memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
void pixels_free(device_memory& mem)
{
if(mem.device_pointer) {
- if(!background) {
- PixelMem pmem = pixel_mem_map[mem.device_pointer];
-
- CUDAContextScope scope(this);
-
- cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
- glDeleteBuffers(1, &pmem.cuPBO);
- glDeleteTextures(1, &pmem.cuTexId);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
- pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
- mem.device_pointer = 0;
+ CUDAContextScope scope(this);
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
- return;
- }
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+ mem.device_pointer = 0;
- Device::pixels_free(mem);
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
}
}
@@ -1611,6 +1642,8 @@ public:
int dx, int dy, int dw, int dh, bool transparent,
const DeviceDrawParams &draw_params)
{
+ assert(mem.type == MEM_PIXELS);
+
if(!background) {
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
PixelMem pmem = pixel_mem_map[mem.device_pointer];
@@ -1747,15 +1780,17 @@ public:
}
}
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
if(tile.task == RenderTile::PATH_TRACE) {
if(use_split_kernel()) {
- device_memory void_buffer;
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- path_trace(*task, tile);
+ path_trace(*task, tile, work_tiles);
}
}
else if(tile.task == RenderTile::DENOISE) {
@@ -1773,6 +1808,8 @@ public:
break;
}
}
+
+ work_tiles.free();
}
else if(task->type == DeviceTask::SHADER) {
shader(*task);
@@ -1906,9 +1943,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
{
CUDAContextScope scope(device);
- device_vector<uint64_t> size_buffer;
- size_buffer.resize(1);
- device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
uint threads = num_threads;
CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
@@ -1931,9 +1968,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
1, 1, 1,
0, 0, (void**)&args, 0));
- device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
+ size_buffer.copy_from_device(0, 1, 1);
size_t size = size_buffer[0];
- device->mem_free(size_buffer);
+ size_buffer.free();
return size;
}