From 5c0a67b325b2c07574ef303947d5c026ab3f55d5 Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Sun, 14 Aug 2016 20:21:08 +0200 Subject: Cycles: Add single channel texture support for OpenCL. This way OpenCL devices can also benefit from a smaller memory footprint, when using e.g. bumpmaps (greyscale, 1 channel). Additional target for my GSoC 2016. --- intern/cycles/kernel/kernel_compat_cpu.h | 1 + intern/cycles/kernel/kernel_textures.h | 2 + intern/cycles/kernel/svm/svm_image.h | 17 +++++- intern/cycles/render/image.cpp | 90 +++++++++++++++++++++++++++++--- intern/cycles/render/scene.h | 2 + intern/cycles/util/util_texture.h | 4 +- 6 files changed, 105 insertions(+), 11 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index c882b477c35..3775934f293 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -495,6 +495,7 @@ typedef texture<uint> texture_uint; typedef texture<int> texture_int; typedef texture<uint4> texture_uint4; typedef texture<uchar4> texture_uchar4; +typedef texture<uchar> texture_uchar; typedef texture_image<float> texture_image_float; typedef texture_image<uchar> texture_image_uchar; typedef texture_image<half> texture_image_half; diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 7d6fec02331..8d5bb75a428 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -188,6 +188,8 @@ KERNEL_TEX(uint, texture_uint, __bindless_mapping) /* packed image (opencl) */ KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed) KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed) +KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed) +KERNEL_TEX(float, texture_float, __tex_image_float_packed) KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info) #undef KERNEL_TEX diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 
9050ce93951..5d02be1fa2f 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -36,13 +36,26 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) { - if(id >= TEX_NUM_FLOAT4_IMAGES) { + /* Float4 */ + if(id < TEX_START_BYTE4_OPENCL) { + return kernel_tex_fetch(__tex_image_float4_packed, offset); + } + /* Byte4 */ + else if(id < TEX_START_FLOAT_OPENCL) { uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset); float f = 1.0f/255.0f; return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); } + /* Float */ + else if(id < TEX_START_BYTE_OPENCL) { + float f = kernel_tex_fetch(__tex_image_float_packed, offset); + return make_float4(f, f, f, 1.0f); + } + /* Byte */ else { - return kernel_tex_fetch(__tex_image_float4_packed, offset); + uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset); + float f = r * (1.0f/255.0f); + return make_float4(f, f, f, 1.0f); } } diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 284af5f90f7..24543601ef9 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -284,7 +284,7 @@ int ImageManager::add_image(const string& filename, if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = true; - /* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */ + /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */ if((type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_HALF4 || type == IMAGE_DATA_TYPE_HALF) && @@ -1105,10 +1105,11 @@ void ImageManager::device_pack_images(Device *device, size_t size = 0, offset = 0; ImageDataType type; - int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]; + int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4] + + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + 
tex_num_images[IMAGE_DATA_TYPE_BYTE]; uint4 *info = dscene->tex_image_packed_info.resize(info_size); - /* Byte Textures*/ + /* Byte4 Textures*/ type = IMAGE_DATA_TYPE_BYTE4; for(size_t slot = 0; slot < images[type].size(); slot++) { @@ -1119,7 +1120,7 @@ void ImageManager::device_pack_images(Device *device, size += tex_img.size(); } - uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size); + uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -1131,11 +1132,11 @@ void ImageManager::device_pack_images(Device *device, info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); - memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } - /* Float Textures*/ + /* Float4 Textures*/ type = IMAGE_DATA_TYPE_FLOAT4; size = 0, offset = 0; @@ -1147,7 +1148,7 @@ void ImageManager::device_pack_images(Device *device, size += tex_img.size(); } - float4 *pixels_float = dscene->tex_image_float4_packed.resize(size); + float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -1160,6 +1161,63 @@ void ImageManager::device_pack_images(Device *device, uint8_t options = pack_image_options(type, slot); info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); + memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + offset += tex_img.size(); + } + + /* Byte Textures*/ + type = IMAGE_DATA_TYPE_BYTE; + size = 0, offset = 0; + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + size += tex_img.size(); 
+ } + + uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size); + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + + uint8_t options = pack_image_options(type, slot); + + info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); + + memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + offset += tex_img.size(); + } + + /* Float Textures*/ + type = IMAGE_DATA_TYPE_FLOAT; + size = 0, offset = 0; + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<float>& tex_img = dscene->tex_float_image[slot]; + size += tex_img.size(); + } + + float *pixels_float = dscene->tex_image_float_packed.resize(size); + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<float>& tex_img = dscene->tex_float_image[slot]; + + /* todo: support 3D textures, only CPU for now */ + + uint8_t options = pack_image_options(type, slot); + info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); + memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } @@ -1178,6 +1236,20 @@ void ImageManager::device_pack_images(Device *device, } device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed); } + if(dscene->tex_image_byte_packed.size()) { + if(dscene->tex_image_byte_packed.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(dscene->tex_image_byte_packed); + } + device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed); + } + if(dscene->tex_image_float_packed.size()) { + if(dscene->tex_image_float_packed.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + 
device->tex_free(dscene->tex_image_float_packed); + } + device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed); + } if(dscene->tex_image_packed_info.size()) { if(dscene->tex_image_packed_info.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -1208,10 +1280,14 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene) device->tex_free(dscene->tex_image_byte4_packed); device->tex_free(dscene->tex_image_float4_packed); + device->tex_free(dscene->tex_image_byte_packed); + device->tex_free(dscene->tex_image_float_packed); device->tex_free(dscene->tex_image_packed_info); dscene->tex_image_byte4_packed.clear(); dscene->tex_image_float4_packed.clear(); + dscene->tex_image_byte_packed.clear(); + dscene->tex_image_float_packed.clear(); dscene->tex_image_packed_info.clear(); } diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 9e72f197cce..8fec171b6fb 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -123,6 +123,8 @@ public: /* opencl images */ device_vector<uchar4> tex_image_byte4_packed; device_vector<float4> tex_image_float4_packed; + device_vector<uchar> tex_image_byte_packed; + device_vector<float> tex_image_float_packed; device_vector<uint4> tex_image_packed_info; KernelData data; diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index be1177d3be9..aff928ea2ee 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -67,8 +67,8 @@ CCL_NAMESPACE_BEGIN #define TEX_NUM_FLOAT4_OPENCL 1024 #define TEX_NUM_BYTE4_OPENCL 1024 #define TEX_NUM_HALF4_OPENCL 0 -#define TEX_NUM_FLOAT_OPENCL 0 -#define TEX_NUM_BYTE_OPENCL 0 +#define TEX_NUM_FLOAT_OPENCL 1024 +#define TEX_NUM_BYTE_OPENCL 1024 #define TEX_NUM_HALF_OPENCL 0 #define TEX_START_FLOAT4_OPENCL 0 #define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL -- cgit v1.2.3