diff options
author | Thomas Dinges <blender@dingto.org> | 2016-05-09 18:06:22 +0300 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2016-05-10 03:53:50 +0300 |
commit | 76481eaeff77e46555f8a0458d860911a9a57a9c (patch) | |
tree | aa3e872c967d4744ad7504a5e54f1003da441984 /intern | |
parent | dc82c2cd4817c6c84a4dd7e313eb2659a8830d59 (diff) |
Cycles: Add support for float4 textures on OpenCL.
The title says it all: this adds OpenCL float4 texture support.
There is still a bug in the code: I get an "Out of resources" error on NVIDIA hardware here, and I am not sure what's wrong yet.
Will investigate further, but maybe someone else has an idea. :)
Reviewers: #cycles, brecht
Subscribers: brecht, candreacchio
Differential Revision: https://developer.blender.org/D1983
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/kernel_textures.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_image.h | 23 | ||||
-rw-r--r-- | intern/cycles/render/image.cpp | 76 | ||||
-rw-r--r-- | intern/cycles/render/scene.h | 3 | ||||
-rw-r--r-- | intern/cycles/util/util_texture.h | 4 |
5 files changed, 78 insertions, 31 deletions
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 86979d8c31f..62b0a6f2923 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -236,7 +236,8 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_149) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_150) /* packed image (opencl) */ -KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed) +KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed) +KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed) KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info) #undef KERNEL_TEX diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 1f5ea8cc0ee..faff4ce3e6d 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -30,11 +30,16 @@ CCL_NAMESPACE_BEGIN /* For OpenCL all images are packed in a single array, and we do manual lookup * and interpolation. 
*/ -ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset) +ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) { - uchar4 r = kernel_tex_fetch(__tex_image_packed, offset); - float f = 1.0f/255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); + if(id >= TEX_NUM_FLOAT4_IMAGES) { + uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset); + float f = 1.0f/255.0f; + return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); + } + else { + return kernel_tex_fetch(__tex_image_float4_packed, offset); + } } ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) @@ -81,7 +86,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, iy = svm_image_texture_wrap_clamp(iy, height); } - r = svm_image_texture_read(kg, offset + ix + iy*width); + r = svm_image_texture_read(kg, id, offset + ix + iy*width); } else { /* We default to linear interpolation if it is not closest */ float tx = svm_image_texture_frac(x*width, &ix); @@ -103,10 +108,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, } - r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width); - r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width); - r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width); - r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width); + r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width); + r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width); + r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width); + r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width); } if(use_alpha && r.w != 1.0f && r.w != 0.0f) { diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 4a1b06ed438..10a5ca42fc0 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -223,7 +223,7 @@ 
int ImageManager::add_image(const string& filename, size_t slot; /* Load image info and find out if we need a float texture. */ - is_float = (pack_images)? false: is_float_image(filename, builtin_data, is_linear); + is_float = is_float_image(filename, builtin_data, is_linear); ImageDataType type = is_float? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4; @@ -803,12 +803,16 @@ void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& /*progess*/) { - /* for OpenCL, we pack all image textures inside a single big texture, and - * will do our own interpolation in the kernel */ - size_t size = 0; + /* For OpenCL, we pack all image textures into a single large texture, and + * do our own interpolation in the kernel. */ + size_t size = 0, offset = 0; + ImageDataType type; + + int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]; + uint4 *info = dscene->tex_image_packed_info.resize(info_size); - /* Only byte textures are supported atm */ - ImageDataType type = IMAGE_DATA_TYPE_BYTE4; + /* Byte Textures*/ + type = IMAGE_DATA_TYPE_BYTE4; for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -818,10 +822,7 @@ void ImageManager::device_pack_images(Device *device, size += tex_img.size(); } - uint4 *info = dscene->tex_image_packed_info.resize(images[type].size()); - uchar4 *pixels = dscene->tex_image_packed.resize(size); - - size_t offset = 0; + uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -829,24 +830,61 @@ void ImageManager::device_pack_images(Device *device, device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; + /* The image options are packed + bit 0 -> periodic + bit 1 + 2 -> interpolation type */ + uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1; + info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, 
tex_img.data_height, offset, interpolation); + + memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + offset += tex_img.size(); + } + + /* Float Textures*/ + type = IMAGE_DATA_TYPE_FLOAT4; + size = 0, offset = 0; + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; + size += tex_img.size(); + } + + float4 *pixels_float = dscene->tex_image_float4_packed.resize(size); + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; + /* todo: support 3D textures, only CPU for now */ /* The image options are packed bit 0 -> periodic bit 1 + 2 -> interpolation type */ uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1; - info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation); + info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation); - memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } - if(dscene->tex_image_packed.size()) { - if(dscene->tex_image_packed.device_pointer) { + if(dscene->tex_image_byte4_packed.size()) { + if(dscene->tex_image_byte4_packed.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(dscene->tex_image_byte4_packed); + } + device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed); + } + if(dscene->tex_image_float4_packed.size()) { + if(dscene->tex_image_float4_packed.device_pointer) { thread_scoped_lock device_lock(device_mutex); - device->tex_free(dscene->tex_image_packed); + device->tex_free(dscene->tex_image_float4_packed); } - device->tex_alloc("__tex_image_packed", dscene->tex_image_packed); + 
device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed); } if(dscene->tex_image_packed_info.size()) { if(dscene->tex_image_packed_info.device_pointer) { @@ -876,10 +914,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene) images[type].clear(); } - device->tex_free(dscene->tex_image_packed); + device->tex_free(dscene->tex_image_byte4_packed); + device->tex_free(dscene->tex_image_float4_packed); device->tex_free(dscene->tex_image_packed_info); - dscene->tex_image_packed.clear(); + dscene->tex_image_byte4_packed.clear(); + dscene->tex_image_float4_packed.clear(); dscene->tex_image_packed_info.clear(); } diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index b05e5a61ee9..455053fb3f6 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -113,7 +113,8 @@ public: device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_IMAGES_CPU]; /* opencl images */ - device_vector<uchar4> tex_image_packed; + device_vector<uchar4> tex_image_byte4_packed; + device_vector<float4> tex_image_float4_packed; device_vector<uint4> tex_image_packed_info; KernelData data; diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index 837858d0687..346ccd8ac63 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -38,8 +38,8 @@ CCL_NAMESPACE_BEGIN /* OpenCL */ #define TEX_NUM_BYTE4_IMAGES_OPENCL 1024 -#define TEX_NUM_FLOAT4_IMAGES_OPENCL 0 -#define TEX_IMAGE_BYTE4_START_OPENCL TEX_NUM_FLOAT4_IMAGES_OPENCL +#define TEX_NUM_FLOAT4_IMAGES_OPENCL 1024 +#define TEX_IMAGE_BYTE4_START_OPENCL TEX_NUM_FLOAT4_IMAGES_OPENCL /* Color to use when textures are not found. */ |