Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Sybren A. Stüvel <sybren@stuvel.eu>, 2017-04-28 16:52:07 +0300
committer: Sybren A. Stüvel <sybren@stuvel.eu>, 2017-04-28 16:52:07 +0300
commit: 074c5f0d26b20fbb69e558af3e9a6f4183a2ad29 (patch)
tree: c724af247c52754db1c8426c2ded278859a8b35c /intern
parent: 08142dde2c87b14da505c126350bb49054cf054f (diff)
parent: aa88796a6cb6cb65b87508d87c51f1d58234f2ee (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/kernel/kernel_globals.h | 2
-rw-r--r-- intern/cycles/kernel/kernel_image_opencl.h | 56
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel.cpp | 10
-rw-r--r-- intern/cycles/kernel/svm/svm_voxel.h | 7
-rw-r--r-- intern/cycles/render/image.cpp | 225
-rw-r--r-- intern/cycles/render/image.h | 8
-rw-r--r-- intern/cycles/render/scene.h | 14
-rw-r--r-- intern/cycles/util/util_types.h | 4
8 files changed, 146 insertions, 180 deletions
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index b1f3283d5fc..f95f0d98c52 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -20,7 +20,7 @@
#define __KERNEL_GLOBALS_H__
#ifdef __KERNEL_CPU__
-#include "util/util_vector.h"
+# include "util/util_vector.h"
#endif
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_image_opencl.h b/intern/cycles/kernel/kernel_image_opencl.h
index 15579f55a41..795f2e3149f 100644
--- a/intern/cycles/kernel/kernel_image_opencl.h
+++ b/intern/cycles/kernel/kernel_image_opencl.h
@@ -64,23 +64,34 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
return x - (float)i;
}
+ccl_device_inline uint kernel_decode_image_interpolation(uint4 info)
+{
+ return (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
+}
+
+ccl_device_inline uint kernel_decode_image_extension(uint4 info)
+{
+ if(info.w & (1 << 1)) {
+ return EXTENSION_REPEAT;
+ }
+ else if(info.w & (1 << 2)) {
+ return EXTENSION_EXTEND;
+ }
+ else {
+ return EXTENSION_CLIP;
+ }
+}
+
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2);
uint width = info.x;
uint height = info.y;
uint offset = info.z;
-
- /* Image Options */
- uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
- uint extension;
- if(info.w & (1 << 1))
- extension = EXTENSION_REPEAT;
- else if(info.w & (1 << 2))
- extension = EXTENSION_EXTEND;
- else
- extension = EXTENSION_CLIP;
-
+ /* Decode image options. */
+ uint interpolation = kernel_decode_image_interpolation(info);
+ uint extension = kernel_decode_image_extension(info);
+ /* Actual sampling. */
float4 r;
int ix, iy, nix, niy;
if(interpolation == INTERPOLATION_CLOSEST) {
@@ -133,7 +144,6 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
}
-
return r;
}
@@ -145,17 +155,10 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
uint height = info.y;
uint offset = info.z;
uint depth = kernel_tex_fetch(__tex_image_packed_info, id*2+1).x;
-
- /* Image Options */
- uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
- uint extension;
- if(info.w & (1 << 1))
- extension = EXTENSION_REPEAT;
- else if(info.w & (1 << 2))
- extension = EXTENSION_EXTEND;
- else
- extension = EXTENSION_CLIP;
-
+ /* Decode image options. */
+ uint interpolation = kernel_decode_image_interpolation(info);
+ uint extension = kernel_decode_image_extension(info);
+ /* Actual sampling. */
float4 r;
int ix, iy, iz, nix, niy, niz;
if(interpolation == INTERPOLATION_CLOSEST) {
@@ -172,7 +175,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
- {
+ {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
@@ -199,12 +202,13 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
niz = svm_image_texture_wrap_periodic(iz+1, depth);
}
else {
- if(extension == EXTENSION_CLIP)
+ if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
+ }
/* Fall through. */
/* EXTENSION_EXTEND */
nix = svm_image_texture_wrap_clamp(ix+1, width);
@@ -225,8 +229,6 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height);
r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height);
r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height);
-
}
-
return r;
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index db2de6836d3..998619ac897 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -115,7 +115,7 @@ void kernel_tex_copy(KernelGlobals *kg,
texture_image_float *tex = NULL;
int id = atoi(name + strlen("__tex_image_float_"));
int array_index = kernel_tex_index(id);
-
+
if(array_index >= 0) {
if(array_index >= kg->texture_float_images.size()) {
kg->texture_float_images.resize(array_index+1);
@@ -134,7 +134,7 @@ void kernel_tex_copy(KernelGlobals *kg,
texture_image_uchar4 *tex = NULL;
int id = atoi(name + strlen("__tex_image_byte4_"));
int array_index = kernel_tex_index(id);
-
+
if(array_index >= 0) {
if(array_index >= kg->texture_byte4_images.size()) {
kg->texture_byte4_images.resize(array_index+1);
@@ -153,7 +153,7 @@ void kernel_tex_copy(KernelGlobals *kg,
texture_image_uchar *tex = NULL;
int id = atoi(name + strlen("__tex_image_byte_"));
int array_index = kernel_tex_index(id);
-
+
if(array_index >= 0) {
if(array_index >= kg->texture_byte_images.size()) {
kg->texture_byte_images.resize(array_index+1);
@@ -172,7 +172,7 @@ void kernel_tex_copy(KernelGlobals *kg,
texture_image_half4 *tex = NULL;
int id = atoi(name + strlen("__tex_image_half4_"));
int array_index = kernel_tex_index(id);
-
+
if(array_index >= 0) {
if(array_index >= kg->texture_half4_images.size()) {
kg->texture_half4_images.resize(array_index+1);
@@ -191,7 +191,7 @@ void kernel_tex_copy(KernelGlobals *kg,
texture_image_half *tex = NULL;
int id = atoi(name + strlen("__tex_image_half_"));
int array_index = kernel_tex_index(id);
-
+
if(array_index >= 0) {
if(array_index >= kg->texture_half_images.size()) {
kg->texture_half_images.resize(array_index+1);
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 1d97e8344bd..f4a5b2b2994 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -46,8 +46,13 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
# if defined(__KERNEL_CUDA__)
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
- if(kernel_tex_type(id) == IMAGE_DATA_TYPE_FLOAT4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_BYTE4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_HALF4)
+ const int texture_type = kernel_tex_type(id);
+ if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
+ texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_HALF4)
+ {
r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
+ }
else {
float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
r = make_float4(f, f, f, 1.0f);
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 697b9f75658..04c86732622 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -52,7 +52,7 @@ ImageManager::ImageManager(const DeviceInfo& info)
max_num_images = TEX_NUM_MAX;
has_half_images = true;
cuda_fermi_limits = false;
-
+
if(device_type == DEVICE_CUDA) {
if(!info.has_bindless_textures) {
/* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */
@@ -63,7 +63,7 @@ ImageManager::ImageManager(const DeviceInfo& info)
else if(device_type == DEVICE_OPENCL) {
has_half_images = false;
}
-
+
for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
tex_num_images[type] = 0;
}
@@ -104,8 +104,8 @@ bool ImageManager::set_animation_frame_update(int frame)
}
ImageDataType ImageManager::get_image_metadata(const string& filename,
- void *builtin_data,
- bool& is_linear)
+ void *builtin_data,
+ bool& is_linear)
{
bool is_float = false, is_half = false;
is_linear = false;
@@ -196,12 +196,21 @@ ImageDataType ImageManager::get_image_metadata(const string& filename,
}
}
+int ImageManager::max_flattened_slot(ImageDataType type)
+{
+ if(tex_num_images[type] == 0) {
+ /* No textures for the type, no slots needs allocation. */
+ return 0;
+ }
+ return type_index_to_flattened_slot(tex_num_images[type], type);
+}
+
/* The lower three bits of a device texture slot number indicate its type.
* These functions convert the slot ids from ImageManager "images" ones
- * to device ones and vice versa.
+ * to device ones and vice verse.
*
* There are special cases for CUDA Fermi, since there we have only 90 image texture
- * slots available and shold keep the flattended numbers in the 0-89 range.
+ * slots available and should keep the flattended numbers in the 0-89 range.
*/
int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
{
@@ -354,7 +363,7 @@ int ImageManager::add_image(const string& filename,
return -1;
}
}
-
+
if(slot == images[type].size()) {
images[type].resize(images[type].size() + 1);
}
@@ -372,7 +381,7 @@ int ImageManager::add_image(const string& filename,
img->use_alpha = use_alpha;
images[type][slot] = img;
-
+
++tex_num_images[type];
need_update = true;
@@ -1017,159 +1026,101 @@ void ImageManager::device_update_slot(Device *device,
uint8_t ImageManager::pack_image_options(ImageDataType type, size_t slot)
{
uint8_t options = 0;
-
/* Image Options are packed into one uint:
* bit 0 -> Interpolation
- * bit 1 + 2 + 3-> Extension */
- if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST)
+ * bit 1 + 2 + 3 -> Extension
+ */
+ if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST) {
options |= (1 << 0);
-
- if(images[type][slot]->extension == EXTENSION_REPEAT)
+ }
+ if(images[type][slot]->extension == EXTENSION_REPEAT) {
options |= (1 << 1);
- else if(images[type][slot]->extension == EXTENSION_EXTEND)
+ }
+ else if(images[type][slot]->extension == EXTENSION_EXTEND) {
options |= (1 << 2);
- else /* EXTENSION_CLIP */
+ }
+ else /* EXTENSION_CLIP */ {
options |= (1 << 3);
-
+ }
return options;
}
-void ImageManager::device_pack_images(Device *device,
- DeviceScene *dscene,
- Progress& /*progess*/)
+template<typename T>
+void ImageManager::device_pack_images_type(
+ ImageDataType type,
+ const vector<device_vector<T>*>& cpu_textures,
+ device_vector<T> *device_image,
+ uint4 *info)
{
- /* For OpenCL, we pack all image textures into a single large texture, and
- * do our own interpolation in the kernel. */
size_t size = 0, offset = 0;
- ImageDataType type;
-
- int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
- + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
- uint4 *info = dscene->tex_image_packed_info.resize(info_size*2);
-
- /* Byte4 Textures*/
- type = IMAGE_DATA_TYPE_BYTE4;
-
- for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
- continue;
-
- device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
- size += tex_img.size();
- }
-
- uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size);
-
- for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
- continue;
-
- device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
-
- uint8_t options = pack_image_options(type, slot);
-
- int index = type_index_to_flattened_slot(slot, type) * 2;
- info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
- info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
-
- memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
- offset += tex_img.size();
- }
-
- /* Float4 Textures*/
- type = IMAGE_DATA_TYPE_FLOAT4;
- size = 0, offset = 0;
-
- for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
- continue;
-
- device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
- size += tex_img.size();
- }
-
- float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size);
-
- for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
- continue;
-
- device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
-
- /* todo: support 3D textures, only CPU for now */
-
- uint8_t options = pack_image_options(type, slot);
-
- int index = type_index_to_flattened_slot(slot, type) * 2;
- info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
- info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
-
- memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
- offset += tex_img.size();
- }
-
- /* Byte Textures*/
- type = IMAGE_DATA_TYPE_BYTE;
- size = 0, offset = 0;
-
+ /* First step is to calculate size of the texture we need. */
for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
+ if(images[type][slot] == NULL) {
continue;
-
- device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
+ }
+ device_vector<T>& tex_img = *cpu_textures[slot];
size += tex_img.size();
}
-
- uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size);
-
+ /* Now we know how much memory we need, so we can allocate and fill. */
+ T *pixels = device_image->resize(size);
for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
+ if(images[type][slot] == NULL) {
continue;
-
- device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
-
+ }
+ device_vector<T>& tex_img = *cpu_textures[slot];
uint8_t options = pack_image_options(type, slot);
-
- int index = type_index_to_flattened_slot(slot, type) * 2;
- info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ const int index = type_index_to_flattened_slot(slot, type) * 2;
+ info[index] = make_uint4(tex_img.data_width,
+ tex_img.data_height,
+ offset,
+ options);
info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
-
- memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+ memcpy(pixels + offset,
+ (void*)tex_img.data_pointer,
+ tex_img.memory_size());
offset += tex_img.size();
}
+}
- /* Float Textures*/
- type = IMAGE_DATA_TYPE_FLOAT;
- size = 0, offset = 0;
-
- for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
- continue;
-
- device_vector<float>& tex_img = *dscene->tex_float_image[slot];
- size += tex_img.size();
- }
-
- float *pixels_float = dscene->tex_image_float_packed.resize(size);
-
- for(size_t slot = 0; slot < images[type].size(); slot++) {
- if(!images[type][slot])
- continue;
-
- device_vector<float>& tex_img = *dscene->tex_float_image[slot];
-
- /* todo: support 3D textures, only CPU for now */
-
- uint8_t options = pack_image_options(type, slot);
-
- int index = type_index_to_flattened_slot(slot, type) * 2;
- info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
- info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
+void ImageManager::device_pack_images(Device *device,
+ DeviceScene *dscene,
+ Progress& /*progess*/)
+{
+ /* For OpenCL, we pack all image textures into a single large texture, and
+ * do our own interpolation in the kernel.
+ */
- memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
- offset += tex_img.size();
- }
+ /* TODO(sergey): This will over-allocate a bit, but this is constant memory
+ * so should be fine for a short term.
+ */
+ const size_t info_size = max4(max_flattened_slot(IMAGE_DATA_TYPE_FLOAT4),
+ max_flattened_slot(IMAGE_DATA_TYPE_BYTE4),
+ max_flattened_slot(IMAGE_DATA_TYPE_FLOAT),
+ max_flattened_slot(IMAGE_DATA_TYPE_BYTE));
+ uint4 *info = dscene->tex_image_packed_info.resize(info_size*2);
+ /* Pack byte4 textures. */
+ device_pack_images_type(IMAGE_DATA_TYPE_BYTE4,
+ dscene->tex_byte4_image,
+ &dscene->tex_image_byte4_packed,
+ info);
+ /* Pack float4 textures. */
+ device_pack_images_type(IMAGE_DATA_TYPE_FLOAT4,
+ dscene->tex_float4_image,
+ &dscene->tex_image_float4_packed,
+ info);
+ /* Pack byte textures. */
+ device_pack_images_type(IMAGE_DATA_TYPE_BYTE,
+ dscene->tex_byte_image,
+ &dscene->tex_image_byte_packed,
+ info);
+ /* Pack float textures. */
+ device_pack_images_type(IMAGE_DATA_TYPE_FLOAT,
+ dscene->tex_float_image,
+ &dscene->tex_image_float_packed,
+ info);
+
+ /* Push textures to the device. */
if(dscene->tex_image_byte4_packed.size()) {
if(dscene->tex_image_byte4_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 76c2cc46f12..5550d019868 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -134,6 +134,7 @@ private:
int texture_limit,
device_vector<DeviceType>& tex_img);
+ int max_flattened_slot(ImageDataType type);
int type_index_to_flattened_slot(int slot, ImageDataType type);
int flattened_slot_to_type_index(int flat_slot, ImageDataType *type);
string name_from_type(int type);
@@ -151,6 +152,13 @@ private:
ImageDataType type,
int slot);
+ template<typename T>
+ void device_pack_images_type(
+ ImageDataType type,
+ const vector<device_vector<T>*>& cpu_textures,
+ device_vector<T> *device_image,
+ uint4 *info);
+
void device_pack_images(Device *device,
DeviceScene *dscene,
Progress& progess);
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index b02f9f35393..4c2c4f5fcc3 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -114,13 +114,13 @@ public:
device_vector<uint> sobol_directions;
/* cpu images */
- std::vector<device_vector<float4>* > tex_float4_image;
- std::vector<device_vector<uchar4>* > tex_byte4_image;
- std::vector<device_vector<half4>* > tex_half4_image;
- std::vector<device_vector<float>* > tex_float_image;
- std::vector<device_vector<uchar>* > tex_byte_image;
- std::vector<device_vector<half>* > tex_half_image;
-
+ vector<device_vector<float4>* > tex_float4_image;
+ vector<device_vector<uchar4>* > tex_byte4_image;
+ vector<device_vector<half4>* > tex_half4_image;
+ vector<device_vector<float>* > tex_float_image;
+ vector<device_vector<uchar>* > tex_byte_image;
+ vector<device_vector<half>* > tex_half_image;
+
/* opencl images */
device_vector<float4> tex_image_float4_packed;
device_vector<uchar4> tex_image_byte4_packed;
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 296343ecfd3..aacf7c1bd8e 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -156,7 +156,7 @@ enum InterpolationType {
/* Texture types
* Since we store the type in the lower bits of a flat index,
- * the shift and bit mask constant below need to be kept in sync.
+ * the shift and bit mask constant below need to be kept in sync.
*/
enum ImageDataType {
@@ -166,7 +166,7 @@ enum ImageDataType {
IMAGE_DATA_TYPE_FLOAT = 3,
IMAGE_DATA_TYPE_BYTE = 4,
IMAGE_DATA_TYPE_HALF = 5,
-
+
IMAGE_DATA_NUM_TYPES
};